// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2011, 2012 STRATO.  All rights reserved.
 */

#include <linux/blkdev.h>
#include <linux/ratelimit.h>
#include <linux/sched/mm.h>
#include "ctree.h"
#include "volumes.h"
#include "disk-io.h"
#include "ordered-data.h"
#include "transaction.h"
#include "backref.h"
#include "extent_io.h"
#include "dev-replace.h"
#include "check-integrity.h"
#include "rcu-string.h"
#include "raid56.h"

/*
 * This is only the first step towards a full-featured scrub. It reads all
 * extent and super block and verifies the checksums. In case a bad checksum
 * is found or the extent cannot be read, good data will be written back if
 * any can be found.
 *
 * Future enhancements:
 *  - In case an unrepairable extent is encountered, track which files are
 *    affected and report them
 *  - track and record media errors, throw out bad devices
 *  - add a mode to also read unallocated space
 */

struct scrub_block;
struct scrub_ctx;

/*
 * the following three values only influence the performance.
 * The last one configures the number of parallel and outstanding I/O
 * operations. The first two values configure an upper limit for the number
 * of (dynamically allocated) pages that are added to a bio.
 */
#define SCRUB_PAGES_PER_RD_BIO	32	/* 128k per bio */
#define SCRUB_PAGES_PER_WR_BIO	32	/* 128k per bio */
#define SCRUB_BIOS_PER_SCTX	64	/* 8MB per device in flight */

/*
 * A scrub_block is the unit that is checksummed as a whole (one data
 * sector or one tree block). With 4K pages, 16 pages cover tree blocks
 * of up to 64K.
 */
#define SCRUB_MAX_PAGES_PER_BLOCK	16	/* 64k per node/leaf/sector */

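/*
 * Worked example of the sizing above (illustrative, assuming 4K pages):
 * a read bio carries SCRUB_PAGES_PER_RD_BIO * PAGE_SIZE = 32 * 4K = 128K,
 * and one scrub context keeps SCRUB_BIOS_PER_SCTX = 64 such bios, i.e. up
 * to 64 * 128K = 8M of read I/O in flight per device.
 */
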
struct scrub_recover {
	refcount_t		refs;
	struct btrfs_bio	*bbio;
	u64			map_length;
};

struct scrub_page {
	struct scrub_block	*sblock;
	struct page		*page;
	struct btrfs_device	*dev;
	struct list_head	list;
	u64			flags;
	u64			generation;
	u64			logical;
	u64			physical;
	u64			physical_for_dev_replace;
	atomic_t		refs;
	struct {
		unsigned int	mirror_num:8;
		unsigned int	have_csum:1;
		unsigned int	io_error:1;
	};
	u8			csum[BTRFS_CSUM_SIZE];

	struct scrub_recover	*recover;
};

struct scrub_bio {
	int			index;
	struct scrub_ctx	*sctx;
	struct btrfs_device	*dev;
	struct bio		*bio;
	blk_status_t		status;
	u64			logical;
	u64			physical;
#if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
	struct scrub_page	*pagev[SCRUB_PAGES_PER_WR_BIO];
#else
	struct scrub_page	*pagev[SCRUB_PAGES_PER_RD_BIO];
#endif
	int			page_count;
	int			next_free;
	struct btrfs_work	work;
};

struct scrub_block {
	struct scrub_page	*pagev[SCRUB_MAX_PAGES_PER_BLOCK];
	int			page_count;
	atomic_t		outstanding_pages;
	refcount_t		refs; /* free mem on transition to zero */
	struct scrub_ctx	*sctx;
	struct scrub_parity	*sparity;
	struct {
		unsigned int	header_error:1;
		unsigned int	checksum_error:1;
		unsigned int	no_io_error_seen:1;
		unsigned int	generation_error:1; /* also sets header_error */

		/* The following is for the data used to check parity */
		/* It is for the data with checksum */
		unsigned int	data_corrected:1;
	};
	struct btrfs_work	work;
};

/* Used for the chunks with parity stripe such RAID5/6 */
struct scrub_parity {
	struct scrub_ctx	*sctx;

	struct btrfs_device	*scrub_dev;

	u64			logic_start;

	u64			logic_end;

	int			nsectors;

	u64			stripe_len;

	refcount_t		refs;

	struct list_head	spages;

	/* Work of parity check and repair */
	struct btrfs_work	work;

	/* Mark the parity blocks which have data */
	unsigned long		*dbitmap;

	/*
	 * Mark the parity blocks which have data, but errors happen when
	 * read data or check data
	 */
	unsigned long		*ebitmap;

	unsigned long		bitmap[0];
};

struct scrub_ctx {
	struct scrub_bio	*bios[SCRUB_BIOS_PER_SCTX];
	struct btrfs_fs_info	*fs_info;
	int			first_free;
	int			curr;
	atomic_t		bios_in_flight;
	atomic_t		workers_pending;
	spinlock_t		list_lock;
	wait_queue_head_t	list_wait;
	u16			csum_size;
	struct list_head	csum_list;
	atomic_t		cancel_req;
	int			readonly;
	int			pages_per_rd_bio;

	int			is_dev_replace;

	struct scrub_bio	*wr_curr_bio;
	struct mutex		wr_lock;
	int			pages_per_wr_bio; /* <= SCRUB_PAGES_PER_WR_BIO */
	struct btrfs_device	*wr_tgtdev;
	bool			flush_all_writes;

	/*
	 * statistics
	 */
	struct btrfs_scrub_progress stat;
	spinlock_t		stat_lock;

	/*
	 * Use a ref counter to avoid use-after-free issues. Scrub workers
	 * decrement bios_in_flight and workers_pending and then do a wakeup
	 * on the list_wait wait queue. We must ensure the main scrub task
	 * doesn't free the scrub context before or while the workers are
	 * finishing all bios.
	 */
	refcount_t		refs;
};

struct scrub_fixup_nodatasum {
	struct scrub_ctx	*sctx;
	struct btrfs_device	*dev;
	u64			logical;
	struct btrfs_root	*root;
	struct btrfs_work	work;
	int			mirror_num;
};

struct scrub_nocow_inode {
	u64			inum;
	u64			offset;
	u64			root;
	struct list_head	list;
};

struct scrub_copy_nocow_ctx {
	struct scrub_ctx	*sctx;
	u64			logical;
	u64			len;
	int			mirror_num;
	u64			physical_for_dev_replace;
	struct list_head	inodes;
	struct btrfs_work	work;
};

struct scrub_warning {
	struct btrfs_path	*path;
	u64			extent_item_size;
	const char		*errstr;
	u64			physical;
	u64			logical;
	struct btrfs_device	*dev;
};

struct full_stripe_lock {
	struct rb_node		node;
	u64			logical;
	u64			refs;
	struct mutex		mutex;
};

static void scrub_pending_bio_inc(struct scrub_ctx *sctx);
static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx);
static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx);
static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
				     struct scrub_block *sblocks_for_recheck);
static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
				struct scrub_block *sblock,
				int retry_failed_mirror);
static void scrub_recheck_block_checksum(struct scrub_block *sblock);
static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
					     struct scrub_block *sblock_good);
static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
					    struct scrub_block *sblock_good,
					    int page_num, int force_write);
static void scrub_write_block_to_dev_replace(struct scrub_block *sblock);
static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
					   int page_num);
static int scrub_checksum_data(struct scrub_block *sblock);
static int scrub_checksum_tree_block(struct scrub_block *sblock);
static int scrub_checksum_super(struct scrub_block *sblock);
static void scrub_block_get(struct scrub_block *sblock);
static void scrub_block_put(struct scrub_block *sblock);
static void scrub_page_get(struct scrub_page *spage);
static void scrub_page_put(struct scrub_page *spage);
static void scrub_parity_get(struct scrub_parity *sparity);
static void scrub_parity_put(struct scrub_parity *sparity);
static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage);
static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
		       u64 physical, struct btrfs_device *dev, u64 flags,
		       u64 gen, int mirror_num, u8 *csum, int force,
		       u64 physical_for_dev_replace);
static void scrub_bio_end_io(struct bio *bio);
static void scrub_bio_end_io_worker(struct btrfs_work *work);
static void scrub_block_complete(struct scrub_block *sblock);
static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
			       u64 extent_logical, u64 extent_len,
			       u64 *extent_physical,
			       struct btrfs_device **extent_dev,
			       int *extent_mirror_num);
static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage);
static void scrub_wr_submit(struct scrub_ctx *sctx);
static void scrub_wr_bio_end_io(struct bio *bio);
static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
static int write_page_nocow(struct scrub_ctx *sctx,
			    u64 physical_for_dev_replace, struct page *page);
static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
				      struct scrub_copy_nocow_ctx *ctx);
static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
			    int mirror_num, u64 physical_for_dev_replace);
static void copy_nocow_pages_worker(struct btrfs_work *work);
static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_put_ctx(struct scrub_ctx *sctx);

static inline int scrub_is_page_on_raid56(struct scrub_page *page)
{
	return page->recover &&
	       (page->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
}

static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
{
	refcount_inc(&sctx->refs);
	atomic_inc(&sctx->bios_in_flight);
}

static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
{
	atomic_dec(&sctx->bios_in_flight);
	wake_up(&sctx->list_wait);
	scrub_put_ctx(sctx);
}

static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
{
	while (atomic_read(&fs_info->scrub_pause_req)) {
		mutex_unlock(&fs_info->scrub_lock);
		wait_event(fs_info->scrub_pause_wait,
			   atomic_read(&fs_info->scrub_pause_req) == 0);
		mutex_lock(&fs_info->scrub_lock);
	}
}

static void scrub_pause_on(struct btrfs_fs_info *fs_info)
{
	atomic_inc(&fs_info->scrubs_paused);
	wake_up(&fs_info->scrub_pause_wait);
}

static void scrub_pause_off(struct btrfs_fs_info *fs_info)
{
	mutex_lock(&fs_info->scrub_lock);
	__scrub_blocked_if_needed(fs_info);
	atomic_dec(&fs_info->scrubs_paused);
	mutex_unlock(&fs_info->scrub_lock);

	wake_up(&fs_info->scrub_pause_wait);
}

static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
{
	scrub_pause_on(fs_info);
	scrub_pause_off(fs_info);
}

/*
 * Insert new full stripe lock into full stripe locks tree
 *
 * Return pointer to existing or newly inserted full_stripe_lock structure if
 * everything works well.
 * Return ERR_PTR(-ENOMEM) if we failed to allocate memory
 *
 * NOTE: caller must hold full_stripe_locks_root->lock before calling this
 * function
 */
static struct full_stripe_lock *insert_full_stripe_lock(
		struct btrfs_full_stripe_locks_tree *locks_root,
		u64 fstripe_logical)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct full_stripe_lock *entry;
	struct full_stripe_lock *ret;

	lockdep_assert_held(&locks_root->lock);

	p = &locks_root->root.rb_node;
	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct full_stripe_lock, node);
		if (fstripe_logical < entry->logical) {
			p = &(*p)->rb_left;
		} else if (fstripe_logical > entry->logical) {
			p = &(*p)->rb_right;
		} else {
			entry->refs++;
			return entry;
		}
	}

	/* Insert new lock */
	ret = kmalloc(sizeof(*ret), GFP_KERNEL);
	if (!ret)
		return ERR_PTR(-ENOMEM);
	ret->logical = fstripe_logical;
	ret->refs = 1;
	mutex_init(&ret->mutex);

	rb_link_node(&ret->node, parent, p);
	rb_insert_color(&ret->node, &locks_root->root);
	return ret;
}

/*
 * Search for a full stripe lock of a block group
 *
 * Return pointer to existing full stripe lock if found
 * Return NULL if not found
 */
static struct full_stripe_lock *search_full_stripe_lock(
		struct btrfs_full_stripe_locks_tree *locks_root,
		u64 fstripe_logical)
{
	struct rb_node *node;
	struct full_stripe_lock *entry;

	lockdep_assert_held(&locks_root->lock);

	node = locks_root->root.rb_node;
	while (node) {
		entry = rb_entry(node, struct full_stripe_lock, node);
		if (fstripe_logical < entry->logical)
			node = node->rb_left;
		else if (fstripe_logical > entry->logical)
			node = node->rb_right;
		else
			return entry;
	}
	return NULL;
}

/*
 * Helper to get full stripe logical from a normal bytenr.
 *
 * Caller must ensure @cache is a RAID56 block group.
 */
static u64 get_full_stripe_logical(struct btrfs_block_group_cache *cache,
				   u64 bytenr)
{
	u64 ret;

	/*
	 * Due to chunk item size limit, full stripe length should not be
	 * larger than U32_MAX. Just a sanity check here.
	 */
	WARN_ON_ONCE(cache->full_stripe_len >= U32_MAX);

	/*
	 * round_down() can only handle power of 2, while RAID56 full
	 * stripe length can be 64KiB * n, so we need to manually round down.
	 */
	ret = div64_u64(bytenr - cache->key.objectid, cache->full_stripe_len) *
		cache->full_stripe_len + cache->key.objectid;
	return ret;
}
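
/*
 * Worked example for get_full_stripe_logical() (values are illustrative,
 * not taken from a real chunk layout): with a block group starting at
 * key.objectid = 1M and full_stripe_len = 128K (two 64K data stripes),
 * bytenr = 1M + 200K lies in the second full stripe, so the result is
 * div64_u64(200K, 128K) * 128K + 1M = 1 * 128K + 1M = 1M + 128K.
 */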

/*
 * Lock a full stripe to avoid concurrency of recovery and read
 *
 * It's only used for profiles with parities (RAID5/6), for other profiles it
 * does nothing.
 *
 * Return 0 if we locked full stripe covering @bytenr, with a mutex held.
 * So caller must call unlock_full_stripe() at the same context.
 *
 * Return <0 if encounters error.
 */
static int lock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
			    bool *locked_ret)
{
	struct btrfs_block_group_cache *bg_cache;
	struct btrfs_full_stripe_locks_tree *locks_root;
	struct full_stripe_lock *existing;
	u64 fstripe_start;
	int ret = 0;

	*locked_ret = false;
	bg_cache = btrfs_lookup_block_group(fs_info, bytenr);
	if (!bg_cache) {
		ASSERT(0);
		return -ENOENT;
	}

	/* Profiles not based on parity don't need full stripe lock */
	if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
		goto out;
	locks_root = &bg_cache->full_stripe_locks_root;

	fstripe_start = get_full_stripe_logical(bg_cache, bytenr);

	/* Now insert the full stripe lock */
	mutex_lock(&locks_root->lock);
	existing = insert_full_stripe_lock(locks_root, fstripe_start);
	mutex_unlock(&locks_root->lock);
	if (IS_ERR(existing)) {
		ret = PTR_ERR(existing);
		goto out;
	}
	mutex_lock(&existing->mutex);
	*locked_ret = true;
out:
	btrfs_put_block_group(bg_cache);
	return ret;
}

/*
 * Unlock a full stripe.
 *
 * NOTE: Caller must ensure it's the same context calling corresponding
 * lock_full_stripe().
 *
 * Return 0 if we unlock full stripe without problem.
 * Return <0 for error
 */
static int unlock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
			      bool locked)
{
	struct btrfs_block_group_cache *bg_cache;
	struct btrfs_full_stripe_locks_tree *locks_root;
	struct full_stripe_lock *fstripe_lock;
	u64 fstripe_start;
	bool freeit = false;
	int ret = 0;

	/* If we didn't acquire full stripe lock, no need to continue */
	if (!locked)
		return 0;

	bg_cache = btrfs_lookup_block_group(fs_info, bytenr);
	if (!bg_cache) {
		ASSERT(0);
		return -ENOENT;
	}
	if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
		goto out;

	locks_root = &bg_cache->full_stripe_locks_root;
	fstripe_start = get_full_stripe_logical(bg_cache, bytenr);

	mutex_lock(&locks_root->lock);
	fstripe_lock = search_full_stripe_lock(locks_root, fstripe_start);
	/* Unpaired unlock_full_stripe() detected */
	if (!fstripe_lock) {
		WARN_ON(1);
		ret = -ENOENT;
		mutex_unlock(&locks_root->lock);
		goto out;
	}

	if (fstripe_lock->refs == 0) {
		WARN_ON(1);
		btrfs_warn(fs_info, "full stripe lock at %llu refcount underflow",
			fstripe_lock->logical);
	} else {
		fstripe_lock->refs--;
	}

	if (fstripe_lock->refs == 0) {
		rb_erase(&fstripe_lock->node, &locks_root->root);
		freeit = true;
	}
	mutex_unlock(&locks_root->lock);

	mutex_unlock(&fstripe_lock->mutex);
	if (freeit)
		kfree(fstripe_lock);
out:
	btrfs_put_block_group(bg_cache);
	return ret;
}
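
/*
 * Typical pairing of the two helpers above, as done in
 * scrub_handle_errored_block() below (a sketch; the scrub work between the
 * two calls is elided):
 *
 *	bool locked;
 *	int ret;
 *
 *	ret = lock_full_stripe(fs_info, logical, &locked);
 *	if (ret < 0)
 *		return ret;
 *	... recheck/repair the errored block ...
 *	ret = unlock_full_stripe(fs_info, logical, locked);
 */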

/*
 * used for workers that require transaction commits (i.e., for the
 * NOCOW case)
 */
static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx)
{
	struct btrfs_fs_info *fs_info = sctx->fs_info;

	refcount_inc(&sctx->refs);
	/*
	 * increment scrubs_running to prevent cancel requests from
	 * completing as long as a worker is running. we must also
	 * increment scrubs_paused to prevent deadlocking on pause
	 * requests used for transactions commits (as the worker uses a
	 * transaction context). it is safe to regard the worker
	 * as paused for all matters practical. effectively, we only
	 * avoid cancellation requests from completing.
	 */
	mutex_lock(&fs_info->scrub_lock);
	atomic_inc(&fs_info->scrubs_running);
	atomic_inc(&fs_info->scrubs_paused);
	mutex_unlock(&fs_info->scrub_lock);

	/*
	 * check if @scrubs_running=@scrubs_paused condition
	 * inside wait_event() is not an atomic operation.
	 * which means we may inc/dec @scrub_running/paused
	 * at any time. Let's wake up @scrub_pause_wait as
	 * much as we can to let commit transaction blocked less.
	 */
	wake_up(&fs_info->scrub_pause_wait);

	atomic_inc(&sctx->workers_pending);
}

/* used for workers that require transaction commits */
static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx)
{
	struct btrfs_fs_info *fs_info = sctx->fs_info;

	/*
	 * see scrub_pending_trans_workers_inc() for why scrubs_running and
	 * scrubs_paused were both incremented; drop them together here
	 */
	mutex_lock(&fs_info->scrub_lock);
	atomic_dec(&fs_info->scrubs_running);
	atomic_dec(&fs_info->scrubs_paused);
	mutex_unlock(&fs_info->scrub_lock);
	atomic_dec(&sctx->workers_pending);
	wake_up(&fs_info->scrub_pause_wait);
	wake_up(&sctx->list_wait);
	scrub_put_ctx(sctx);
}

static void scrub_free_csums(struct scrub_ctx *sctx)
{
	while (!list_empty(&sctx->csum_list)) {
		struct btrfs_ordered_sum *sum;
		sum = list_first_entry(&sctx->csum_list,
				       struct btrfs_ordered_sum, list);
		list_del(&sum->list);
		kfree(sum);
	}
}

static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
{
	int i;

	if (!sctx)
		return;

	/* this can happen when scrub is cancelled */
	if (sctx->curr != -1) {
		struct scrub_bio *sbio = sctx->bios[sctx->curr];

		for (i = 0; i < sbio->page_count; i++) {
			WARN_ON(!sbio->pagev[i]->page);
			scrub_block_put(sbio->pagev[i]->sblock);
		}
		bio_put(sbio->bio);
	}

	for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
		struct scrub_bio *sbio = sctx->bios[i];

		if (!sbio)
			break;
		kfree(sbio);
	}

	kfree(sctx->wr_curr_bio);
	scrub_free_csums(sctx);
	kfree(sctx);
}

static void scrub_put_ctx(struct scrub_ctx *sctx)
{
	if (refcount_dec_and_test(&sctx->refs))
		scrub_free_ctx(sctx);
}

static noinline_for_stack
struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
{
	struct scrub_ctx *sctx;
	int i;
	struct btrfs_fs_info *fs_info = dev->fs_info;

	sctx = kzalloc(sizeof(*sctx), GFP_KERNEL);
	if (!sctx)
		goto nomem;
	refcount_set(&sctx->refs, 1);
	sctx->is_dev_replace = is_dev_replace;
	sctx->pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
	sctx->curr = -1;
	sctx->fs_info = dev->fs_info;
	for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
		struct scrub_bio *sbio;

		sbio = kzalloc(sizeof(*sbio), GFP_KERNEL);
		if (!sbio)
			goto nomem;
		sctx->bios[i] = sbio;

		sbio->index = i;
		sbio->sctx = sctx;
		sbio->page_count = 0;
		btrfs_init_work(&sbio->work, btrfs_scrub_helper,
				scrub_bio_end_io_worker, NULL, NULL);

		if (i != SCRUB_BIOS_PER_SCTX - 1)
			sctx->bios[i]->next_free = i + 1;
		else
			sctx->bios[i]->next_free = -1;
	}
	sctx->first_free = 0;
	atomic_set(&sctx->bios_in_flight, 0);
	atomic_set(&sctx->workers_pending, 0);
	atomic_set(&sctx->cancel_req, 0);
	sctx->csum_size = btrfs_super_csum_size(fs_info->super_copy);
	INIT_LIST_HEAD(&sctx->csum_list);

	spin_lock_init(&sctx->list_lock);
	spin_lock_init(&sctx->stat_lock);
	init_waitqueue_head(&sctx->list_wait);

	WARN_ON(sctx->wr_curr_bio != NULL);
	mutex_init(&sctx->wr_lock);
	sctx->wr_curr_bio = NULL;
	if (is_dev_replace) {
		WARN_ON(!fs_info->dev_replace.tgtdev);
		sctx->pages_per_wr_bio = SCRUB_PAGES_PER_WR_BIO;
		sctx->wr_tgtdev = fs_info->dev_replace.tgtdev;
		sctx->flush_all_writes = false;
	}

	return sctx;

nomem:
	scrub_free_ctx(sctx);
	return ERR_PTR(-ENOMEM);
}

static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
				     void *warn_ctx)
{
	u64 isize;
	u32 nlink;
	int ret;
	int i;
	unsigned nofs_flag;
	struct extent_buffer *eb;
	struct btrfs_inode_item *inode_item;
	struct scrub_warning *swarn = warn_ctx;
	struct btrfs_fs_info *fs_info = swarn->dev->fs_info;
	struct inode_fs_paths *ipath = NULL;
	struct btrfs_root *local_root;
	struct btrfs_key root_key;
	struct btrfs_key key;

	root_key.objectid = root;
	root_key.type = BTRFS_ROOT_ITEM_KEY;
	root_key.offset = (u64)-1;
	local_root = btrfs_read_fs_root_no_name(fs_info, &root_key);
	if (IS_ERR(local_root)) {
		ret = PTR_ERR(local_root);
		goto err;
	}

	/*
	 * this makes the path point to (inum INODE_ITEM ioff)
	 */
	key.objectid = inum;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, local_root, &key, swarn->path, 0, 0);
	if (ret) {
		btrfs_release_path(swarn->path);
		goto err;
	}

	eb = swarn->path->nodes[0];
	inode_item = btrfs_item_ptr(eb, swarn->path->slots[0],
				    struct btrfs_inode_item);
	isize = btrfs_inode_size(eb, inode_item);
	nlink = btrfs_inode_nlink(eb, inode_item);
	btrfs_release_path(swarn->path);

	/*
	 * init_ipath might indirectly call vmalloc, or use GFP_KERNEL. Scrub
	 * uses GFP_NOFS in this context, so we keep it consistent but it does
	 * not seem to be strictly necessary.
	 */
	nofs_flag = memalloc_nofs_save();
	ipath = init_ipath(4096, local_root, swarn->path);
	memalloc_nofs_restore(nofs_flag);
	if (IS_ERR(ipath)) {
		ret = PTR_ERR(ipath);
		ipath = NULL;
		goto err;
	}
	ret = paths_from_inode(inum, ipath);

	if (ret < 0)
		goto err;

	/*
	 * we deliberately ignore the bit ipath might have been too small to
	 * hold all of the paths here
	 */
	for (i = 0; i < ipath->fspath->elem_cnt; ++i)
		btrfs_warn_in_rcu(fs_info,
"%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, length %llu, links %u (path: %s)",
				  swarn->errstr, swarn->logical,
				  rcu_str_deref(swarn->dev->name),
				  swarn->physical,
				  root, inum, offset,
				  min(isize - offset, (u64)PAGE_SIZE), nlink,
				  (char *)(unsigned long)ipath->fspath->val[i]);

	free_ipath(ipath);
	return 0;

err:
	btrfs_warn_in_rcu(fs_info,
			  "%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d",
			  swarn->errstr, swarn->logical,
			  rcu_str_deref(swarn->dev->name),
			  swarn->physical,
			  root, inum, offset, ret);

	free_ipath(ipath);
	return 0;
}

static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
{
	struct btrfs_device *dev;
	struct btrfs_fs_info *fs_info;
	struct btrfs_path *path;
	struct btrfs_key found_key;
	struct extent_buffer *eb;
	struct btrfs_extent_item *ei;
	struct scrub_warning swarn;
	unsigned long ptr = 0;
	u64 extent_item_pos;
	u64 flags = 0;
	u64 ref_root;
	u32 item_size;
	u8 ref_level = 0;
	int ret;

	WARN_ON(sblock->page_count < 1);
	dev = sblock->pagev[0]->dev;
	fs_info = sblock->sctx->fs_info;

	path = btrfs_alloc_path();
	if (!path)
		return;

	swarn.physical = sblock->pagev[0]->physical;
	swarn.logical = sblock->pagev[0]->logical;
	swarn.errstr = errstr;
	swarn.dev = NULL;

	ret = extent_from_logical(fs_info, swarn.logical, path, &found_key,
				  &flags);
	if (ret < 0)
		goto out;

	extent_item_pos = swarn.logical - found_key.objectid;
	swarn.extent_item_size = found_key.offset;

	eb = path->nodes[0];
	ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
	item_size = btrfs_item_size_nr(eb, path->slots[0]);

	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		do {
			ret = tree_backref_for_extent(&ptr, eb, &found_key, ei,
						      item_size, &ref_root,
						      &ref_level);
			btrfs_warn_in_rcu(fs_info,
"%s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu",
				errstr, swarn.logical,
				rcu_str_deref(dev->name),
				swarn.physical,
				ref_level ? "node" : "leaf",
				ret < 0 ? -1 : ref_level,
				ret < 0 ? -1 : ref_root);
		} while (ret != 1);
		btrfs_release_path(path);
	} else {
		btrfs_release_path(path);
		swarn.path = path;
		swarn.dev = dev;
		iterate_extent_inodes(fs_info, found_key.objectid,
				      extent_item_pos, 1,
				      scrub_print_warning_inode, &swarn, false);
	}

out:
	btrfs_free_path(path);
}

static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
{
	struct page *page = NULL;
	unsigned long index;
	struct scrub_fixup_nodatasum *fixup = fixup_ctx;
	int ret;
	int corrected = 0;
	struct btrfs_key key;
	struct inode *inode = NULL;
	struct btrfs_fs_info *fs_info;
	u64 end = offset + PAGE_SIZE - 1;
	struct btrfs_root *local_root;
	int srcu_index;

	key.objectid = root;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	fs_info = fixup->root->fs_info;
	srcu_index = srcu_read_lock(&fs_info->subvol_srcu);

	local_root = btrfs_read_fs_root_no_name(fs_info, &key);
	if (IS_ERR(local_root)) {
		srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
		return PTR_ERR(local_root);
	}

	key.type = BTRFS_INODE_ITEM_KEY;
	key.objectid = inum;
	key.offset = 0;
	inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
	srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	index = offset >> PAGE_SHIFT;

	page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
	if (!page) {
		ret = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page)) {
		if (PageDirty(page)) {
			/*
			 * we need to write the data to the defect sector. the
			 * data that was in that sector is not in memory,
			 * because the page was modified. we must not write the
			 * modified page to that sector.
			 *
			 * TODO: what could be done here: wait for the delalloc
			 *       runner to write out that page (might involve
			 *       COW) and see whether the sector is still
			 *       referenced afterwards.
			 *
			 * For the meantime, we'll treat this error
			 * incorrectable, although there is a chance that a
			 * later scrub will find the bad sector again and that
			 * there's no dirty page in memory afterwards.
			 */
			ret = -EIO;
			goto out;
		}
		ret = repair_io_failure(fs_info, inum, offset, PAGE_SIZE,
					fixup->logical, page,
					offset - page_offset(page),
					fixup->mirror_num);
		unlock_page(page);
		corrected = !ret;
	} else {
		/*
		 * we need to get good data first. mark the range as
		 * EXTENT_DAMAGED so that the read below is directed to the
		 * requested (failed) mirror; the bit gets cleared by the
		 * endio code when a good copy was found and written back.
		 */
		ret = set_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
					EXTENT_DAMAGED);
		if (ret) {
			/* set_extent_bits should give proper error */
			WARN_ON(ret > 0);
			if (ret > 0)
				ret = -EFAULT;
			goto out;
		}

		ret = extent_read_full_page(&BTRFS_I(inode)->io_tree, page,
						btrfs_get_extent,
						fixup->mirror_num);
		wait_on_page_locked(page);

		corrected = !test_range_bit(&BTRFS_I(inode)->io_tree, offset,
						end, EXTENT_DAMAGED, 0, NULL);
		if (!corrected)
			clear_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
						EXTENT_DAMAGED);
	}

out:
	if (page)
		put_page(page);

	iput(inode);

	if (ret < 0)
		return ret;

	if (ret == 0 && corrected) {
		/*
		 * we only need to call readpage for one of the inodes belonging
		 * to this extent. so make iterate_extent_inodes stop
		 */
		return 1;
	}

	return -EIO;
}

static void scrub_fixup_nodatasum(struct btrfs_work *work)
{
	struct btrfs_fs_info *fs_info;
	int ret;
	struct scrub_fixup_nodatasum *fixup;
	struct scrub_ctx *sctx;
	struct btrfs_trans_handle *trans = NULL;
	struct btrfs_path *path;
	int uncorrectable = 0;

	fixup = container_of(work, struct scrub_fixup_nodatasum, work);
	sctx = fixup->sctx;
	fs_info = fixup->root->fs_info;

	path = btrfs_alloc_path();
	if (!path) {
		spin_lock(&sctx->stat_lock);
		++sctx->stat.malloc_errors;
		spin_unlock(&sctx->stat_lock);
		uncorrectable = 1;
		goto out;
	}

	trans = btrfs_join_transaction(fixup->root);
	if (IS_ERR(trans)) {
		uncorrectable = 1;
		goto out;
	}

	/*
	 * the idea is to trigger a regular read through the standard path. we
	 * read a page from the (failed) logical address by specifying the
	 * corresponding copynum of the failed sector. thus, that readpage is
	 * expected to fail.
	 * that is the point where on-the-fly error correction will kick in
	 * (once it's finished) and rewrite the failed sector if a good copy
	 * can be found.
	 */
	ret = iterate_inodes_from_logical(fixup->logical, fs_info, path,
					  scrub_fixup_readpage, fixup, false);
	if (ret < 0) {
		uncorrectable = 1;
		goto out;
	}
	WARN_ON(ret != 1);

	spin_lock(&sctx->stat_lock);
	++sctx->stat.corrected_errors;
	spin_unlock(&sctx->stat_lock);

out:
	if (trans && !IS_ERR(trans))
		btrfs_end_transaction(trans);
	if (uncorrectable) {
		spin_lock(&sctx->stat_lock);
		++sctx->stat.uncorrectable_errors;
		spin_unlock(&sctx->stat_lock);
		btrfs_dev_replace_stats_inc(
			&fs_info->dev_replace.num_uncorrectable_read_errors);
		btrfs_err_rl_in_rcu(fs_info,
		    "unable to fixup (nodatasum) error at logical %llu on dev %s",
			fixup->logical, rcu_str_deref(fixup->dev->name));
	}

	btrfs_free_path(path);
	kfree(fixup);

	scrub_pending_trans_workers_dec(sctx);
}

static inline void scrub_get_recover(struct scrub_recover *recover)
{
	refcount_inc(&recover->refs);
}

static inline void scrub_put_recover(struct btrfs_fs_info *fs_info,
				     struct scrub_recover *recover)
{
	if (refcount_dec_and_test(&recover->refs)) {
		btrfs_bio_counter_dec(fs_info);
		btrfs_put_bbio(recover->bbio);
		kfree(recover);
	}
}

/*
 * scrub_handle_errored_block gets called when either verification of the
 * pages failed or the bio failed to read, e.g. with EIO. In the latter
 * case, this function handles all pages in the bio, even though only one
 * may be bad.
 * The goal of this function is to repair the errored block by using the
 * contents of one of the mirrors.
 */
static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
{
	struct scrub_ctx *sctx = sblock_to_check->sctx;
	struct btrfs_device *dev;
	struct btrfs_fs_info *fs_info;
	u64 logical;
	unsigned int failed_mirror_index;
	unsigned int is_metadata;
	unsigned int have_csum;
	struct scrub_block *sblocks_for_recheck; /* holds one for each mirror */
	struct scrub_block *sblock_bad;
	int ret;
	int mirror_index;
	int page_num;
	int success;
	bool full_stripe_locked;
	static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	BUG_ON(sblock_to_check->page_count < 1);
	fs_info = sctx->fs_info;
	if (sblock_to_check->pagev[0]->flags & BTRFS_EXTENT_FLAG_SUPER) {
		/*
		 * if we find an error in a super block, we just report it.
		 * They will get written with the next transaction commit
		 * anyway
		 */
		spin_lock(&sctx->stat_lock);
		++sctx->stat.super_errors;
		spin_unlock(&sctx->stat_lock);
		return 0;
	}
	logical = sblock_to_check->pagev[0]->logical;
	BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1);
	failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1;
	is_metadata = !(sblock_to_check->pagev[0]->flags &
			BTRFS_EXTENT_FLAG_DATA);
	have_csum = sblock_to_check->pagev[0]->have_csum;
	dev = sblock_to_check->pagev[0]->dev;

	/*
	 * For RAID5/6, race can happen for a different device scrub thread.
	 * For data corruption, Parity and Data threads will both try
	 * to recovery the data.
	 * Race can lead to doubly added csum error, or even unrecoverable
	 * error.
	 */
	ret = lock_full_stripe(fs_info, logical, &full_stripe_locked);
	if (ret < 0) {
		spin_lock(&sctx->stat_lock);
		if (ret == -ENOMEM)
			sctx->stat.malloc_errors++;
		sctx->stat.read_errors++;
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		return ret;
	}

	/*
	 * read all mirrors one after the other. This includes to
	 * re-read the extent or metadata block that failed (that was
	 * the cause that this fixup code is called) another time,
	 * page by page this time in order to know which pages
	 * caused I/O errors and which ones are good (for all mirrors).
	 * It is the goal to handle the situation when more than one
	 * mirror contains I/O errors, but the errors do not
	 * overlap, i.e. the data can be repaired by selecting the
	 * pages from those mirrors without I/O error on the
	 * particular pages. One example (with blocks >= 2 * PAGE_SIZE)
	 * would be that mirror #1 has an I/O error on the first page,
	 * the second page is good, and mirror #2 has an I/O error on
	 * the second page, but the first page is good.
	 * Then the first page of the first mirror can be repaired by
	 * taking the first page of the second mirror, and the
	 * second page of the second mirror can be repaired by
	 * copying the contents of the 2nd page of the 1st mirror.
	 * One more note: if the pages of one mirror contain I/O
	 * errors, the checksum cannot be verified. In order to get
	 * the best data for repairing, the first attempt is to find
	 * a mirror without I/O errors and with a validated checksum.
	 * Only if this is not possible, the pages are picked from
	 * mirrors with I/O errors without considering the checksum.
	 * If the latter is the case, at the end, the checksum of the
	 * repaired area is verified in order to correctly maintain
	 * the statistics.
	 */
	sblocks_for_recheck = kcalloc(BTRFS_MAX_MIRRORS,
				      sizeof(*sblocks_for_recheck), GFP_NOFS);
	if (!sblocks_for_recheck) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.malloc_errors++;
		sctx->stat.read_errors++;
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
		goto out;
	}

	/* setup the context, map the logical blocks and alloc the pages */
	ret = scrub_setup_recheck_block(sblock_to_check, sblocks_for_recheck);
	if (ret) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.read_errors++;
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
		goto out;
	}
	BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
	sblock_bad = sblocks_for_recheck + failed_mirror_index;

	/* build and submit the bios for the failed mirror, check checksums */
	scrub_recheck_block(fs_info, sblock_bad, 1);

	if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
	    sblock_bad->no_io_error_seen) {
		/*
		 * the error disappeared after reading page by page, or
		 * the area was part of a huge bio and other parts of the
		 * bio caused I/O errors, or the block layer merged several
		 * read requests into one and the error was caused by a
		 * different bio (usually one of the two latter cases is
		 * the cause)
		 */
		spin_lock(&sctx->stat_lock);
		sctx->stat.unverified_errors++;
		sblock_to_check->data_corrected = 1;
		spin_unlock(&sctx->stat_lock);

		if (sctx->is_dev_replace)
			scrub_write_block_to_dev_replace(sblock_bad);
		goto out;
	}

	if (!sblock_bad->no_io_error_seen) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.read_errors++;
		spin_unlock(&sctx->stat_lock);
		if (__ratelimit(&_rs))
			scrub_print_warning("i/o error", sblock_to_check);
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
	} else if (sblock_bad->checksum_error) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.csum_errors++;
		spin_unlock(&sctx->stat_lock);
		if (__ratelimit(&_rs))
			scrub_print_warning("checksum error", sblock_to_check);
		btrfs_dev_stat_inc_and_print(dev,
					     BTRFS_DEV_STAT_CORRUPTION_ERRS);
	} else if (sblock_bad->header_error) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.verify_errors++;
		spin_unlock(&sctx->stat_lock);
		if (__ratelimit(&_rs))
			scrub_print_warning("checksum/header error",
					    sblock_to_check);
		if (sblock_bad->generation_error)
			btrfs_dev_stat_inc_and_print(dev,
				BTRFS_DEV_STAT_GENERATION_ERRS);
		else
			btrfs_dev_stat_inc_and_print(dev,
				BTRFS_DEV_STAT_CORRUPTION_ERRS);
	}

	if (sctx->readonly) {
		ASSERT(!sctx->is_dev_replace);
		goto out;
	}

	/*
	 * NOTE: Even for nodatasum case, it's still possible that it's a
	 * compressed data extent, thus scrub_fixup_nodatasum(), which write
	 * inode page cache onto disk, could cause serious data corruption.
	 *
	 * So here we could only read from disk, and hope our recovery could
	 * reach disk before the newer write.
	 */
	if (0 && !is_metadata && !have_csum) {
		struct scrub_fixup_nodatasum *fixup_nodatasum;

		WARN_ON(sctx->is_dev_replace);

		/*
		 * !is_metadata and !have_csum, this means that the data
		 * might not be COWed, that it might be modified
		 * concurrently. The general strategy to work on the
		 * commit root does not help in the case when COW is not
		 * used.
		 */
		fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS);
		if (!fixup_nodatasum)
			goto did_not_correct_error;
		fixup_nodatasum->sctx = sctx;
		fixup_nodatasum->dev = dev;
		fixup_nodatasum->logical = logical;
		fixup_nodatasum->root = fs_info->extent_root;
		fixup_nodatasum->mirror_num = failed_mirror_index + 1;
		scrub_pending_trans_workers_inc(sctx);
		btrfs_init_work(&fixup_nodatasum->work, btrfs_scrub_helper,
				scrub_fixup_nodatasum, NULL, NULL);
		btrfs_queue_work(fs_info->scrub_workers,
				 &fixup_nodatasum->work);
		goto out;
	}

	/*
	 * now build and submit the bios for the other mirrors, check
	 * checksums.
	 * First try to pick the mirror which is completely without I/O
	 * errors and also does not have a checksum error.
	 * If one is found, and if a checksum is present, the full block
	 * that is known to contain an error is rewritten. Afterwards
	 * the block is known to be corrected.
	 * If a mirror is found which is completely correct, and no
	 * checksum is present, only those pages are rewritten that had
	 * an I/O error in the block to be repaired, since it cannot be
	 * determined, which copy of the other pages is better (and it
	 * could happen otherwise that a correct page would be
	 * overwritten by a bad one).
	 */
	for (mirror_index = 0; ;mirror_index++) {
		struct scrub_block *sblock_other;

		if (mirror_index == failed_mirror_index)
			continue;

		/* raid56's mirror can be more than BTRFS_MAX_MIRRORS */
		if (!scrub_is_page_on_raid56(sblock_bad->pagev[0])) {
			if (mirror_index >= BTRFS_MAX_MIRRORS)
				break;
			if (!sblocks_for_recheck[mirror_index].page_count)
				break;

			sblock_other = sblocks_for_recheck + mirror_index;
		} else {
			struct scrub_recover *r = sblock_bad->pagev[0]->recover;
			int max_allowed = r->bbio->num_stripes -
						r->bbio->num_tgtdevs;

			if (mirror_index >= max_allowed)
				break;
			if (!sblocks_for_recheck[1].page_count)
				break;

			ASSERT(failed_mirror_index == 0);
			sblock_other = sblocks_for_recheck + 1;
			sblock_other->pagev[0]->mirror_num = 1 + mirror_index;
		}

		/* build and submit the bios, check checksums */
		scrub_recheck_block(fs_info, sblock_other, 0);

		if (!sblock_other->header_error &&
		    !sblock_other->checksum_error &&
		    sblock_other->no_io_error_seen) {
			if (sctx->is_dev_replace) {
				scrub_write_block_to_dev_replace(sblock_other);
				goto corrected_error;
			} else {
				ret = scrub_repair_block_from_good_copy(
						sblock_bad, sblock_other);
				if (!ret)
					goto corrected_error;
			}
		}
	}

	if (sblock_bad->no_io_error_seen && !sctx->is_dev_replace)
		goto did_not_correct_error;

	/*
	 * In case of I/O errors in the area that is supposed to be
	 * repaired, continue by picking good copies of those pages.
	 * Select the good pages from mirrors to rewrite bad pages from
	 * the area to fix. Afterwards verify the checksum of the block
	 * that is supposed to be repaired. This verification step is
	 * only done for the purpose of statistic counting and for the
	 * final scrub report, whether errors remain.
	 * A perfect fix would combine the pages of one mirror with the
	 * pages of another mirror in the hope that one combination
	 * produces a matching checksum, but with two mirrors and a
	 * block of 16 pages that would already mean 2^16 combinations
	 * to check, so it is left as a possible future enhancement.
	 *
	 * The current approach is: for data, take the page of any
	 * mirror that did not show an I/O error (the checksum of the
	 * repaired block is verified afterwards); for metadata, the
	 * generation and checksum are verified per tree block anyway,
	 * so a mix of pages that passes the I/O check is sufficient to
	 * tell whether the repair succeeded.
	 */
	success = 1;
	for (page_num = 0; page_num < sblock_bad->page_count;
	     page_num++) {
		struct scrub_page *page_bad = sblock_bad->pagev[page_num];
		struct scrub_block *sblock_other = NULL;

		/* skip no-io-error page in scrub */
		if (!page_bad->io_error && !sctx->is_dev_replace)
			continue;

		if (scrub_is_page_on_raid56(sblock_bad->pagev[0])) {
			/*
			 * In case of dev replace, if raid56 rebuild process
			 * didn't work out correct data, then copy the content
			 * in sblock_bad to make sure target device is identical
			 * to source device, instead of writing garbage data in
			 * sblock_other to target device.
			 */
			sblock_other = NULL;
		} else if (page_bad->io_error) {
			/* try to find no-io-error page in mirrors */
			for (mirror_index = 0;
			     mirror_index < BTRFS_MAX_MIRRORS &&
			     sblocks_for_recheck[mirror_index].page_count > 0;
			     mirror_index++) {
				if (!sblocks_for_recheck[mirror_index].
				    pagev[page_num]->io_error) {
					sblock_other = sblocks_for_recheck +
						       mirror_index;
					break;
				}
			}
			if (!sblock_other)
				success = 0;
		}

		if (sctx->is_dev_replace) {
			/*
			 * did not find a mirror to fetch the page
			 * from. scrub_write_page_to_dev_replace()
			 * handles this case (page->io_error), by
			 * filling the block with zeros before
			 * submitting the write request
			 */
			if (!sblock_other)
				sblock_other = sblock_bad;

			if (scrub_write_page_to_dev_replace(sblock_other,
							    page_num) != 0) {
				btrfs_dev_replace_stats_inc(
					&fs_info->dev_replace.num_write_errors);
				success = 0;
			}
		} else if (sblock_other) {
			ret = scrub_repair_page_from_good_copy(sblock_bad,
							       sblock_other,
							       page_num, 0);
			if (0 == ret)
				page_bad->io_error = 0;
			else
				success = 0;
		}
	}

	if (success && !sctx->is_dev_replace) {
		if (is_metadata || have_csum) {
			/*
			 * need to verify the checksum now that all
			 * sectors on disk are repaired (the write
			 * request for data to be repaired is on its way).
			 * Just be lazy and use scrub_recheck_block()
			 * which re-reads the data before the checksum
			 * is verified, but most likely the data comes out
			 * of the page cache.
			 */
			scrub_recheck_block(fs_info, sblock_bad, 1);
			if (!sblock_bad->header_error &&
			    !sblock_bad->checksum_error &&
			    sblock_bad->no_io_error_seen)
				goto corrected_error;
			else
				goto did_not_correct_error;
		} else {
corrected_error:
			spin_lock(&sctx->stat_lock);
			sctx->stat.corrected_errors++;
			sblock_to_check->data_corrected = 1;
			spin_unlock(&sctx->stat_lock);
			btrfs_err_rl_in_rcu(fs_info,
				"fixed up error at logical %llu on dev %s",
				logical, rcu_str_deref(dev->name));
		}
	} else {
did_not_correct_error:
		spin_lock(&sctx->stat_lock);
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		btrfs_err_rl_in_rcu(fs_info,
			"unable to fixup (regular) error at logical %llu on dev %s",
			logical, rcu_str_deref(dev->name));
	}

out:
	if (sblocks_for_recheck) {
		for (mirror_index = 0; mirror_index < BTRFS_MAX_MIRRORS;
		     mirror_index++) {
			struct scrub_block *sblock = sblocks_for_recheck +
						     mirror_index;
			struct scrub_recover *recover;
			int page_index;

			for (page_index = 0; page_index < sblock->page_count;
			     page_index++) {
				sblock->pagev[page_index]->sblock = NULL;
				recover = sblock->pagev[page_index]->recover;
				if (recover) {
					scrub_put_recover(fs_info, recover);
					sblock->pagev[page_index]->recover =
									NULL;
				}
				scrub_page_put(sblock->pagev[page_index]);
			}
		}
		kfree(sblocks_for_recheck);
	}

	ret = unlock_full_stripe(fs_info, logical, full_stripe_locked);
	if (ret < 0)
		return ret;
	return 0;
}

static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio)
{
	if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
		return 2;
	else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
		return 3;
	else
		return (int)bbio->num_stripes;
}
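
/*
 * Illustration of the counts above (a note, not new behavior): a RAID5
 * sector can be produced 2 ways (direct read, or rebuild from the other
 * data stripes + P), a RAID6 sector 3 ways (direct read, plus the two
 * parity-based reconstructions), and for mirrored profiles every stripe
 * is a full copy, hence num_stripes.
 */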

static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
						 u64 *raid_map,
						 u64 mapped_length,
						 int nstripes, int mirror,
						 int *stripe_index,
						 u64 *stripe_offset)
{
	int i;

	if (map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
		/* RAID5/6 */
		for (i = 0; i < nstripes; i++) {
			if (raid_map[i] == RAID6_Q_STRIPE ||
			    raid_map[i] == RAID5_P_STRIPE)
				continue;

			if (logical >= raid_map[i] &&
			    logical < raid_map[i] + mapped_length)
				break;
		}

		*stripe_index = i;
		*stripe_offset = logical - raid_map[i];
	} else {
		/* The other RAID type */
		*stripe_index = mirror;
		*stripe_offset = 0;
	}
}
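
/*
 * Worked example for scrub_stripe_index_and_offset() (layout values are
 * illustrative): for a RAID5 full stripe with raid_map[] = { 10M, 10M+64K,
 * RAID5_P_STRIPE } and mapped_length = 64K, logical = 10M+68K misses the
 * first entry (out of its 64K range), matches i = 1, so *stripe_index = 1
 * and *stripe_offset = (10M+68K) - (10M+64K) = 4K.
 */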

static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
				     struct scrub_block *sblocks_for_recheck)
{
	struct scrub_ctx *sctx = original_sblock->sctx;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	u64 length = original_sblock->page_count * PAGE_SIZE;
	u64 logical = original_sblock->pagev[0]->logical;
	u64 generation = original_sblock->pagev[0]->generation;
	u64 flags = original_sblock->pagev[0]->flags;
	u64 have_csum = original_sblock->pagev[0]->have_csum;
	struct scrub_recover *recover;
	struct btrfs_bio *bbio;
	u64 sublen;
	u64 mapped_length;
	u64 stripe_offset;
	int stripe_index;
	int page_index = 0;
	int mirror_index;
	int nmirrors;
	int ret;

	/*
	 * note: the two members refs and outstanding_pages
	 * are not used (and not set) in the blocks that are used for
	 * the recheck procedure
	 */
	while (length > 0) {
		sublen = min_t(u64, length, PAGE_SIZE);
		mapped_length = sublen;
		bbio = NULL;

		/*
		 * with a length of PAGE_SIZE, each returned stripe
		 * represents one mirror
		 */
		btrfs_bio_counter_inc_blocked(fs_info);
		ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
				logical, &mapped_length, &bbio);
		if (ret || !bbio || mapped_length < sublen) {
			btrfs_put_bbio(bbio);
			btrfs_bio_counter_dec(fs_info);
			return -EIO;
		}

		recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
		if (!recover) {
			btrfs_put_bbio(bbio);
			btrfs_bio_counter_dec(fs_info);
			return -ENOMEM;
		}

		refcount_set(&recover->refs, 1);
		recover->bbio = bbio;
		recover->map_length = mapped_length;

		BUG_ON(page_index >= SCRUB_MAX_PAGES_PER_BLOCK);

		nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS);

		for (mirror_index = 0; mirror_index < nmirrors;
		     mirror_index++) {
			struct scrub_block *sblock;
			struct scrub_page *page;

			sblock = sblocks_for_recheck + mirror_index;
			sblock->sctx = sctx;

			page = kzalloc(sizeof(*page), GFP_NOFS);
			if (!page) {
leave_nomem:
				spin_lock(&sctx->stat_lock);
				sctx->stat.malloc_errors++;
				spin_unlock(&sctx->stat_lock);
				scrub_put_recover(fs_info, recover);
				return -ENOMEM;
			}
			scrub_page_get(page);
			sblock->pagev[page_index] = page;
			page->sblock = sblock;
			page->flags = flags;
			page->generation = generation;
			page->logical = logical;
			page->have_csum = have_csum;
			if (have_csum)
				memcpy(page->csum,
				       original_sblock->pagev[0]->csum,
				       sctx->csum_size);

			scrub_stripe_index_and_offset(logical,
						      bbio->map_type,
						      bbio->raid_map,
						      mapped_length,
						      bbio->num_stripes -
						      bbio->num_tgtdevs,
						      mirror_index,
						      &stripe_index,
						      &stripe_offset);
			page->physical = bbio->stripes[stripe_index].physical +
					 stripe_offset;
			page->dev = bbio->stripes[stripe_index].dev;

			BUG_ON(page_index >= original_sblock->page_count);
			page->physical_for_dev_replace =
				original_sblock->pagev[page_index]->
				physical_for_dev_replace;
			/* for missing devices, dev->bdev is NULL */
			page->mirror_num = mirror_index + 1;
			sblock->page_count++;
			page->page = alloc_page(GFP_NOFS);
			if (!page->page)
				goto leave_nomem;

			scrub_get_recover(recover);
			page->recover = recover;
		}
		scrub_put_recover(fs_info, recover);
		length -= sublen;
		logical += sublen;
		page_index++;
	}

	return 0;
}
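
/*
 * Result layout of the setup above (a note, not new behavior): after
 * scrub_setup_recheck_block() returns, sblocks_for_recheck[m].pagev[p]
 * holds page p of mirror m+1, so the repair code can address the same
 * PAGE_SIZE range on every mirror independently.
 */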

static void scrub_bio_wait_endio(struct bio *bio)
{
	complete(bio->bi_private);
}

static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
					struct bio *bio,
					struct scrub_page *page)
{
	DECLARE_COMPLETION_ONSTACK(done);
	int ret;
	int mirror_num;

	bio->bi_iter.bi_sector = page->logical >> 9;
	bio->bi_private = &done;
	bio->bi_end_io = scrub_bio_wait_endio;

	mirror_num = page->sblock->pagev[0]->mirror_num;
	ret = raid56_parity_recover(fs_info, bio, page->recover->bbio,
				    page->recover->map_length,
				    mirror_num, 0);
	if (ret)
		return ret;

	wait_for_completion_io(&done);
	return blk_status_to_errno(bio->bi_status);
}

static void scrub_recheck_block_on_raid56(struct btrfs_fs_info *fs_info,
					  struct scrub_block *sblock)
{
	struct scrub_page *first_page = sblock->pagev[0];
	struct bio *bio;
	int page_num;

	/* All pages in sblock belong to the same stripe on the same device. */
	ASSERT(first_page->dev);
	if (!first_page->dev->bdev)
		goto out;

	bio = btrfs_io_bio_alloc(BIO_MAX_PAGES);
	bio_set_dev(bio, first_page->dev->bdev);

	for (page_num = 0; page_num < sblock->page_count; page_num++) {
		struct scrub_page *page = sblock->pagev[page_num];

		WARN_ON(!page->page);
		bio_add_page(bio, page->page, PAGE_SIZE, 0);
	}

	if (scrub_submit_raid56_bio_wait(fs_info, bio, first_page)) {
		bio_put(bio);
		goto out;
	}

	bio_put(bio);

	scrub_recheck_block_checksum(sblock);

	return;
out:
	for (page_num = 0; page_num < sblock->page_count; page_num++)
		sblock->pagev[page_num]->io_error = 1;

	sblock->no_io_error_seen = 0;
}

/*
 * this function will check the on disk data for checksum errors, header
 * errors and read I/O errors. If any I/O errors happen, the exact pages
 * which are errored are marked as being bad. The goal is to enable scrub
 * to take those pages that are not errored from all the mirrors so that
 * the pages that are errored in the just handled mirror can be repaired.
 */
static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
				struct scrub_block *sblock,
				int retry_failed_mirror)
{
	int page_num;

	sblock->no_io_error_seen = 1;

	/* short cut for raid56 */
	if (!retry_failed_mirror && scrub_is_page_on_raid56(sblock->pagev[0]))
		return scrub_recheck_block_on_raid56(fs_info, sblock);

	for (page_num = 0; page_num < sblock->page_count; page_num++) {
		struct bio *bio;
		struct scrub_page *page = sblock->pagev[page_num];

		if (page->dev->bdev == NULL) {
			page->io_error = 1;
			sblock->no_io_error_seen = 0;
			continue;
		}

		WARN_ON(!page->page);
		bio = btrfs_io_bio_alloc(1);
		bio_set_dev(bio, page->dev->bdev);

		bio_add_page(bio, page->page, PAGE_SIZE, 0);
		bio->bi_iter.bi_sector = page->physical >> 9;
		bio->bi_opf = REQ_OP_READ;

		if (btrfsic_submit_bio_wait(bio)) {
			page->io_error = 1;
			sblock->no_io_error_seen = 0;
		}

		bio_put(bio);
	}

	if (sblock->no_io_error_seen)
		scrub_recheck_block_checksum(sblock);
}

static inline int scrub_check_fsid(u8 fsid[],
				   struct scrub_page *spage)
{
	struct btrfs_fs_devices *fs_devices = spage->dev->fs_devices;
	int ret;

	ret = memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
	return !ret;
}

static void scrub_recheck_block_checksum(struct scrub_block *sblock)
{
	sblock->header_error = 0;
	sblock->checksum_error = 0;
	sblock->generation_error = 0;

	if (sblock->pagev[0]->flags & BTRFS_EXTENT_FLAG_DATA)
		scrub_checksum_data(sblock);
	else
		scrub_checksum_tree_block(sblock);
}

static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
					     struct scrub_block *sblock_good)
{
	int page_num;
	int ret = 0;

	for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
		int ret_sub;

		ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
							   sblock_good,
							   page_num, 1);
		if (ret_sub)
			ret = ret_sub;
	}

	return ret;
}

static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
					    struct scrub_block *sblock_good,
					    int page_num, int force_write)
{
	struct scrub_page *page_bad = sblock_bad->pagev[page_num];
	struct scrub_page *page_good = sblock_good->pagev[page_num];
	struct btrfs_fs_info *fs_info = sblock_bad->sctx->fs_info;

	BUG_ON(page_bad->page == NULL);
	BUG_ON(page_good->page == NULL);
	if (force_write || sblock_bad->header_error ||
	    sblock_bad->checksum_error || page_bad->io_error) {
		struct bio *bio;
		int ret;

		if (!page_bad->dev->bdev) {
			btrfs_warn_rl(fs_info,
				"scrub_repair_page_from_good_copy(bdev == NULL) is unexpected");
			return -EIO;
		}

		bio = btrfs_io_bio_alloc(1);
		bio_set_dev(bio, page_bad->dev->bdev);
		bio->bi_iter.bi_sector = page_bad->physical >> 9;
		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);

		ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
		if (PAGE_SIZE != ret) {
			bio_put(bio);
			return -EIO;
		}

		if (btrfsic_submit_bio_wait(bio)) {
			btrfs_dev_stat_inc_and_print(page_bad->dev,
				BTRFS_DEV_STAT_WRITE_ERRS);
			btrfs_dev_replace_stats_inc(
				&fs_info->dev_replace.num_write_errors);
			bio_put(bio);
			return -EIO;
		}
		bio_put(bio);
	}

	return 0;
}

static void scrub_write_block_to_dev_replace(struct scrub_block *sblock)
{
	struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;
	int page_num;

	/*
	 * This block is used for the check of the parity on the source device,
	 * so the data needn't be written into the destination device.
	 */
	if (sblock->sparity)
		return;

	for (page_num = 0; page_num < sblock->page_count; page_num++) {
		int ret;

		ret = scrub_write_page_to_dev_replace(sblock, page_num);
		if (ret)
			btrfs_dev_replace_stats_inc(
				&fs_info->dev_replace.num_write_errors);
	}
}

static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
					   int page_num)
{
	struct scrub_page *spage = sblock->pagev[page_num];

	BUG_ON(spage->page == NULL);
	if (spage->io_error) {
		void *mapped_buffer = kmap_atomic(spage->page);

		clear_page(mapped_buffer);
		flush_dcache_page(spage->page);
		kunmap_atomic(mapped_buffer);
	}
	return scrub_add_page_to_wr_bio(sblock->sctx, spage);
}

static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage)
{
	struct scrub_bio *sbio;
	int ret;

	mutex_lock(&sctx->wr_lock);
again:
	if (!sctx->wr_curr_bio) {
		sctx->wr_curr_bio = kzalloc(sizeof(*sctx->wr_curr_bio),
					    GFP_KERNEL);
		if (!sctx->wr_curr_bio) {
			mutex_unlock(&sctx->wr_lock);
			return -ENOMEM;
		}
		sctx->wr_curr_bio->sctx = sctx;
		sctx->wr_curr_bio->page_count = 0;
	}
	sbio = sctx->wr_curr_bio;
	if (sbio->page_count == 0) {
		struct bio *bio;

		sbio->physical = spage->physical_for_dev_replace;
		sbio->logical = spage->logical;
		sbio->dev = sctx->wr_tgtdev;
		bio = sbio->bio;
		if (!bio) {
			bio = btrfs_io_bio_alloc(sctx->pages_per_wr_bio);
			sbio->bio = bio;
		}

		bio->bi_private = sbio;
		bio->bi_end_io = scrub_wr_bio_end_io;
		bio_set_dev(bio, sbio->dev->bdev);
		bio->bi_iter.bi_sector = sbio->physical >> 9;
		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
		sbio->status = 0;
	} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
		   spage->physical_for_dev_replace ||
		   sbio->logical + sbio->page_count * PAGE_SIZE !=
		   spage->logical) {
		scrub_wr_submit(sctx);
		goto again;
	}

	ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
	if (ret != PAGE_SIZE) {
		if (sbio->page_count < 1) {
			bio_put(sbio->bio);
			sbio->bio = NULL;
			mutex_unlock(&sctx->wr_lock);
			return -EIO;
		}
		scrub_wr_submit(sctx);
		goto again;
	}

	sbio->pagev[sbio->page_count] = spage;
	scrub_page_get(spage);
	sbio->page_count++;
	if (sbio->page_count == sctx->pages_per_wr_bio)
		scrub_wr_submit(sctx);
	mutex_unlock(&sctx->wr_lock);

	return 0;
}

static void scrub_wr_submit(struct scrub_ctx *sctx)
{
	struct scrub_bio *sbio;

	if (!sctx->wr_curr_bio)
		return;

	sbio = sctx->wr_curr_bio;
	sctx->wr_curr_bio = NULL;
	WARN_ON(!sbio->bio->bi_disk);
	scrub_pending_bio_inc(sctx);
	/* process all writes in a single worker thread. Then the block layer
	 * orders the requests before sending them to the driver which
	 * doubled the write performance on spinning disks when measured
	 * with Linux 3.5 */
	btrfsic_submit_bio(sbio->bio);
}

static void scrub_wr_bio_end_io(struct bio *bio)
{
	struct scrub_bio *sbio = bio->bi_private;
	struct btrfs_fs_info *fs_info = sbio->dev->fs_info;

	sbio->status = bio->bi_status;
	sbio->bio = bio;

	btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper,
			 scrub_wr_bio_end_io_worker, NULL, NULL);
	btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work);
}

static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
{
	struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
	struct scrub_ctx *sctx = sbio->sctx;
	int i;

	WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
	if (sbio->status) {
		struct btrfs_dev_replace *dev_replace =
			&sbio->sctx->fs_info->dev_replace;

		for (i = 0; i < sbio->page_count; i++) {
			struct scrub_page *spage = sbio->pagev[i];

			spage->io_error = 1;
			btrfs_dev_replace_stats_inc(&dev_replace->
						    num_write_errors);
		}
	}

	for (i = 0; i < sbio->page_count; i++)
		scrub_page_put(sbio->pagev[i]);

	bio_put(sbio->bio);
	kfree(sbio);
	scrub_pending_bio_dec(sctx);
}

static int scrub_checksum(struct scrub_block *sblock)
{
	u64 flags;
	int ret;

	/*
	 * No need to initialize these stats currently,
	 * because this function only use return value
	 * instead of these stats value.
	 *
	 * Todo:
	 * always use stats
	 */
	sblock->header_error = 0;
	sblock->generation_error = 0;
	sblock->checksum_error = 0;

	WARN_ON(sblock->page_count < 1);
	flags = sblock->pagev[0]->flags;
	ret = 0;
	if (flags & BTRFS_EXTENT_FLAG_DATA)
		ret = scrub_checksum_data(sblock);
	else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
		ret = scrub_checksum_tree_block(sblock);
	else if (flags & BTRFS_EXTENT_FLAG_SUPER)
		(void)scrub_checksum_super(sblock);
	else
		WARN_ON(1);
	if (ret)
		scrub_handle_errored_block(sblock);

	return ret;
}

static int scrub_checksum_data(struct scrub_block *sblock)
{
	struct scrub_ctx *sctx = sblock->sctx;
	u8 csum[BTRFS_CSUM_SIZE];
	u8 *on_disk_csum;
	struct page *page;
	void *buffer;
	u32 crc = ~(u32)0;
	u64 len;
	int index;

	BUG_ON(sblock->page_count < 1);
	if (!sblock->pagev[0]->have_csum)
		return 0;

	on_disk_csum = sblock->pagev[0]->csum;
	page = sblock->pagev[0]->page;
	buffer = kmap_atomic(page);

	len = sctx->fs_info->sectorsize;
	index = 0;
	for (;;) {
		u64 l = min_t(u64, len, PAGE_SIZE);

		crc = btrfs_csum_data(buffer, crc, l);
		kunmap_atomic(buffer);
		len -= l;
		if (len == 0)
			break;
		index++;
		BUG_ON(index >= sblock->page_count);
		BUG_ON(!sblock->pagev[index]->page);
		page = sblock->pagev[index]->page;
		buffer = kmap_atomic(page);
	}

	btrfs_csum_final(crc, csum);
	if (memcmp(csum, on_disk_csum, sctx->csum_size))
		sblock->checksum_error = 1;

	return sblock->checksum_error;
}
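
/*
 * The checksum loop above in short (a sketch, not a drop-in replacement):
 * the crc32c is seeded with ~0, folded over the sector one mapped page at
 * a time, and finalized by btrfs_csum_final():
 *
 *	u32 crc = ~(u32)0;
 *
 *	crc = btrfs_csum_data(buffer, crc, sectorsize);
 *	btrfs_csum_final(crc, csum);
 *	bad = memcmp(csum, on_disk_csum, sctx->csum_size) != 0;
 *
 * The page-by-page kmap dance only matters when sectorsize > PAGE_SIZE.
 */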

static int scrub_checksum_tree_block(struct scrub_block *sblock)
{
	struct scrub_ctx *sctx = sblock->sctx;
	struct btrfs_header *h;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	u8 calculated_csum[BTRFS_CSUM_SIZE];
	u8 on_disk_csum[BTRFS_CSUM_SIZE];
	struct page *page;
	void *mapped_buffer;
	u64 mapped_size;
	void *p;
	u32 crc = ~(u32)0;
	u64 len;
	int index;

	BUG_ON(sblock->page_count < 1);
	page = sblock->pagev[0]->page;
	mapped_buffer = kmap_atomic(page);
	h = (struct btrfs_header *)mapped_buffer;
	memcpy(on_disk_csum, h->csum, sctx->csum_size);

	/*
	 * we don't use the getter functions here, as we
	 * a) don't have an extent buffer and
	 * b) the page is already kmapped
	 */
	if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h))
		sblock->header_error = 1;

	if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h)) {
		sblock->header_error = 1;
		sblock->generation_error = 1;
	}

	if (!scrub_check_fsid(h->fsid, sblock->pagev[0]))
		sblock->header_error = 1;

	if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
		   BTRFS_UUID_SIZE))
		sblock->header_error = 1;

	len = sctx->fs_info->nodesize - BTRFS_CSUM_SIZE;
	mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
	p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
	index = 0;
	for (;;) {
		u64 l = min_t(u64, len, mapped_size);

		crc = btrfs_csum_data(p, crc, l);
		kunmap_atomic(mapped_buffer);
		len -= l;
		if (len == 0)
			break;
		index++;
		BUG_ON(index >= sblock->page_count);
		BUG_ON(!sblock->pagev[index]->page);
		page = sblock->pagev[index]->page;
		mapped_buffer = kmap_atomic(page);
		mapped_size = PAGE_SIZE;
		p = mapped_buffer;
	}

	btrfs_csum_final(crc, calculated_csum);
	if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
		sblock->checksum_error = 1;

	return sblock->header_error || sblock->checksum_error;
}

static int scrub_checksum_super(struct scrub_block *sblock)
{
	struct btrfs_super_block *s;
	struct scrub_ctx *sctx = sblock->sctx;
	u8 calculated_csum[BTRFS_CSUM_SIZE];
	u8 on_disk_csum[BTRFS_CSUM_SIZE];
	struct page *page;
	void *mapped_buffer;
	u64 mapped_size;
	void *p;
	u32 crc = ~(u32)0;
	int fail_gen = 0;
	int fail_cor = 0;
	u64 len;
	int index;

	BUG_ON(sblock->page_count < 1);
	page = sblock->pagev[0]->page;
	mapped_buffer = kmap_atomic(page);
	s = (struct btrfs_super_block *)mapped_buffer;
	memcpy(on_disk_csum, s->csum, sctx->csum_size);

	if (sblock->pagev[0]->logical != btrfs_super_bytenr(s))
		++fail_cor;

	if (sblock->pagev[0]->generation != btrfs_super_generation(s))
		++fail_gen;

	if (!scrub_check_fsid(s->fsid, sblock->pagev[0]))
		++fail_cor;

	len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE;
	mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
	p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
	index = 0;
	for (;;) {
		u64 l = min_t(u64, len, mapped_size);

		crc = btrfs_csum_data(p, crc, l);
		kunmap_atomic(mapped_buffer);
		len -= l;
		if (len == 0)
			break;
		index++;
		BUG_ON(index >= sblock->page_count);
		BUG_ON(!sblock->pagev[index]->page);
		page = sblock->pagev[index]->page;
		mapped_buffer = kmap_atomic(page);
		mapped_size = PAGE_SIZE;
		p = mapped_buffer;
	}

	btrfs_csum_final(crc, calculated_csum);
	if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
		++fail_cor;

	if (fail_cor + fail_gen) {
		/*
		 * if we find an error in a super block, we just report it.
		 * They will get written with the next transaction commit
		 * anyway
		 */
		spin_lock(&sctx->stat_lock);
		++sctx->stat.super_errors;
		spin_unlock(&sctx->stat_lock);
		if (fail_cor)
			btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
				BTRFS_DEV_STAT_CORRUPTION_ERRS);
		else
			btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
				BTRFS_DEV_STAT_GENERATION_ERRS);
	}

	return fail_cor + fail_gen;
}

static void scrub_block_get(struct scrub_block *sblock)
{
	refcount_inc(&sblock->refs);
}

static void scrub_block_put(struct scrub_block *sblock)
{
	if (refcount_dec_and_test(&sblock->refs)) {
		int i;

		if (sblock->sparity)
			scrub_parity_put(sblock->sparity);

		for (i = 0; i < sblock->page_count; i++)
			scrub_page_put(sblock->pagev[i]);
		kfree(sblock);
	}
}

static void scrub_page_get(struct scrub_page *spage)
{
	atomic_inc(&spage->refs);
}

static void scrub_page_put(struct scrub_page *spage)
{
	if (atomic_dec_and_test(&spage->refs)) {
		if (spage->page)
			__free_page(spage->page);
		kfree(spage);
	}
}

static void scrub_submit(struct scrub_ctx *sctx)
{
	struct scrub_bio *sbio;

	if (sctx->curr == -1)
		return;

	sbio = sctx->bios[sctx->curr];
	sctx->curr = -1;
	scrub_pending_bio_inc(sctx);
	btrfsic_submit_bio(sbio->bio);
}

static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage)
{
	struct scrub_block *sblock = spage->sblock;
	struct scrub_bio *sbio;
	int ret;

again:
	/*
	 * grab a fresh bio or wait for one to become available
	 */
	while (sctx->curr == -1) {
		spin_lock(&sctx->list_lock);
		sctx->curr = sctx->first_free;
		if (sctx->curr != -1) {
			sctx->first_free = sctx->bios[sctx->curr]->next_free;
			sctx->bios[sctx->curr]->next_free = -1;
			sctx->bios[sctx->curr]->page_count = 0;
			spin_unlock(&sctx->list_lock);
		} else {
			spin_unlock(&sctx->list_lock);
			wait_event(sctx->list_wait, sctx->first_free != -1);
		}
	}
	sbio = sctx->bios[sctx->curr];
	if (sbio->page_count == 0) {
		struct bio *bio;

		sbio->physical = spage->physical;
		sbio->logical = spage->logical;
		sbio->dev = spage->dev;
		bio = sbio->bio;
		if (!bio) {
			bio = btrfs_io_bio_alloc(sctx->pages_per_rd_bio);
			sbio->bio = bio;
		}

		bio->bi_private = sbio;
		bio->bi_end_io = scrub_bio_end_io;
		bio_set_dev(bio, sbio->dev->bdev);
		bio->bi_iter.bi_sector = sbio->physical >> 9;
		bio_set_op_attrs(bio, REQ_OP_READ, 0);
		sbio->status = 0;
	} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
		   spage->physical ||
		   sbio->logical + sbio->page_count * PAGE_SIZE !=
		   spage->logical ||
		   sbio->dev != spage->dev) {
		scrub_submit(sctx);
		goto again;
	}

	sbio->pagev[sbio->page_count] = spage;
	ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
	if (ret != PAGE_SIZE) {
		if (sbio->page_count < 1) {
			bio_put(sbio->bio);
			sbio->bio = NULL;
			return -EIO;
		}
		scrub_submit(sctx);
		goto again;
	}

	scrub_block_get(sblock); /* one for the page added to the bio */
	atomic_inc(&sblock->outstanding_pages);
	sbio->page_count++;
	if (sbio->page_count == sctx->pages_per_rd_bio)
		scrub_submit(sctx);

	return 0;
}
2395
2396static void scrub_missing_raid56_end_io(struct bio *bio)
2397{
2398 struct scrub_block *sblock = bio->bi_private;
2399 struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;
2400
2401 if (bio->bi_status)
2402 sblock->no_io_error_seen = 0;
2403
2404 bio_put(bio);
2405
2406 btrfs_queue_work(fs_info->scrub_workers, &sblock->work);
2407}
2408
2409static void scrub_missing_raid56_worker(struct btrfs_work *work)
2410{
2411 struct scrub_block *sblock = container_of(work, struct scrub_block, work);
2412 struct scrub_ctx *sctx = sblock->sctx;
2413 struct btrfs_fs_info *fs_info = sctx->fs_info;
2414 u64 logical;
2415 struct btrfs_device *dev;
2416
2417 logical = sblock->pagev[0]->logical;
2418 dev = sblock->pagev[0]->dev;
2419
2420 if (sblock->no_io_error_seen)
2421 scrub_recheck_block_checksum(sblock);
2422
2423 if (!sblock->no_io_error_seen) {
2424 spin_lock(&sctx->stat_lock);
2425 sctx->stat.read_errors++;
2426 spin_unlock(&sctx->stat_lock);
2427 btrfs_err_rl_in_rcu(fs_info,
2428 "IO error rebuilding logical %llu for dev %s",
2429 logical, rcu_str_deref(dev->name));
2430 } else if (sblock->header_error || sblock->checksum_error) {
2431 spin_lock(&sctx->stat_lock);
2432 sctx->stat.uncorrectable_errors++;
2433 spin_unlock(&sctx->stat_lock);
2434 btrfs_err_rl_in_rcu(fs_info,
2435 "failed to rebuild valid logical %llu for dev %s",
2436 logical, rcu_str_deref(dev->name));
2437 } else {
2438 scrub_write_block_to_dev_replace(sblock);
2439 }
2440
2441 scrub_block_put(sblock);
2442
2443 if (sctx->is_dev_replace && sctx->flush_all_writes) {
2444 mutex_lock(&sctx->wr_lock);
2445 scrub_wr_submit(sctx);
2446 mutex_unlock(&sctx->wr_lock);
2447 }
2448
2449 scrub_pending_bio_dec(sctx);
2450}
2451
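/*
 * Blocks on a missing device cannot be read directly, but on RAID 5/6
 * they can be rebuilt from the remaining stripes. Kick off the rebuild
 * here; scrub_missing_raid56_worker() verifies the result.
 */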
2452static void scrub_missing_raid56_pages(struct scrub_block *sblock)
2453{
2454 struct scrub_ctx *sctx = sblock->sctx;
2455 struct btrfs_fs_info *fs_info = sctx->fs_info;
2456 u64 length = sblock->page_count * PAGE_SIZE;
2457 u64 logical = sblock->pagev[0]->logical;
2458 struct btrfs_bio *bbio = NULL;
2459 struct bio *bio;
2460 struct btrfs_raid_bio *rbio;
2461 int ret;
2462 int i;
2463
2464 btrfs_bio_counter_inc_blocked(fs_info);
2465 ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
2466 &length, &bbio);
2467 if (ret || !bbio || !bbio->raid_map)
2468 goto bbio_out;
2469
2470 if (WARN_ON(!sctx->is_dev_replace ||
2471 !(bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK))) {
		/*
		 * We shouldn't be scrubbing a missing device. Even for dev
		 * replace, we should only get here for RAID 5/6. We either
		 * managed to mount something with no mirrors remaining or
		 * there's a bug in scrub_remap_extent()/btrfs_map_block().
		 */
2478 goto bbio_out;
2479 }
2480
2481 bio = btrfs_io_bio_alloc(0);
2482 bio->bi_iter.bi_sector = logical >> 9;
2483 bio->bi_private = sblock;
2484 bio->bi_end_io = scrub_missing_raid56_end_io;
2485
2486 rbio = raid56_alloc_missing_rbio(fs_info, bio, bbio, length);
2487 if (!rbio)
2488 goto rbio_out;
2489
2490 for (i = 0; i < sblock->page_count; i++) {
2491 struct scrub_page *spage = sblock->pagev[i];
2492
2493 raid56_add_scrub_pages(rbio, spage->page, spage->logical);
2494 }
2495
2496 btrfs_init_work(&sblock->work, btrfs_scrub_helper,
2497 scrub_missing_raid56_worker, NULL, NULL);
2498 scrub_block_get(sblock);
2499 scrub_pending_bio_inc(sctx);
2500 raid56_submit_missing_rbio(rbio);
2501 return;
2502
2503rbio_out:
2504 bio_put(bio);
2505bbio_out:
2506 btrfs_bio_counter_dec(fs_info);
2507 btrfs_put_bbio(bbio);
2508 spin_lock(&sctx->stat_lock);
2509 sctx->stat.malloc_errors++;
2510 spin_unlock(&sctx->stat_lock);
2511}
2512
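/*
 * Split a logical range into pages, attach them to a newly allocated
 * scrub_block and queue every page for reading. Used for regular
 * extents as well as for super blocks (the latter with @force set so
 * the bio is submitted immediately).
 */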
2513static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
2514 u64 physical, struct btrfs_device *dev, u64 flags,
2515 u64 gen, int mirror_num, u8 *csum, int force,
2516 u64 physical_for_dev_replace)
2517{
2518 struct scrub_block *sblock;
2519 int index;
2520
2521 sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
2522 if (!sblock) {
2523 spin_lock(&sctx->stat_lock);
2524 sctx->stat.malloc_errors++;
2525 spin_unlock(&sctx->stat_lock);
2526 return -ENOMEM;
2527 }
2528
	/* one ref inside this function, plus one for each page added to a bio */
2531 refcount_set(&sblock->refs, 1);
2532 sblock->sctx = sctx;
2533 sblock->no_io_error_seen = 1;
2534
2535 for (index = 0; len > 0; index++) {
2536 struct scrub_page *spage;
2537 u64 l = min_t(u64, len, PAGE_SIZE);
2538
2539 spage = kzalloc(sizeof(*spage), GFP_KERNEL);
2540 if (!spage) {
2541leave_nomem:
2542 spin_lock(&sctx->stat_lock);
2543 sctx->stat.malloc_errors++;
2544 spin_unlock(&sctx->stat_lock);
2545 scrub_block_put(sblock);
2546 return -ENOMEM;
2547 }
2548 BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
2549 scrub_page_get(spage);
2550 sblock->pagev[index] = spage;
2551 spage->sblock = sblock;
2552 spage->dev = dev;
2553 spage->flags = flags;
2554 spage->generation = gen;
2555 spage->logical = logical;
2556 spage->physical = physical;
2557 spage->physical_for_dev_replace = physical_for_dev_replace;
2558 spage->mirror_num = mirror_num;
2559 if (csum) {
2560 spage->have_csum = 1;
2561 memcpy(spage->csum, csum, sctx->csum_size);
2562 } else {
2563 spage->have_csum = 0;
2564 }
2565 sblock->page_count++;
2566 spage->page = alloc_page(GFP_KERNEL);
2567 if (!spage->page)
2568 goto leave_nomem;
2569 len -= l;
2570 logical += l;
2571 physical += l;
2572 physical_for_dev_replace += l;
2573 }
2574
2575 WARN_ON(sblock->page_count == 0);
2576 if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) {
		/*
		 * This case should only be hit for RAID 5/6 device replace.
		 * See the comment in scrub_missing_raid56_pages() for details.
		 */
2581 scrub_missing_raid56_pages(sblock);
2582 } else {
2583 for (index = 0; index < sblock->page_count; index++) {
2584 struct scrub_page *spage = sblock->pagev[index];
2585 int ret;
2586
2587 ret = scrub_add_page_to_rd_bio(sctx, spage);
2588 if (ret) {
2589 scrub_block_put(sblock);
2590 return ret;
2591 }
2592 }
2593
2594 if (force)
2595 scrub_submit(sctx);
2596 }
2597
	/* last one frees, either here or in bio completion for last page */
2599 scrub_block_put(sblock);
2600 return 0;
2601}
2602
2603static void scrub_bio_end_io(struct bio *bio)
2604{
2605 struct scrub_bio *sbio = bio->bi_private;
2606 struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
2607
2608 sbio->status = bio->bi_status;
2609 sbio->bio = bio;
2610
2611 btrfs_queue_work(fs_info->scrub_workers, &sbio->work);
2612}
2613
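/*
 * Read bio completion: record I/O errors per page, complete the blocks
 * whose last outstanding page finished and return the bio slot to the
 * free list.
 */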
2614static void scrub_bio_end_io_worker(struct btrfs_work *work)
2615{
2616 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
2617 struct scrub_ctx *sctx = sbio->sctx;
2618 int i;
2619
2620 BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
2621 if (sbio->status) {
2622 for (i = 0; i < sbio->page_count; i++) {
2623 struct scrub_page *spage = sbio->pagev[i];
2624
2625 spage->io_error = 1;
2626 spage->sblock->no_io_error_seen = 0;
2627 }
2628 }
2629
	/* now complete the scrub_block items that have all pages completed */
2631 for (i = 0; i < sbio->page_count; i++) {
2632 struct scrub_page *spage = sbio->pagev[i];
2633 struct scrub_block *sblock = spage->sblock;
2634
2635 if (atomic_dec_and_test(&sblock->outstanding_pages))
2636 scrub_block_complete(sblock);
2637 scrub_block_put(sblock);
2638 }
2639
2640 bio_put(sbio->bio);
2641 sbio->bio = NULL;
2642 spin_lock(&sctx->list_lock);
2643 sbio->next_free = sctx->first_free;
2644 sctx->first_free = sbio->index;
2645 spin_unlock(&sctx->list_lock);
2646
2647 if (sctx->is_dev_replace && sctx->flush_all_writes) {
2648 mutex_lock(&sctx->wr_lock);
2649 scrub_wr_submit(sctx);
2650 mutex_unlock(&sctx->wr_lock);
2651 }
2652
2653 scrub_pending_bio_dec(sctx);
2654}
2655
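/*
 * Mark the sectors covered by [start, start + len) in the given parity
 * bitmap. Offsets are relative to the current stripe and may wrap
 * around its end, in which case the bitmap is set in two pieces.
 */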
2656static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
2657 unsigned long *bitmap,
2658 u64 start, u64 len)
2659{
2660 u64 offset;
2661 u64 nsectors64;
2662 u32 nsectors;
2663 int sectorsize = sparity->sctx->fs_info->sectorsize;
2664
2665 if (len >= sparity->stripe_len) {
2666 bitmap_set(bitmap, 0, sparity->nsectors);
2667 return;
2668 }
2669
2670 start -= sparity->logic_start;
2671 start = div64_u64_rem(start, sparity->stripe_len, &offset);
2672 offset = div_u64(offset, sectorsize);
2673 nsectors64 = div_u64(len, sectorsize);
2674
2675 ASSERT(nsectors64 < UINT_MAX);
2676 nsectors = (u32)nsectors64;
2677
2678 if (offset + nsectors <= sparity->nsectors) {
2679 bitmap_set(bitmap, offset, nsectors);
2680 return;
2681 }
2682
2683 bitmap_set(bitmap, offset, sparity->nsectors - offset);
2684 bitmap_set(bitmap, 0, nsectors - (sparity->nsectors - offset));
2685}
2686
2687static inline void scrub_parity_mark_sectors_error(struct scrub_parity *sparity,
2688 u64 start, u64 len)
2689{
2690 __scrub_mark_bitmap(sparity, sparity->ebitmap, start, len);
2691}
2692
2693static inline void scrub_parity_mark_sectors_data(struct scrub_parity *sparity,
2694 u64 start, u64 len)
2695{
2696 __scrub_mark_bitmap(sparity, sparity->dbitmap, start, len);
2697}
2698
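/*
 * Called when all pages of a block have completed I/O: verify the
 * checksums, hand bad blocks over to the repair code and record
 * corrupted sectors in the parity bitmap.
 */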
2699static void scrub_block_complete(struct scrub_block *sblock)
2700{
2701 int corrupted = 0;
2702
2703 if (!sblock->no_io_error_seen) {
2704 corrupted = 1;
2705 scrub_handle_errored_block(sblock);
2706 } else {
		/*
		 * If the block has a checksum error, it is written via the
		 * repair machinery; otherwise, in the dev-replace case, it
		 * is copied to the replace target right here.
		 */
2712 corrupted = scrub_checksum(sblock);
2713 if (!corrupted && sblock->sctx->is_dev_replace)
2714 scrub_write_block_to_dev_replace(sblock);
2715 }
2716
2717 if (sblock->sparity && corrupted && !sblock->data_corrected) {
2718 u64 start = sblock->pagev[0]->logical;
2719 u64 end = sblock->pagev[sblock->page_count - 1]->logical +
2720 PAGE_SIZE;
2721
2722 scrub_parity_mark_sectors_error(sblock->sparity,
2723 start, end - start);
2724 }
2725}
2726
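/*
 * Look up the data checksum for @logical in sctx->csum_list. Sums that
 * end before @logical are dropped as the scrub advances. Returns 1 and
 * copies the checksum into @csum on success, 0 if no checksum exists.
 */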
2727static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum)
2728{
2729 struct btrfs_ordered_sum *sum = NULL;
2730 unsigned long index;
2731 unsigned long num_sectors;
2732
2733 while (!list_empty(&sctx->csum_list)) {
2734 sum = list_first_entry(&sctx->csum_list,
2735 struct btrfs_ordered_sum, list);
2736 if (sum->bytenr > logical)
2737 return 0;
2738 if (sum->bytenr + sum->len > logical)
2739 break;
2740
2741 ++sctx->stat.csum_discards;
2742 list_del(&sum->list);
2743 kfree(sum);
2744 sum = NULL;
2745 }
2746 if (!sum)
2747 return 0;
2748
2749 index = div_u64(logical - sum->bytenr, sctx->fs_info->sectorsize);
2750 ASSERT(index < UINT_MAX);
2751
2752 num_sectors = sum->len / sctx->fs_info->sectorsize;
2753 memcpy(csum, sum->sums + index, sctx->csum_size);
2754 if (index == num_sectors - 1) {
2755 list_del(&sum->list);
2756 kfree(sum);
2757 }
2758 return 1;
2759}
2760
/*
 * Scrub one extent: split it into blocksize-sized blocks (sectorsize for
 * data, nodesize for metadata, stripe_len on raid56), look up the data
 * checksums and queue each block for reading.
 */
2762static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
2763 u64 logical, u64 len,
2764 u64 physical, struct btrfs_device *dev, u64 flags,
2765 u64 gen, int mirror_num, u64 physical_for_dev_replace)
2766{
2767 int ret;
2768 u8 csum[BTRFS_CSUM_SIZE];
2769 u32 blocksize;
2770
2771 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2772 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
2773 blocksize = map->stripe_len;
2774 else
2775 blocksize = sctx->fs_info->sectorsize;
2776 spin_lock(&sctx->stat_lock);
2777 sctx->stat.data_extents_scrubbed++;
2778 sctx->stat.data_bytes_scrubbed += len;
2779 spin_unlock(&sctx->stat_lock);
2780 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
2781 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
2782 blocksize = map->stripe_len;
2783 else
2784 blocksize = sctx->fs_info->nodesize;
2785 spin_lock(&sctx->stat_lock);
2786 sctx->stat.tree_extents_scrubbed++;
2787 sctx->stat.tree_bytes_scrubbed += len;
2788 spin_unlock(&sctx->stat_lock);
2789 } else {
2790 blocksize = sctx->fs_info->sectorsize;
2791 WARN_ON(1);
2792 }
2793
2794 while (len) {
2795 u64 l = min_t(u64, len, blocksize);
2796 int have_csum = 0;
2797
2798 if (flags & BTRFS_EXTENT_FLAG_DATA) {
			/* look up the data checksum for this block */
2800 have_csum = scrub_find_csum(sctx, logical, csum);
2801 if (have_csum == 0)
2802 ++sctx->stat.no_csum;
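			/*
			 * Note the "0 &&" below: the nocow copy path is
			 * intentionally disabled, as copying through inode
			 * pages can race with concurrent nodatacow writes.
			 * These blocks are handled by the regular
			 * scrub_pages() path instead.
			 */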
2803 if (0 && sctx->is_dev_replace && !have_csum) {
2804 ret = copy_nocow_pages(sctx, logical, l,
2805 mirror_num,
2806 physical_for_dev_replace);
2807 goto behind_scrub_pages;
2808 }
2809 }
2810 ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen,
2811 mirror_num, have_csum ? csum : NULL, 0,
2812 physical_for_dev_replace);
2813behind_scrub_pages:
2814 if (ret)
2815 return ret;
2816 len -= l;
2817 logical += l;
2818 physical += l;
2819 physical_for_dev_replace += l;
2820 }
2821 return 0;
2822}
2823
2824static int scrub_pages_for_parity(struct scrub_parity *sparity,
2825 u64 logical, u64 len,
2826 u64 physical, struct btrfs_device *dev,
2827 u64 flags, u64 gen, int mirror_num, u8 *csum)
2828{
2829 struct scrub_ctx *sctx = sparity->sctx;
2830 struct scrub_block *sblock;
2831 int index;
2832
2833 sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
2834 if (!sblock) {
2835 spin_lock(&sctx->stat_lock);
2836 sctx->stat.malloc_errors++;
2837 spin_unlock(&sctx->stat_lock);
2838 return -ENOMEM;
2839 }
2840
	/* one ref inside this function, plus one for each page added to a bio */
2843 refcount_set(&sblock->refs, 1);
2844 sblock->sctx = sctx;
2845 sblock->no_io_error_seen = 1;
2846 sblock->sparity = sparity;
2847 scrub_parity_get(sparity);
2848
2849 for (index = 0; len > 0; index++) {
2850 struct scrub_page *spage;
2851 u64 l = min_t(u64, len, PAGE_SIZE);
2852
2853 spage = kzalloc(sizeof(*spage), GFP_KERNEL);
2854 if (!spage) {
2855leave_nomem:
2856 spin_lock(&sctx->stat_lock);
2857 sctx->stat.malloc_errors++;
2858 spin_unlock(&sctx->stat_lock);
2859 scrub_block_put(sblock);
2860 return -ENOMEM;
2861 }
2862 BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
		/* reference for the scrub block */
2864 scrub_page_get(spage);
2865 sblock->pagev[index] = spage;
		/* second reference for the parity's page list */
2867 scrub_page_get(spage);
2868 list_add_tail(&spage->list, &sparity->spages);
2869 spage->sblock = sblock;
2870 spage->dev = dev;
2871 spage->flags = flags;
2872 spage->generation = gen;
2873 spage->logical = logical;
2874 spage->physical = physical;
2875 spage->mirror_num = mirror_num;
2876 if (csum) {
2877 spage->have_csum = 1;
2878 memcpy(spage->csum, csum, sctx->csum_size);
2879 } else {
2880 spage->have_csum = 0;
2881 }
2882 sblock->page_count++;
2883 spage->page = alloc_page(GFP_KERNEL);
2884 if (!spage->page)
2885 goto leave_nomem;
2886 len -= l;
2887 logical += l;
2888 physical += l;
2889 }
2890
2891 WARN_ON(sblock->page_count == 0);
2892 for (index = 0; index < sblock->page_count; index++) {
2893 struct scrub_page *spage = sblock->pagev[index];
2894 int ret;
2895
2896 ret = scrub_add_page_to_rd_bio(sctx, spage);
2897 if (ret) {
2898 scrub_block_put(sblock);
2899 return ret;
2900 }
2901 }
2902
	/* last one frees, either here or in bio completion for last page */
2904 scrub_block_put(sblock);
2905 return 0;
2906}
2907
2908static int scrub_extent_for_parity(struct scrub_parity *sparity,
2909 u64 logical, u64 len,
2910 u64 physical, struct btrfs_device *dev,
2911 u64 flags, u64 gen, int mirror_num)
2912{
2913 struct scrub_ctx *sctx = sparity->sctx;
2914 int ret;
2915 u8 csum[BTRFS_CSUM_SIZE];
2916 u32 blocksize;
2917
2918 if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) {
2919 scrub_parity_mark_sectors_error(sparity, logical, len);
2920 return 0;
2921 }
2922
2923 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2924 blocksize = sparity->stripe_len;
2925 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
2926 blocksize = sparity->stripe_len;
2927 } else {
2928 blocksize = sctx->fs_info->sectorsize;
2929 WARN_ON(1);
2930 }
2931
2932 while (len) {
2933 u64 l = min_t(u64, len, blocksize);
2934 int have_csum = 0;
2935
2936 if (flags & BTRFS_EXTENT_FLAG_DATA) {
			/* look up the data checksum for this block */
2938 have_csum = scrub_find_csum(sctx, logical, csum);
2939 if (have_csum == 0)
2940 goto skip;
2941 }
2942 ret = scrub_pages_for_parity(sparity, logical, l, physical, dev,
2943 flags, gen, mirror_num,
2944 have_csum ? csum : NULL);
2945 if (ret)
2946 return ret;
2947skip:
2948 len -= l;
2949 logical += l;
2950 physical += l;
2951 }
2952 return 0;
2953}
2954
/*
 * Given a physical address, this will calculate its logical offset.
 * If this is a parity stripe, it will return the left-most data stripe's
 * logical offset instead.
 *
 * Returns 0 if it is a data stripe, 1 if it is a parity stripe.
 */
2962static int get_raid56_logic_offset(u64 physical, int num,
2963 struct map_lookup *map, u64 *offset,
2964 u64 *stripe_start)
2965{
2966 int i;
2967 int j = 0;
2968 u64 stripe_nr;
2969 u64 last_offset;
2970 u32 stripe_index;
2971 u32 rot;
2972
2973 last_offset = (physical - map->stripes[num].physical) *
2974 nr_data_stripes(map);
2975 if (stripe_start)
2976 *stripe_start = last_offset;
2977
2978 *offset = last_offset;
2979 for (i = 0; i < nr_data_stripes(map); i++) {
2980 *offset = last_offset + i * map->stripe_len;
2981
2982 stripe_nr = div64_u64(*offset, map->stripe_len);
2983 stripe_nr = div_u64(stripe_nr, nr_data_stripes(map));
2984
		/* work out the disk rotation on this stripe-set */
2986 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &rot);
		/* calculate which stripe this data is located on */
2988 rot += i;
2989 stripe_index = rot % map->num_stripes;
2990 if (stripe_index == num)
2991 return 0;
2992 if (stripe_index < num)
2993 j++;
2994 }
2995 *offset = last_offset + j * map->stripe_len;
2996 return 1;
2997}
2998
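/*
 * Free a scrub_parity structure. Sectors that are still marked in the
 * error bitmap are accounted as unrecoverable read errors.
 */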
2999static void scrub_free_parity(struct scrub_parity *sparity)
3000{
3001 struct scrub_ctx *sctx = sparity->sctx;
3002 struct scrub_page *curr, *next;
3003 int nbits;
3004
3005 nbits = bitmap_weight(sparity->ebitmap, sparity->nsectors);
3006 if (nbits) {
3007 spin_lock(&sctx->stat_lock);
3008 sctx->stat.read_errors += nbits;
3009 sctx->stat.uncorrectable_errors += nbits;
3010 spin_unlock(&sctx->stat_lock);
3011 }
3012
3013 list_for_each_entry_safe(curr, next, &sparity->spages, list) {
3014 list_del_init(&curr->list);
3015 scrub_page_put(curr);
3016 }
3017
3018 kfree(sparity);
3019}
3020
3021static void scrub_parity_bio_endio_worker(struct btrfs_work *work)
3022{
3023 struct scrub_parity *sparity = container_of(work, struct scrub_parity,
3024 work);
3025 struct scrub_ctx *sctx = sparity->sctx;
3026
3027 scrub_free_parity(sparity);
3028 scrub_pending_bio_dec(sctx);
3029}
3030
3031static void scrub_parity_bio_endio(struct bio *bio)
3032{
3033 struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
3034 struct btrfs_fs_info *fs_info = sparity->sctx->fs_info;
3035
3036 if (bio->bi_status)
3037 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
3038 sparity->nsectors);
3039
3040 bio_put(bio);
3041
3042 btrfs_init_work(&sparity->work, btrfs_scrubparity_helper,
3043 scrub_parity_bio_endio_worker, NULL, NULL);
3044 btrfs_queue_work(fs_info->scrub_parity_workers, &sparity->work);
3045}
3046
3047static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
3048{
3049 struct scrub_ctx *sctx = sparity->sctx;
3050 struct btrfs_fs_info *fs_info = sctx->fs_info;
3051 struct bio *bio;
3052 struct btrfs_raid_bio *rbio;
3053 struct btrfs_bio *bbio = NULL;
3054 u64 length;
3055 int ret;
3056
3057 if (!bitmap_andnot(sparity->dbitmap, sparity->dbitmap, sparity->ebitmap,
3058 sparity->nsectors))
3059 goto out;
3060
3061 length = sparity->logic_end - sparity->logic_start;
3062
3063 btrfs_bio_counter_inc_blocked(fs_info);
3064 ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, sparity->logic_start,
3065 &length, &bbio);
3066 if (ret || !bbio || !bbio->raid_map)
3067 goto bbio_out;
3068
3069 bio = btrfs_io_bio_alloc(0);
3070 bio->bi_iter.bi_sector = sparity->logic_start >> 9;
3071 bio->bi_private = sparity;
3072 bio->bi_end_io = scrub_parity_bio_endio;
3073
3074 rbio = raid56_parity_alloc_scrub_rbio(fs_info, bio, bbio,
3075 length, sparity->scrub_dev,
3076 sparity->dbitmap,
3077 sparity->nsectors);
3078 if (!rbio)
3079 goto rbio_out;
3080
3081 scrub_pending_bio_inc(sctx);
3082 raid56_parity_submit_scrub_rbio(rbio);
3083 return;
3084
3085rbio_out:
3086 bio_put(bio);
3087bbio_out:
3088 btrfs_bio_counter_dec(fs_info);
3089 btrfs_put_bbio(bbio);
3090 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
3091 sparity->nsectors);
3092 spin_lock(&sctx->stat_lock);
3093 sctx->stat.malloc_errors++;
3094 spin_unlock(&sctx->stat_lock);
3095out:
3096 scrub_free_parity(sparity);
3097}
3098
3099static inline int scrub_calc_parity_bitmap_len(int nsectors)
3100{
3101 return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * sizeof(long);
3102}
3103
3104static void scrub_parity_get(struct scrub_parity *sparity)
3105{
3106 refcount_inc(&sparity->refs);
3107}
3108
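/* The last reference dropped triggers the parity check and repair. */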
3109static void scrub_parity_put(struct scrub_parity *sparity)
3110{
3111 if (!refcount_dec_and_test(&sparity->refs))
3112 return;
3113
3114 scrub_parity_check_and_repair(sparity);
3115}
3116
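/*
 * Scrub one full raid56 stripe set: walk the extent tree over
 * [logic_start, logic_end), queue the data sectors for reading and mark
 * them in the data bitmap. The parity itself is checked and repaired by
 * the raid56 code once the last reference to @sparity is dropped.
 */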
3117static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
3118 struct map_lookup *map,
3119 struct btrfs_device *sdev,
3120 struct btrfs_path *path,
3121 u64 logic_start,
3122 u64 logic_end)
3123{
3124 struct btrfs_fs_info *fs_info = sctx->fs_info;
3125 struct btrfs_root *root = fs_info->extent_root;
3126 struct btrfs_root *csum_root = fs_info->csum_root;
3127 struct btrfs_extent_item *extent;
3128 struct btrfs_bio *bbio = NULL;
3129 u64 flags;
3130 int ret;
3131 int slot;
3132 struct extent_buffer *l;
3133 struct btrfs_key key;
3134 u64 generation;
3135 u64 extent_logical;
3136 u64 extent_physical;
3137 u64 extent_len;
3138 u64 mapped_length;
3139 struct btrfs_device *extent_dev;
3140 struct scrub_parity *sparity;
3141 int nsectors;
3142 int bitmap_len;
3143 int extent_mirror_num;
3144 int stop_loop = 0;
3145
3146 nsectors = div_u64(map->stripe_len, fs_info->sectorsize);
3147 bitmap_len = scrub_calc_parity_bitmap_len(nsectors);
3148 sparity = kzalloc(sizeof(struct scrub_parity) + 2 * bitmap_len,
3149 GFP_NOFS);
3150 if (!sparity) {
3151 spin_lock(&sctx->stat_lock);
3152 sctx->stat.malloc_errors++;
3153 spin_unlock(&sctx->stat_lock);
3154 return -ENOMEM;
3155 }
3156
3157 sparity->stripe_len = map->stripe_len;
3158 sparity->nsectors = nsectors;
3159 sparity->sctx = sctx;
3160 sparity->scrub_dev = sdev;
3161 sparity->logic_start = logic_start;
3162 sparity->logic_end = logic_end;
3163 refcount_set(&sparity->refs, 1);
3164 INIT_LIST_HEAD(&sparity->spages);
3165 sparity->dbitmap = sparity->bitmap;
3166 sparity->ebitmap = (void *)sparity->bitmap + bitmap_len;
3167
3168 ret = 0;
3169 while (logic_start < logic_end) {
3170 if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
3171 key.type = BTRFS_METADATA_ITEM_KEY;
3172 else
3173 key.type = BTRFS_EXTENT_ITEM_KEY;
3174 key.objectid = logic_start;
3175 key.offset = (u64)-1;
3176
3177 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3178 if (ret < 0)
3179 goto out;
3180
3181 if (ret > 0) {
3182 ret = btrfs_previous_extent_item(root, path, 0);
3183 if (ret < 0)
3184 goto out;
3185 if (ret > 0) {
3186 btrfs_release_path(path);
3187 ret = btrfs_search_slot(NULL, root, &key,
3188 path, 0, 0);
3189 if (ret < 0)
3190 goto out;
3191 }
3192 }
3193
3194 stop_loop = 0;
3195 while (1) {
3196 u64 bytes;
3197
3198 l = path->nodes[0];
3199 slot = path->slots[0];
3200 if (slot >= btrfs_header_nritems(l)) {
3201 ret = btrfs_next_leaf(root, path);
3202 if (ret == 0)
3203 continue;
3204 if (ret < 0)
3205 goto out;
3206
3207 stop_loop = 1;
3208 break;
3209 }
3210 btrfs_item_key_to_cpu(l, &key, slot);
3211
3212 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
3213 key.type != BTRFS_METADATA_ITEM_KEY)
3214 goto next;
3215
3216 if (key.type == BTRFS_METADATA_ITEM_KEY)
3217 bytes = fs_info->nodesize;
3218 else
3219 bytes = key.offset;
3220
3221 if (key.objectid + bytes <= logic_start)
3222 goto next;
3223
3224 if (key.objectid >= logic_end) {
3225 stop_loop = 1;
3226 break;
3227 }
3228
3229 while (key.objectid >= logic_start + map->stripe_len)
3230 logic_start += map->stripe_len;
3231
3232 extent = btrfs_item_ptr(l, slot,
3233 struct btrfs_extent_item);
3234 flags = btrfs_extent_flags(l, extent);
3235 generation = btrfs_extent_generation(l, extent);
3236
3237 if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
3238 (key.objectid < logic_start ||
3239 key.objectid + bytes >
3240 logic_start + map->stripe_len)) {
3241 btrfs_err(fs_info,
3242 "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
3243 key.objectid, logic_start);
3244 spin_lock(&sctx->stat_lock);
3245 sctx->stat.uncorrectable_errors++;
3246 spin_unlock(&sctx->stat_lock);
3247 goto next;
3248 }
3249again:
3250 extent_logical = key.objectid;
3251 extent_len = bytes;
3252
3253 if (extent_logical < logic_start) {
3254 extent_len -= logic_start - extent_logical;
3255 extent_logical = logic_start;
3256 }
3257
3258 if (extent_logical + extent_len >
3259 logic_start + map->stripe_len)
3260 extent_len = logic_start + map->stripe_len -
3261 extent_logical;
3262
3263 scrub_parity_mark_sectors_data(sparity, extent_logical,
3264 extent_len);
3265
3266 mapped_length = extent_len;
3267 bbio = NULL;
3268 ret = btrfs_map_block(fs_info, BTRFS_MAP_READ,
3269 extent_logical, &mapped_length, &bbio,
3270 0);
3271 if (!ret) {
3272 if (!bbio || mapped_length < extent_len)
3273 ret = -EIO;
3274 }
3275 if (ret) {
3276 btrfs_put_bbio(bbio);
3277 goto out;
3278 }
3279 extent_physical = bbio->stripes[0].physical;
3280 extent_mirror_num = bbio->mirror_num;
3281 extent_dev = bbio->stripes[0].dev;
3282 btrfs_put_bbio(bbio);
3283
3284 ret = btrfs_lookup_csums_range(csum_root,
3285 extent_logical,
3286 extent_logical + extent_len - 1,
3287 &sctx->csum_list, 1);
3288 if (ret)
3289 goto out;
3290
3291 ret = scrub_extent_for_parity(sparity, extent_logical,
3292 extent_len,
3293 extent_physical,
3294 extent_dev, flags,
3295 generation,
3296 extent_mirror_num);
3297
3298 scrub_free_csums(sctx);
3299
3300 if (ret)
3301 goto out;
3302
3303 if (extent_logical + extent_len <
3304 key.objectid + bytes) {
3305 logic_start += map->stripe_len;
3306
3307 if (logic_start >= logic_end) {
3308 stop_loop = 1;
3309 break;
3310 }
3311
3312 if (logic_start < key.objectid + bytes) {
3313 cond_resched();
3314 goto again;
3315 }
3316 }
3317next:
3318 path->slots[0]++;
3319 }
3320
3321 btrfs_release_path(path);
3322
3323 if (stop_loop)
3324 break;
3325
3326 logic_start += map->stripe_len;
3327 }
3328out:
3329 if (ret < 0)
3330 scrub_parity_mark_sectors_error(sparity, logic_start,
3331 logic_end - logic_start);
3332 scrub_parity_put(sparity);
3333 scrub_submit(sctx);
3334 mutex_lock(&sctx->wr_lock);
3335 scrub_wr_submit(sctx);
3336 mutex_unlock(&sctx->wr_lock);
3337
3338 btrfs_release_path(path);
3339 return ret < 0 ? ret : 0;
3340}
3341
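/*
 * Scrub the portion of a chunk that lives on stripe @num of @scrub_dev.
 * The offset, increment and mirror number depend on the raid profile;
 * raid56 parity stripes are handed off to scrub_raid56_parity().
 */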
3342static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
3343 struct map_lookup *map,
3344 struct btrfs_device *scrub_dev,
3345 int num, u64 base, u64 length,
3346 int is_dev_replace)
3347{
3348 struct btrfs_path *path, *ppath;
3349 struct btrfs_fs_info *fs_info = sctx->fs_info;
3350 struct btrfs_root *root = fs_info->extent_root;
3351 struct btrfs_root *csum_root = fs_info->csum_root;
3352 struct btrfs_extent_item *extent;
3353 struct blk_plug plug;
3354 u64 flags;
3355 int ret;
3356 int slot;
3357 u64 nstripes;
3358 struct extent_buffer *l;
3359 u64 physical;
3360 u64 logical;
3361 u64 logic_end;
3362 u64 physical_end;
3363 u64 generation;
3364 int mirror_num;
3365 struct reada_control *reada1;
3366 struct reada_control *reada2;
3367 struct btrfs_key key;
3368 struct btrfs_key key_end;
3369 u64 increment = map->stripe_len;
3370 u64 offset;
3371 u64 extent_logical;
3372 u64 extent_physical;
3373 u64 extent_len;
3374 u64 stripe_logical;
3375 u64 stripe_end;
3376 struct btrfs_device *extent_dev;
3377 int extent_mirror_num;
3378 int stop_loop = 0;
3379
3380 physical = map->stripes[num].physical;
3381 offset = 0;
3382 nstripes = div64_u64(length, map->stripe_len);
3383 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
3384 offset = map->stripe_len * num;
3385 increment = map->stripe_len * map->num_stripes;
3386 mirror_num = 1;
3387 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
3388 int factor = map->num_stripes / map->sub_stripes;
3389 offset = map->stripe_len * (num / map->sub_stripes);
3390 increment = map->stripe_len * factor;
3391 mirror_num = num % map->sub_stripes + 1;
3392 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
3393 increment = map->stripe_len;
3394 mirror_num = num % map->num_stripes + 1;
3395 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
3396 increment = map->stripe_len;
3397 mirror_num = num % map->num_stripes + 1;
3398 } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3399 get_raid56_logic_offset(physical, num, map, &offset, NULL);
3400 increment = map->stripe_len * nr_data_stripes(map);
3401 mirror_num = 1;
3402 } else {
3403 increment = map->stripe_len;
3404 mirror_num = 1;
3405 }
3406
3407 path = btrfs_alloc_path();
3408 if (!path)
3409 return -ENOMEM;
3410
3411 ppath = btrfs_alloc_path();
3412 if (!ppath) {
3413 btrfs_free_path(path);
3414 return -ENOMEM;
3415 }
3416
	/*
	 * Work on the commit roots. The related disk blocks are static as
	 * long as COW is applied, so it is safe to read them without
	 * taking tree locks.
	 */
3422 path->search_commit_root = 1;
3423 path->skip_locking = 1;
3424
3425 ppath->search_commit_root = 1;
3426 ppath->skip_locking = 1;
3427
	/*
	 * Trigger readahead for the extent tree and the csum tree and wait
	 * for completion. During readahead the scrub is officially paused
	 * to not hold off transaction commits.
	 */
3432 logical = base + offset;
3433 physical_end = physical + nstripes * map->stripe_len;
3434 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3435 get_raid56_logic_offset(physical_end, num,
3436 map, &logic_end, NULL);
3437 logic_end += base;
3438 } else {
3439 logic_end = logical + increment * nstripes;
3440 }
3441 wait_event(sctx->list_wait,
3442 atomic_read(&sctx->bios_in_flight) == 0);
3443 scrub_blocked_if_needed(fs_info);
3444
3445
3446 key.objectid = logical;
3447 key.type = BTRFS_EXTENT_ITEM_KEY;
3448 key.offset = (u64)0;
3449 key_end.objectid = logic_end;
3450 key_end.type = BTRFS_METADATA_ITEM_KEY;
3451 key_end.offset = (u64)-1;
3452 reada1 = btrfs_reada_add(root, &key, &key_end);
3453
3454 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
3455 key.type = BTRFS_EXTENT_CSUM_KEY;
3456 key.offset = logical;
3457 key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
3458 key_end.type = BTRFS_EXTENT_CSUM_KEY;
3459 key_end.offset = logic_end;
3460 reada2 = btrfs_reada_add(csum_root, &key, &key_end);
3461
3462 if (!IS_ERR(reada1))
3463 btrfs_reada_wait(reada1);
3464 if (!IS_ERR(reada2))
3465 btrfs_reada_wait(reada2);
3466
	/*
	 * Collect all data csums for the stripe to avoid seeking during
	 * the scrub. This might currently (crc32) end up to be about 1MB.
	 */
3472 blk_start_plug(&plug);
3473
	/* now find all extents for each stripe and scrub them */
3477 ret = 0;
3478 while (physical < physical_end) {
		/* canceled? */
3482 if (atomic_read(&fs_info->scrub_cancel_req) ||
3483 atomic_read(&sctx->cancel_req)) {
3484 ret = -ECANCELED;
3485 goto out;
3486 }
3487
		/* check to see if we have to pause */
3490 if (atomic_read(&fs_info->scrub_pause_req)) {
			/* push queued extents */
3492 sctx->flush_all_writes = true;
3493 scrub_submit(sctx);
3494 mutex_lock(&sctx->wr_lock);
3495 scrub_wr_submit(sctx);
3496 mutex_unlock(&sctx->wr_lock);
3497 wait_event(sctx->list_wait,
3498 atomic_read(&sctx->bios_in_flight) == 0);
3499 sctx->flush_all_writes = false;
3500 scrub_blocked_if_needed(fs_info);
3501 }
3502
3503 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3504 ret = get_raid56_logic_offset(physical, num, map,
3505 &logical,
3506 &stripe_logical);
3507 logical += base;
3508 if (ret) {
				/* this is a parity stripe */
3510 stripe_logical += base;
3511 stripe_end = stripe_logical + increment;
3512 ret = scrub_raid56_parity(sctx, map, scrub_dev,
3513 ppath, stripe_logical,
3514 stripe_end);
3515 if (ret)
3516 goto out;
3517 goto skip;
3518 }
3519 }
3520
3521 if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
3522 key.type = BTRFS_METADATA_ITEM_KEY;
3523 else
3524 key.type = BTRFS_EXTENT_ITEM_KEY;
3525 key.objectid = logical;
3526 key.offset = (u64)-1;
3527
3528 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3529 if (ret < 0)
3530 goto out;
3531
3532 if (ret > 0) {
3533 ret = btrfs_previous_extent_item(root, path, 0);
3534 if (ret < 0)
3535 goto out;
3536 if (ret > 0) {
				/* there's no smaller item, so stick with the
				 * larger one */
3539 btrfs_release_path(path);
3540 ret = btrfs_search_slot(NULL, root, &key,
3541 path, 0, 0);
3542 if (ret < 0)
3543 goto out;
3544 }
3545 }
3546
3547 stop_loop = 0;
3548 while (1) {
3549 u64 bytes;
3550
3551 l = path->nodes[0];
3552 slot = path->slots[0];
3553 if (slot >= btrfs_header_nritems(l)) {
3554 ret = btrfs_next_leaf(root, path);
3555 if (ret == 0)
3556 continue;
3557 if (ret < 0)
3558 goto out;
3559
3560 stop_loop = 1;
3561 break;
3562 }
3563 btrfs_item_key_to_cpu(l, &key, slot);
3564
3565 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
3566 key.type != BTRFS_METADATA_ITEM_KEY)
3567 goto next;
3568
3569 if (key.type == BTRFS_METADATA_ITEM_KEY)
3570 bytes = fs_info->nodesize;
3571 else
3572 bytes = key.offset;
3573
3574 if (key.objectid + bytes <= logical)
3575 goto next;
3576
3577 if (key.objectid >= logical + map->stripe_len) {
				/* out of this device extent */
3579 if (key.objectid >= logic_end)
3580 stop_loop = 1;
3581 break;
3582 }
3583
3584 extent = btrfs_item_ptr(l, slot,
3585 struct btrfs_extent_item);
3586 flags = btrfs_extent_flags(l, extent);
3587 generation = btrfs_extent_generation(l, extent);
3588
3589 if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
3590 (key.objectid < logical ||
3591 key.objectid + bytes >
3592 logical + map->stripe_len)) {
3593 btrfs_err(fs_info,
3594 "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
3595 key.objectid, logical);
3596 spin_lock(&sctx->stat_lock);
3597 sctx->stat.uncorrectable_errors++;
3598 spin_unlock(&sctx->stat_lock);
3599 goto next;
3600 }
3601
3602again:
3603 extent_logical = key.objectid;
3604 extent_len = bytes;
3605
			/* trim the extent to this stripe */
3609 if (extent_logical < logical) {
3610 extent_len -= logical - extent_logical;
3611 extent_logical = logical;
3612 }
3613 if (extent_logical + extent_len >
3614 logical + map->stripe_len) {
3615 extent_len = logical + map->stripe_len -
3616 extent_logical;
3617 }
3618
3619 extent_physical = extent_logical - logical + physical;
3620 extent_dev = scrub_dev;
3621 extent_mirror_num = mirror_num;
3622 if (is_dev_replace)
3623 scrub_remap_extent(fs_info, extent_logical,
3624 extent_len, &extent_physical,
3625 &extent_dev,
3626 &extent_mirror_num);
3627
3628 ret = btrfs_lookup_csums_range(csum_root,
3629 extent_logical,
3630 extent_logical +
3631 extent_len - 1,
3632 &sctx->csum_list, 1);
3633 if (ret)
3634 goto out;
3635
3636 ret = scrub_extent(sctx, map, extent_logical, extent_len,
3637 extent_physical, extent_dev, flags,
3638 generation, extent_mirror_num,
3639 extent_logical - logical + physical);
3640
3641 scrub_free_csums(sctx);
3642
3643 if (ret)
3644 goto out;
3645
3646 if (extent_logical + extent_len <
3647 key.objectid + bytes) {
3648 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
					/*
					 * Loop until we find the next data
					 * stripe or we have finished all
					 * stripes.
					 */
3653loop:
3654 physical += map->stripe_len;
3655 ret = get_raid56_logic_offset(physical,
3656 num, map, &logical,
3657 &stripe_logical);
3658 logical += base;
3659
3660 if (ret && physical < physical_end) {
3661 stripe_logical += base;
3662 stripe_end = stripe_logical +
3663 increment;
3664 ret = scrub_raid56_parity(sctx,
3665 map, scrub_dev, ppath,
3666 stripe_logical,
3667 stripe_end);
3668 if (ret)
3669 goto out;
3670 goto loop;
3671 }
3672 } else {
3673 physical += map->stripe_len;
3674 logical += increment;
3675 }
3676 if (logical < key.objectid + bytes) {
3677 cond_resched();
3678 goto again;
3679 }
3680
3681 if (physical >= physical_end) {
3682 stop_loop = 1;
3683 break;
3684 }
3685 }
3686next:
3687 path->slots[0]++;
3688 }
3689 btrfs_release_path(path);
3690skip:
3691 logical += increment;
3692 physical += map->stripe_len;
3693 spin_lock(&sctx->stat_lock);
3694 if (stop_loop)
3695 sctx->stat.last_physical = map->stripes[num].physical +
3696 length;
3697 else
3698 sctx->stat.last_physical = physical;
3699 spin_unlock(&sctx->stat_lock);
3700 if (stop_loop)
3701 break;
3702 }
3703out:
	/* push queued extents */
3705 scrub_submit(sctx);
3706 mutex_lock(&sctx->wr_lock);
3707 scrub_wr_submit(sctx);
3708 mutex_unlock(&sctx->wr_lock);
3709
3710 blk_finish_plug(&plug);
3711 btrfs_free_path(path);
3712 btrfs_free_path(ppath);
3713 return ret < 0 ? ret : 0;
3714}
3715
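/*
 * Scrub one chunk: look up its mapping and scrub every stripe of it
 * that is located on @scrub_dev.
 */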
3716static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
3717 struct btrfs_device *scrub_dev,
3718 u64 chunk_offset, u64 length,
3719 u64 dev_offset,
3720 struct btrfs_block_group_cache *cache,
3721 int is_dev_replace)
3722{
3723 struct btrfs_fs_info *fs_info = sctx->fs_info;
3724 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
3725 struct map_lookup *map;
3726 struct extent_map *em;
3727 int i;
3728 int ret = 0;
3729
3730 read_lock(&map_tree->map_tree.lock);
3731 em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
3732 read_unlock(&map_tree->map_tree.lock);
3733
3734 if (!em) {
		/*
		 * Might have been an unused block group deleted by the
		 * cleaner kthread or relocation. Only report an error if the
		 * block group has not been marked removed yet.
		 */
3739 spin_lock(&cache->lock);
3740 if (!cache->removed)
3741 ret = -EINVAL;
3742 spin_unlock(&cache->lock);
3743
3744 return ret;
3745 }
3746
3747 map = em->map_lookup;
3748 if (em->start != chunk_offset)
3749 goto out;
3750
3751 if (em->len < length)
3752 goto out;
3753
3754 for (i = 0; i < map->num_stripes; ++i) {
3755 if (map->stripes[i].dev->bdev == scrub_dev->bdev &&
3756 map->stripes[i].physical == dev_offset) {
3757 ret = scrub_stripe(sctx, map, scrub_dev, i,
3758 chunk_offset, length,
3759 is_dev_replace);
3760 if (ret)
3761 goto out;
3762 }
3763 }
3764out:
3765 free_extent_map(em);
3766
3767 return ret;
3768}
3769
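/*
 * Walk the device extents of @scrub_dev in [start, end) and scrub the
 * corresponding block groups one at a time. Where possible each block
 * group is made read-only for the duration, so the data being scrubbed
 * does not change underneath us.
 */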
3770static noinline_for_stack
3771int scrub_enumerate_chunks(struct scrub_ctx *sctx,
3772 struct btrfs_device *scrub_dev, u64 start, u64 end,
3773 int is_dev_replace)
3774{
3775 struct btrfs_dev_extent *dev_extent = NULL;
3776 struct btrfs_path *path;
3777 struct btrfs_fs_info *fs_info = sctx->fs_info;
3778 struct btrfs_root *root = fs_info->dev_root;
3779 u64 length;
3780 u64 chunk_offset;
3781 int ret = 0;
3782 int ro_set;
3783 int slot;
3784 struct extent_buffer *l;
3785 struct btrfs_key key;
3786 struct btrfs_key found_key;
3787 struct btrfs_block_group_cache *cache;
3788 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
3789
3790 path = btrfs_alloc_path();
3791 if (!path)
3792 return -ENOMEM;
3793
3794 path->reada = READA_FORWARD;
3795 path->search_commit_root = 1;
3796 path->skip_locking = 1;
3797
3798 key.objectid = scrub_dev->devid;
3799 key.offset = 0ull;
3800 key.type = BTRFS_DEV_EXTENT_KEY;
3801
3802 while (1) {
3803 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3804 if (ret < 0)
3805 break;
3806 if (ret > 0) {
3807 if (path->slots[0] >=
3808 btrfs_header_nritems(path->nodes[0])) {
3809 ret = btrfs_next_leaf(root, path);
3810 if (ret < 0)
3811 break;
3812 if (ret > 0) {
3813 ret = 0;
3814 break;
3815 }
3816 } else {
3817 ret = 0;
3818 }
3819 }
3820
3821 l = path->nodes[0];
3822 slot = path->slots[0];
3823
3824 btrfs_item_key_to_cpu(l, &found_key, slot);
3825
3826 if (found_key.objectid != scrub_dev->devid)
3827 break;
3828
3829 if (found_key.type != BTRFS_DEV_EXTENT_KEY)
3830 break;
3831
3832 if (found_key.offset >= end)
3833 break;
3834
3835 if (found_key.offset < key.offset)
3836 break;
3837
3838 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
3839 length = btrfs_dev_extent_length(l, dev_extent);
3840
3841 if (found_key.offset + length <= start)
3842 goto skip;
3843
3844 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
3845
		/*
		 * Get a reference on the corresponding block group to
		 * prevent the chunk from going away while we scrub it.
		 */
3850 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
3851
		/* some chunks are removed but not committed to disk yet,
		 * continue scrubbing */
3854 if (!cache)
3855 goto skip;
3856
		/*
		 * We must call btrfs_inc_block_group_ro() with the scrub
		 * counted as paused, to avoid a deadlock caused by:
		 * btrfs_inc_block_group_ro()
		 * -> btrfs_wait_for_commit()
		 * -> btrfs_commit_transaction()
		 * -> btrfs_scrub_pause()
		 */
3865 scrub_pause_on(fs_info);
3866 ret = btrfs_inc_block_group_ro(fs_info, cache);
3867 if (!ret && is_dev_replace) {
			/*
			 * If we are doing a device replace, wait for any
			 * tasks that started delalloc just before the block
			 * group was set to RO mode, as they might have just
			 * allocated an extent from it or decided they could
			 * do a nocow write. If they did, wait for their
			 * ordered extents to complete and then commit the
			 * current transaction, so that the new extent items
			 * become visible in the extent tree: ordered extents
			 * create delayed data references (i.e. cow writes)
			 * when they complete, while nocow writes add their
			 * extent item in the transaction commit started
			 * here. Only after that is it safe to copy the block
			 * group's contents.
			 */
3886 btrfs_wait_block_group_reservations(cache);
3887 btrfs_wait_nocow_writers(cache);
3888 ret = btrfs_wait_ordered_roots(fs_info, U64_MAX,
3889 cache->key.objectid,
3890 cache->key.offset);
3891 if (ret > 0) {
3892 struct btrfs_trans_handle *trans;
3893
3894 trans = btrfs_join_transaction(root);
3895 if (IS_ERR(trans))
3896 ret = PTR_ERR(trans);
3897 else
3898 ret = btrfs_commit_transaction(trans);
3899 if (ret) {
3900 scrub_pause_off(fs_info);
3901 btrfs_put_block_group(cache);
3902 break;
3903 }
3904 }
3905 }
3906 scrub_pause_off(fs_info);
3907
3908 if (ret == 0) {
3909 ro_set = 1;
3910 } else if (ret == -ENOSPC) {
			/*
			 * btrfs_inc_block_group_ro() returns -ENOSPC when it
			 * fails to create a new chunk for metadata. That is
			 * not a problem for scrub/replace, because metadata
			 * is always cowed, and our scrub pauses transaction
			 * commits.
			 */
3918 ro_set = 0;
3919 } else {
3920 btrfs_warn(fs_info,
3921 "failed setting block group ro: %d", ret);
3922 btrfs_put_block_group(cache);
3923 break;
3924 }
3925
3926 btrfs_dev_replace_write_lock(&fs_info->dev_replace);
3927 dev_replace->cursor_right = found_key.offset + length;
3928 dev_replace->cursor_left = found_key.offset;
3929 dev_replace->item_needs_writeback = 1;
3930 btrfs_dev_replace_write_unlock(&fs_info->dev_replace);
3931 ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
3932 found_key.offset, cache, is_dev_replace);
3933
		/*
		 * Flush and submit all pending read and write bios, then
		 * wait for them. Note that in the dev replace case, a read
		 * request causes write requests that are submitted in the
		 * read completion worker. Therefore all write requests must
		 * be flushed too, so that all read and write requests are
		 * really completed when bios_in_flight drops to 0.
		 */
3944 sctx->flush_all_writes = true;
3945 scrub_submit(sctx);
3946 mutex_lock(&sctx->wr_lock);
3947 scrub_wr_submit(sctx);
3948 mutex_unlock(&sctx->wr_lock);
3949
3950 wait_event(sctx->list_wait,
3951 atomic_read(&sctx->bios_in_flight) == 0);
3952
3953 scrub_pause_on(fs_info);
3954
		/*
		 * Must be called before we decrease @scrub_paused. Make
		 * sure we don't block transaction commit while we are
		 * waiting for the pending workers to finish.
		 */
3960 wait_event(sctx->list_wait,
3961 atomic_read(&sctx->workers_pending) == 0);
3962 sctx->flush_all_writes = false;
3963
3964 scrub_pause_off(fs_info);
3965
3966 btrfs_dev_replace_write_lock(&fs_info->dev_replace);
3967 dev_replace->cursor_left = dev_replace->cursor_right;
3968 dev_replace->item_needs_writeback = 1;
3969 btrfs_dev_replace_write_unlock(&fs_info->dev_replace);
3970
3971 if (ro_set)
3972 btrfs_dec_block_group_ro(cache);
3973
		/*
		 * We might have prevented the cleaner kthread from deleting
		 * this block group if it was already unused because we raced
		 * with relocation. So check again whether the group is
		 * unused now and, if so, queue it on the unused_bgs list
		 * ourselves.
		 */
3981 spin_lock(&cache->lock);
3982 if (!cache->removed && !cache->ro && cache->reserved == 0 &&
3983 btrfs_block_group_used(&cache->item) == 0) {
3984 spin_unlock(&cache->lock);
3985 spin_lock(&fs_info->unused_bgs_lock);
3986 if (list_empty(&cache->bg_list)) {
3987 btrfs_get_block_group(cache);
3988 trace_btrfs_add_unused_block_group(cache);
3989 list_add_tail(&cache->bg_list,
3990 &fs_info->unused_bgs);
3991 }
3992 spin_unlock(&fs_info->unused_bgs_lock);
3993 } else {
3994 spin_unlock(&cache->lock);
3995 }
3996
3997 btrfs_put_block_group(cache);
3998 if (ret)
3999 break;
4000 if (is_dev_replace &&
4001 atomic64_read(&dev_replace->num_write_errors) > 0) {
4002 ret = -EIO;
4003 break;
4004 }
4005 if (sctx->stat.malloc_errors > 0) {
4006 ret = -ENOMEM;
4007 break;
4008 }
4009skip:
4010 key.offset = found_key.offset + length;
4011 btrfs_release_path(path);
4012 }
4013
4014 btrfs_free_path(path);
4015
4016 return ret;
4017}
4018
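/*
 * Scrub all copies of the super block of @scrub_dev that fit within the
 * committed device size.
 */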
4019static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
4020 struct btrfs_device *scrub_dev)
4021{
4022 int i;
4023 u64 bytenr;
4024 u64 gen;
4025 int ret;
4026 struct btrfs_fs_info *fs_info = sctx->fs_info;
4027
4028 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
4029 return -EIO;
4030
	/* seed devices of a new filesystem have their own generation */
4032 if (scrub_dev->fs_devices != fs_info->fs_devices)
4033 gen = scrub_dev->generation;
4034 else
4035 gen = fs_info->last_trans_committed;
4036
4037 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
4038 bytenr = btrfs_sb_offset(i);
4039 if (bytenr + BTRFS_SUPER_INFO_SIZE >
4040 scrub_dev->commit_total_bytes)
4041 break;
4042
4043 ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
4044 scrub_dev, BTRFS_EXTENT_FLAG_SUPER, gen, i,
4045 NULL, 1, bytenr);
4046 if (ret)
4047 return ret;
4048 }
4049 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
4050
4051 return 0;
4052}
4053
/*
 * Get a reference on fs_info->scrub_workers, starting the worker threads
 * if necessary.
 */
4057static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
4058 int is_dev_replace)
4059{
4060 unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
4061 int max_active = fs_info->thread_pool_size;
4062
4063 if (fs_info->scrub_workers_refcnt == 0) {
4064 fs_info->scrub_workers = btrfs_alloc_workqueue(fs_info, "scrub",
4065 flags, is_dev_replace ? 1 : max_active, 4);
4066 if (!fs_info->scrub_workers)
4067 goto fail_scrub_workers;
4068
4069 fs_info->scrub_wr_completion_workers =
4070 btrfs_alloc_workqueue(fs_info, "scrubwrc", flags,
4071 max_active, 2);
4072 if (!fs_info->scrub_wr_completion_workers)
4073 goto fail_scrub_wr_completion_workers;
4074
4075 fs_info->scrub_nocow_workers =
4076 btrfs_alloc_workqueue(fs_info, "scrubnc", flags, 1, 0);
4077 if (!fs_info->scrub_nocow_workers)
4078 goto fail_scrub_nocow_workers;
4079 fs_info->scrub_parity_workers =
4080 btrfs_alloc_workqueue(fs_info, "scrubparity", flags,
4081 max_active, 2);
4082 if (!fs_info->scrub_parity_workers)
4083 goto fail_scrub_parity_workers;
4084 }
4085 ++fs_info->scrub_workers_refcnt;
4086 return 0;
4087
4088fail_scrub_parity_workers:
4089 btrfs_destroy_workqueue(fs_info->scrub_nocow_workers);
4090fail_scrub_nocow_workers:
4091 btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
4092fail_scrub_wr_completion_workers:
4093 btrfs_destroy_workqueue(fs_info->scrub_workers);
4094fail_scrub_workers:
4095 return -ENOMEM;
4096}
4097
4098static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
4099{
4100 if (--fs_info->scrub_workers_refcnt == 0) {
4101 btrfs_destroy_workqueue(fs_info->scrub_workers);
4102 btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
4103 btrfs_destroy_workqueue(fs_info->scrub_nocow_workers);
4104 btrfs_destroy_workqueue(fs_info->scrub_parity_workers);
4105 }
4106 WARN_ON(fs_info->scrub_workers_refcnt < 0);
4107}
4108
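/*
 * Entry point for both scrub and device replace. Validates the size
 * assumptions scrub relies on, sets up the scrub context and the worker
 * threads, then scrubs the super blocks (scrub only) and all chunks in
 * [start, end).
 */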
4109int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
4110 u64 end, struct btrfs_scrub_progress *progress,
4111 int readonly, int is_dev_replace)
4112{
4113 struct scrub_ctx *sctx;
4114 int ret;
4115 struct btrfs_device *dev;
4116 struct rcu_string *name;
4117
4118 if (btrfs_fs_closing(fs_info))
4119 return -EINVAL;
4120
4121 if (fs_info->nodesize > BTRFS_STRIPE_LEN) {
		/*
		 * In this case scrub is unable to calculate the checksum the
		 * way it is implemented. Do not handle this situation at all
		 * because it won't ever happen.
		 */
4127 btrfs_err(fs_info,
4128 "scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails",
4129 fs_info->nodesize,
4130 BTRFS_STRIPE_LEN);
4131 return -EINVAL;
4132 }
4133
4134 if (fs_info->sectorsize != PAGE_SIZE) {
		/* scrub assumes one sector per page; anything else is unsupported */
4136 btrfs_err_rl(fs_info,
4137 "scrub: size assumption sectorsize != PAGE_SIZE (%d != %lu) fails",
4138 fs_info->sectorsize, PAGE_SIZE);
4139 return -EINVAL;
4140 }
4141
4142 if (fs_info->nodesize >
4143 PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK ||
4144 fs_info->sectorsize > PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK) {
		/*
		 * Would exhaust the array bounds of the pagev member in
		 * struct scrub_block.
		 */
4149 btrfs_err(fs_info,
4150 "scrub: size assumption nodesize and sectorsize <= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails",
4151 fs_info->nodesize,
4152 SCRUB_MAX_PAGES_PER_BLOCK,
4153 fs_info->sectorsize,
4154 SCRUB_MAX_PAGES_PER_BLOCK);
4155 return -EINVAL;
4156 }
4157
4159 mutex_lock(&fs_info->fs_devices->device_list_mutex);
4160 dev = btrfs_find_device(fs_info, devid, NULL, NULL);
4161 if (!dev || (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) &&
4162 !is_dev_replace)) {
4163 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
4164 return -ENODEV;
4165 }
4166
4167 if (!is_dev_replace && !readonly &&
4168 !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
4169 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
4170 rcu_read_lock();
4171 name = rcu_dereference(dev->name);
4172 btrfs_err(fs_info, "scrub: device %s is not writable",
4173 name->str);
4174 rcu_read_unlock();
4175 return -EROFS;
4176 }
4177
4178 mutex_lock(&fs_info->scrub_lock);
4179 if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
4180 test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &dev->dev_state)) {
4181 mutex_unlock(&fs_info->scrub_lock);
4182 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
4183 return -EIO;
4184 }
4185
4186 btrfs_dev_replace_read_lock(&fs_info->dev_replace);
4187 if (dev->scrub_ctx ||
4188 (!is_dev_replace &&
4189 btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
4190 btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
4191 mutex_unlock(&fs_info->scrub_lock);
4192 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
4193 return -EINPROGRESS;
4194 }
4195 btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
4196
4197 ret = scrub_workers_get(fs_info, is_dev_replace);
4198 if (ret) {
4199 mutex_unlock(&fs_info->scrub_lock);
4200 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
4201 return ret;
4202 }
4203
4204 sctx = scrub_setup_ctx(dev, is_dev_replace);
4205 if (IS_ERR(sctx)) {
4206 mutex_unlock(&fs_info->scrub_lock);
4207 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
4208 scrub_workers_put(fs_info);
4209 return PTR_ERR(sctx);
4210 }
4211 sctx->readonly = readonly;
4212 dev->scrub_ctx = sctx;
4213 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
4214
	/*
	 * Checking @scrub_pause_req here avoids a race between committing
	 * the transaction and scrubbing.
	 */
4219 __scrub_blocked_if_needed(fs_info);
4220 atomic_inc(&fs_info->scrubs_running);
4221 mutex_unlock(&fs_info->scrub_lock);
4222
4223 if (!is_dev_replace) {
		/*
		 * By holding the device list mutex, super block writes from
		 * a log tree sync cannot race with the scrub of the super
		 * blocks below.
		 */
4228 mutex_lock(&fs_info->fs_devices->device_list_mutex);
4229 ret = scrub_supers(sctx, dev);
4230 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
4231 }
4232
4233 if (!ret)
4234 ret = scrub_enumerate_chunks(sctx, dev, start, end,
4235 is_dev_replace);
4236
4237 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
4238 atomic_dec(&fs_info->scrubs_running);
4239 wake_up(&fs_info->scrub_pause_wait);
4240
4241 wait_event(sctx->list_wait, atomic_read(&sctx->workers_pending) == 0);
4242
4243 if (progress)
4244 memcpy(progress, &sctx->stat, sizeof(*progress));
4245
4246 mutex_lock(&fs_info->scrub_lock);
4247 dev->scrub_ctx = NULL;
4248 scrub_workers_put(fs_info);
4249 mutex_unlock(&fs_info->scrub_lock);
4250
4251 scrub_put_ctx(sctx);
4252
4253 return ret;
4254}
4255
4256void btrfs_scrub_pause(struct btrfs_fs_info *fs_info)
4257{
4258 mutex_lock(&fs_info->scrub_lock);
4259 atomic_inc(&fs_info->scrub_pause_req);
4260 while (atomic_read(&fs_info->scrubs_paused) !=
4261 atomic_read(&fs_info->scrubs_running)) {
4262 mutex_unlock(&fs_info->scrub_lock);
4263 wait_event(fs_info->scrub_pause_wait,
4264 atomic_read(&fs_info->scrubs_paused) ==
4265 atomic_read(&fs_info->scrubs_running));
4266 mutex_lock(&fs_info->scrub_lock);
4267 }
4268 mutex_unlock(&fs_info->scrub_lock);
4269}
4270
4271void btrfs_scrub_continue(struct btrfs_fs_info *fs_info)
4272{
4273 atomic_dec(&fs_info->scrub_pause_req);
4274 wake_up(&fs_info->scrub_pause_wait);
4275}
4276
4277int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
4278{
4279 mutex_lock(&fs_info->scrub_lock);
4280 if (!atomic_read(&fs_info->scrubs_running)) {
4281 mutex_unlock(&fs_info->scrub_lock);
4282 return -ENOTCONN;
4283 }
4284
4285 atomic_inc(&fs_info->scrub_cancel_req);
4286 while (atomic_read(&fs_info->scrubs_running)) {
4287 mutex_unlock(&fs_info->scrub_lock);
4288 wait_event(fs_info->scrub_pause_wait,
4289 atomic_read(&fs_info->scrubs_running) == 0);
4290 mutex_lock(&fs_info->scrub_lock);
4291 }
4292 atomic_dec(&fs_info->scrub_cancel_req);
4293 mutex_unlock(&fs_info->scrub_lock);
4294
4295 return 0;
4296}
4297
4298int btrfs_scrub_cancel_dev(struct btrfs_fs_info *fs_info,
4299 struct btrfs_device *dev)
4300{
4301 struct scrub_ctx *sctx;
4302
4303 mutex_lock(&fs_info->scrub_lock);
4304 sctx = dev->scrub_ctx;
4305 if (!sctx) {
4306 mutex_unlock(&fs_info->scrub_lock);
4307 return -ENOTCONN;
4308 }
4309 atomic_inc(&sctx->cancel_req);
4310 while (dev->scrub_ctx) {
4311 mutex_unlock(&fs_info->scrub_lock);
4312 wait_event(fs_info->scrub_pause_wait,
4313 dev->scrub_ctx == NULL);
4314 mutex_lock(&fs_info->scrub_lock);
4315 }
4316 mutex_unlock(&fs_info->scrub_lock);
4317
4318 return 0;
4319}
4320
4321int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
4322 struct btrfs_scrub_progress *progress)
4323{
4324 struct btrfs_device *dev;
4325 struct scrub_ctx *sctx = NULL;
4326
4327 mutex_lock(&fs_info->fs_devices->device_list_mutex);
4328 dev = btrfs_find_device(fs_info, devid, NULL, NULL);
4329 if (dev)
4330 sctx = dev->scrub_ctx;
4331 if (sctx)
4332 memcpy(progress, &sctx->stat, sizeof(*progress));
4333 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
4334
4335 return dev ? (sctx ? 0 : -ENOTCONN) : -ENODEV;
4336}
4337
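/*
 * For device replace, map @extent_logical to the physical location,
 * device and mirror number it should be read from.
 */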
4338static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
4339 u64 extent_logical, u64 extent_len,
4340 u64 *extent_physical,
4341 struct btrfs_device **extent_dev,
4342 int *extent_mirror_num)
4343{
4344 u64 mapped_length;
4345 struct btrfs_bio *bbio = NULL;
4346 int ret;
4347
4348 mapped_length = extent_len;
4349 ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, extent_logical,
4350 &mapped_length, &bbio, 0);
4351 if (ret || !bbio || mapped_length < extent_len ||
4352 !bbio->stripes[0].dev->bdev) {
4353 btrfs_put_bbio(bbio);
4354 return;
4355 }
4356
4357 *extent_physical = bbio->stripes[0].physical;
4358 *extent_mirror_num = bbio->mirror_num;
4359 *extent_dev = bbio->stripes[0].dev;
4360 btrfs_put_bbio(bbio);
4361}
4362
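/*
 * Entry point of the nocow copy path. Its only caller is the disabled
 * "0 &&" branch in scrub_extent(), so this code is currently never
 * executed.
 */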
4363static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
4364 int mirror_num, u64 physical_for_dev_replace)
4365{
4366 struct scrub_copy_nocow_ctx *nocow_ctx;
4367 struct btrfs_fs_info *fs_info = sctx->fs_info;
4368
4369 nocow_ctx = kzalloc(sizeof(*nocow_ctx), GFP_NOFS);
4370 if (!nocow_ctx) {
4371 spin_lock(&sctx->stat_lock);
4372 sctx->stat.malloc_errors++;
4373 spin_unlock(&sctx->stat_lock);
4374 return -ENOMEM;
4375 }
4376
4377 scrub_pending_trans_workers_inc(sctx);
4378
4379 nocow_ctx->sctx = sctx;
4380 nocow_ctx->logical = logical;
4381 nocow_ctx->len = len;
4382 nocow_ctx->mirror_num = mirror_num;
4383 nocow_ctx->physical_for_dev_replace = physical_for_dev_replace;
4384 btrfs_init_work(&nocow_ctx->work, btrfs_scrubnc_helper,
4385 copy_nocow_pages_worker, NULL, NULL);
4386 INIT_LIST_HEAD(&nocow_ctx->inodes);
4387 btrfs_queue_work(fs_info->scrub_nocow_workers,
4388 &nocow_ctx->work);
4389
4390 return 0;
4391}
4392
4393static int record_inode_for_nocow(u64 inum, u64 offset, u64 root, void *ctx)
4394{
4395 struct scrub_copy_nocow_ctx *nocow_ctx = ctx;
4396 struct scrub_nocow_inode *nocow_inode;
4397
4398 nocow_inode = kzalloc(sizeof(*nocow_inode), GFP_NOFS);
4399 if (!nocow_inode)
4400 return -ENOMEM;
4401 nocow_inode->inum = inum;
4402 nocow_inode->offset = offset;
4403 nocow_inode->root = root;
4404 list_add_tail(&nocow_inode->list, &nocow_ctx->inodes);
4405 return 0;
4406}
4407
4408#define COPY_COMPLETE 1
4409
4410static void copy_nocow_pages_worker(struct btrfs_work *work)
4411{
4412 struct scrub_copy_nocow_ctx *nocow_ctx =
4413 container_of(work, struct scrub_copy_nocow_ctx, work);
4414 struct scrub_ctx *sctx = nocow_ctx->sctx;
4415 struct btrfs_fs_info *fs_info = sctx->fs_info;
4416 struct btrfs_root *root = fs_info->extent_root;
4417 u64 logical = nocow_ctx->logical;
4418 u64 len = nocow_ctx->len;
4419 int mirror_num = nocow_ctx->mirror_num;
4420 u64 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
4421 int ret;
4422 struct btrfs_trans_handle *trans = NULL;
4423 struct btrfs_path *path;
4424 int not_written = 0;
4425
4426 path = btrfs_alloc_path();
4427 if (!path) {
4428 spin_lock(&sctx->stat_lock);
4429 sctx->stat.malloc_errors++;
4430 spin_unlock(&sctx->stat_lock);
4431 not_written = 1;
4432 goto out;
4433 }
4434
4435 trans = btrfs_join_transaction(root);
4436 if (IS_ERR(trans)) {
4437 not_written = 1;
4438 goto out;
4439 }
4440
4441 ret = iterate_inodes_from_logical(logical, fs_info, path,
4442 record_inode_for_nocow, nocow_ctx, false);
4443 if (ret != 0 && ret != -ENOENT) {
4444 btrfs_warn(fs_info,
4445 "iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d",
4446 logical, physical_for_dev_replace, len, mirror_num,
4447 ret);
4448 not_written = 1;
4449 goto out;
4450 }
4451
4452 btrfs_end_transaction(trans);
4453 trans = NULL;
4454 while (!list_empty(&nocow_ctx->inodes)) {
4455 struct scrub_nocow_inode *entry;
4456 entry = list_first_entry(&nocow_ctx->inodes,
4457 struct scrub_nocow_inode,
4458 list);
4459 list_del_init(&entry->list);
4460 ret = copy_nocow_pages_for_inode(entry->inum, entry->offset,
4461 entry->root, nocow_ctx);
4462 kfree(entry);
4463 if (ret == COPY_COMPLETE) {
4464 ret = 0;
4465 break;
4466 } else if (ret) {
4467 break;
4468 }
4469 }
4470out:
4471 while (!list_empty(&nocow_ctx->inodes)) {
4472 struct scrub_nocow_inode *entry;
4473 entry = list_first_entry(&nocow_ctx->inodes,
4474 struct scrub_nocow_inode,
4475 list);
4476 list_del_init(&entry->list);
4477 kfree(entry);
4478 }
4479 if (trans && !IS_ERR(trans))
4480 btrfs_end_transaction(trans);
4481 if (not_written)
		btrfs_dev_replace_stats_inc(
			&fs_info->dev_replace.num_uncorrectable_read_errors);
4484
4485 btrfs_free_path(path);
4486 kfree(nocow_ctx);
4487
4488 scrub_pending_trans_workers_dec(sctx);
4489}
4490
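/*
 * Check that the file range [start, start + len) still maps to the
 * block at @logical and is not covered by an ordered extent or a
 * preallocated extent. Returns 1 if the range must be skipped, 0 if it
 * can be copied, or a negative error.
 */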
static int check_extent_to_block(struct btrfs_inode *inode, u64 start, u64 len,
				 u64 logical)
{
	struct extent_state *cached_state = NULL;
	struct btrfs_ordered_extent *ordered;
	struct extent_io_tree *io_tree;
	struct extent_map *em;
	u64 lockstart = start, lockend = start + len - 1;
	int ret = 0;

	io_tree = &inode->io_tree;

	lock_extent_bits(io_tree, lockstart, lockend, &cached_state);
	ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
	if (ordered) {
		btrfs_put_ordered_extent(ordered);
		ret = 1;
		goto out_unlock;
	}

	em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
	if (IS_ERR(em)) {
		ret = PTR_ERR(em);
		goto out_unlock;
	}

	/*
	 * The on-disk extent no longer fully covers [logical, logical + len),
	 * or it is preallocated; either way the caller must not copy it.
	 */
	if (em->block_start > logical ||
	    em->block_start + em->block_len < logical + len ||
	    test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
		free_extent_map(em);
		ret = 1;
		goto out_unlock;
	}
	free_extent_map(em);

out_unlock:
	unlock_extent_cached(io_tree, lockstart, lockend, &cached_state);
	return ret;
}

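/*
 * Copy the pages backing the nocow extent of one inode to the replace
 * target, reading them through the page cache and writing them with
 * write_page_nocow().
 *
 * Returns COPY_COMPLETE when the whole range was copied, 0 when the extent
 * no longer covers the range (nothing to do), negative errno on error.
 */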
static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
				      struct scrub_copy_nocow_ctx *nocow_ctx)
{
	struct btrfs_fs_info *fs_info = nocow_ctx->sctx->fs_info;
	struct btrfs_key key;
	struct inode *inode;
	struct page *page;
	struct btrfs_root *local_root;
	struct extent_io_tree *io_tree;
	u64 physical_for_dev_replace;
	u64 nocow_ctx_logical;
	u64 len = nocow_ctx->len;
	unsigned long index;
	int srcu_index;
	int ret = 0;
	int err = 0;

	key.objectid = root;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	srcu_index = srcu_read_lock(&fs_info->subvol_srcu);

	local_root = btrfs_read_fs_root_no_name(fs_info, &key);
	if (IS_ERR(local_root)) {
		srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
		return PTR_ERR(local_root);
	}

	key.type = BTRFS_INODE_ITEM_KEY;
	key.objectid = inum;
	key.offset = 0;
	inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
	srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	/* Avoid races with truncate, direct I/O and hole punching. */
	inode_lock(inode);
	inode_dio_wait(inode);

	physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
	io_tree = &BTRFS_I(inode)->io_tree;
	nocow_ctx_logical = nocow_ctx->logical;

	ret = check_extent_to_block(BTRFS_I(inode), offset, len,
				    nocow_ctx_logical);
	if (ret) {
		ret = ret > 0 ? 0 : ret;
		goto out;
	}

	while (len >= PAGE_SIZE) {
		index = offset >> PAGE_SHIFT;
again:
		page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
		if (!page) {
			btrfs_err(fs_info, "find_or_create_page() failed");
			ret = -ENOMEM;
			goto out;
		}

		if (PageUptodate(page)) {
			/*
			 * A dirty page will be written out by writeback, and
			 * writes are mirrored to the replace target anyway.
			 */
			if (PageDirty(page))
				goto next_page;
		} else {
			ClearPageError(page);
			err = extent_read_full_page(io_tree, page,
						    btrfs_get_extent,
						    nocow_ctx->mirror_num);
			if (err) {
				ret = err;
				goto next_page;
			}

			lock_page(page);

			/*
			 * If the page has been removed from the page cache
			 * meanwhile, its contents are meaningless: they may
			 * be stale, and the new data may have been written
			 * into a new page in the page cache.
			 */
			if (page->mapping != inode->i_mapping) {
				unlock_page(page);
				put_page(page);
				goto again;
			}
			if (!PageUptodate(page)) {
				ret = -EIO;
				goto next_page;
			}
		}

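		/*
		 * The extent may have been reallocated or rewritten while
		 * the page was being read, so check it again before copying.
		 */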
		ret = check_extent_to_block(BTRFS_I(inode), offset, len,
					    nocow_ctx_logical);
		if (ret) {
			ret = ret > 0 ? 0 : ret;
			goto next_page;
		}

		err = write_page_nocow(nocow_ctx->sctx,
				       physical_for_dev_replace, page);
		if (err)
			ret = err;
next_page:
		unlock_page(page);
		put_page(page);

		if (ret)
			break;

		offset += PAGE_SIZE;
		physical_for_dev_replace += PAGE_SIZE;
		nocow_ctx_logical += PAGE_SIZE;
		len -= PAGE_SIZE;
	}
	/* Don't let COPY_COMPLETE mask an error from the loop above. */
	if (!ret)
		ret = COPY_COMPLETE;
out:
	inode_unlock(inode);
	iput(inode);
	return ret;
}

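/*
 * Synchronously write one page to the given physical offset on the
 * dev-replace target device, bypassing the normal write path.
 */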
static int write_page_nocow(struct scrub_ctx *sctx,
			    u64 physical_for_dev_replace, struct page *page)
{
	struct bio *bio;
	struct btrfs_device *dev;

	dev = sctx->wr_tgtdev;
	if (!dev)
		return -EIO;
	if (!dev->bdev) {
		btrfs_warn_rl(dev->fs_info,
			"scrub write_page_nocow(bdev == NULL) is unexpected");
		return -EIO;
	}
	bio = btrfs_io_bio_alloc(1);
	bio->bi_iter.bi_size = 0;
	/* Convert the byte offset to 512-byte sectors. */
	bio->bi_iter.bi_sector = physical_for_dev_replace >> 9;
	bio_set_dev(bio, dev->bdev);
	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;

	/* The bio was allocated with room for one vec, this cannot fail. */
	bio_add_page(bio, page, PAGE_SIZE, 0);

	if (btrfsic_submit_bio_wait(bio)) {
		bio_put(bio);
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
		return -EIO;
	}

	bio_put(bio);
	return 0;
}