// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2011, 2012 STRATO.  All rights reserved.
 */

#include <linux/blkdev.h>
#include <linux/ratelimit.h>
#include <linux/sched/mm.h>
#include <crypto/hash.h>
#include "ctree.h"
#include "discard.h"
#include "volumes.h"
#include "disk-io.h"
#include "ordered-data.h"
#include "transaction.h"
#include "backref.h"
#include "extent_io.h"
#include "dev-replace.h"
#include "check-integrity.h"
#include "rcu-string.h"
#include "raid56.h"
#include "block-group.h"
#include "zoned.h"

/*
 * This is only the first step towards a full-featured scrub. It reads all
 * extents and super blocks and verifies the checksums. In case a bad checksum
 * is found or the extent cannot be read, good data will be written back if
 * any can be found.
 */

struct scrub_block;
struct scrub_ctx;

/*
 * The following three values only influence the performance.
 *
 * The last one configures the number of parallel and outstanding I/O
 * operations. The first two configure an upper limit for the number of
 * (dynamically allocated) pages that are added to a bio.
 */
#define SCRUB_PAGES_PER_RD_BIO	32
#define SCRUB_PAGES_PER_WR_BIO	32
#define SCRUB_BIOS_PER_SCTX	64

/*
 * The following value times PAGE_SIZE needs to be large enough to match the
 * largest node/leaf/sector size that shall be supported.
 */
#define SCRUB_MAX_PAGES_PER_BLOCK	16

struct scrub_recover {
	refcount_t		refs;
	struct btrfs_bio	*bbio;
	u64			map_length;
};

struct scrub_page {
	struct scrub_block	*sblock;
	struct page		*page;
	struct btrfs_device	*dev;
	struct list_head	list;
	u64			flags;  /* extent flags */
	u64			generation;
	u64			logical;
	u64			physical;
	u64			physical_for_dev_replace;
	atomic_t		refs;
	u8			mirror_num;
	unsigned int		have_csum:1;
	unsigned int		io_error:1;
	u8			csum[BTRFS_CSUM_SIZE];

	struct scrub_recover	*recover;
};

struct scrub_bio {
	int			index;
	struct scrub_ctx	*sctx;
	struct btrfs_device	*dev;
	struct bio		*bio;
	blk_status_t		status;
	u64			logical;
	u64			physical;
#if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
	struct scrub_page	*pagev[SCRUB_PAGES_PER_WR_BIO];
#else
	struct scrub_page	*pagev[SCRUB_PAGES_PER_RD_BIO];
#endif
	int			page_count;
	int			next_free;
	struct btrfs_work	work;
};

struct scrub_block {
	struct scrub_page	*pagev[SCRUB_MAX_PAGES_PER_BLOCK];
	int			page_count;
	atomic_t		outstanding_pages;
	refcount_t		refs; /* free mem on transition to zero */
	struct scrub_ctx	*sctx;
	struct scrub_parity	*sparity;
	struct {
		unsigned int	header_error:1;
		unsigned int	checksum_error:1;
		unsigned int	no_io_error_seen:1;
		unsigned int	generation_error:1; /* also sets header_error */

		/* The following is for the data used to check parity */
		/* It is for the data with checksum */
		unsigned int	data_corrected:1;
	};
	struct btrfs_work	work;
};

struct scrub_parity {
	struct scrub_ctx	*sctx;

	struct btrfs_device	*scrub_dev;

	u64			logic_start;

	u64			logic_end;

	int			nsectors;

	u32			stripe_len;

	refcount_t		refs;

	struct list_head	spages;

	/* Work of parity check and repair */
	struct btrfs_work	work;

	/* Mark the parity blocks which have data */
	unsigned long		*dbitmap;

	/*
	 * Mark the parity blocks which have data, but errors happen when
	 * reading data blocks or parity blocks, so to prevent scrubbing
	 * these blocks twice.
	 */
	unsigned long		*ebitmap;

	unsigned long		bitmap[];
};

struct scrub_ctx {
	struct scrub_bio	*bios[SCRUB_BIOS_PER_SCTX];
	struct btrfs_fs_info	*fs_info;
	int			first_free;
	int			curr;
	atomic_t		bios_in_flight;
	atomic_t		workers_pending;
	spinlock_t		list_lock;
	wait_queue_head_t	list_wait;
	struct list_head	csum_list;
	atomic_t		cancel_req;
	int			readonly;
	int			pages_per_rd_bio;

	/* State of IO throttling for the associated device */
	ktime_t			throttle_deadline;
	u64			throttle_sent;

	int			is_dev_replace;
	u64			write_pointer;

	struct scrub_bio	*wr_curr_bio;
	struct mutex		wr_lock;
	int			pages_per_wr_bio; /* <= SCRUB_PAGES_PER_WR_BIO */
	struct btrfs_device	*wr_tgtdev;
	bool			flush_all_writes;

	/*
	 * statistics
	 */
	struct btrfs_scrub_progress stat;
	spinlock_t		stat_lock;

	/*
	 * Use a ref counter to avoid use-after-free issues. Scrub workers
	 * decrement bios_in_flight and workers_pending and then do a wakeup
	 * on the list_wait wait queue. We must ensure the first wakeup comes
	 * before the ref counter is decremented, otherwise the memory the
	 * wait queue is in might be freed before the wakeup happens.
	 */
	refcount_t		refs;
};

struct scrub_warning {
	struct btrfs_path	*path;
	u64			extent_item_size;
	const char		*errstr;
	u64			physical;
	u64			logical;
	struct btrfs_device	*dev;
};

struct full_stripe_lock {
	struct rb_node		node;
	u64			logical;
	u64			refs;
	struct mutex		mutex;
};

static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
				     struct scrub_block *sblocks_for_recheck);
static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
				struct scrub_block *sblock,
				int retry_failed_mirror);
static void scrub_recheck_block_checksum(struct scrub_block *sblock);
static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
					     struct scrub_block *sblock_good);
static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
					    struct scrub_block *sblock_good,
					    int page_num, int force_write);
static void scrub_write_block_to_dev_replace(struct scrub_block *sblock);
static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
					   int page_num);
static int scrub_checksum_data(struct scrub_block *sblock);
static int scrub_checksum_tree_block(struct scrub_block *sblock);
static int scrub_checksum_super(struct scrub_block *sblock);
static void scrub_block_put(struct scrub_block *sblock);
static void scrub_page_get(struct scrub_page *spage);
static void scrub_page_put(struct scrub_page *spage);
static void scrub_parity_get(struct scrub_parity *sparity);
static void scrub_parity_put(struct scrub_parity *sparity);
static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u32 len,
		       u64 physical, struct btrfs_device *dev, u64 flags,
		       u64 gen, int mirror_num, u8 *csum,
		       u64 physical_for_dev_replace);
static void scrub_bio_end_io(struct bio *bio);
static void scrub_bio_end_io_worker(struct btrfs_work *work);
static void scrub_block_complete(struct scrub_block *sblock);
static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
			       u64 extent_logical, u32 extent_len,
			       u64 *extent_physical,
			       struct btrfs_device **extent_dev,
			       int *extent_mirror_num);
static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage);
static void scrub_wr_submit(struct scrub_ctx *sctx);
static void scrub_wr_bio_end_io(struct bio *bio);
static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
static void scrub_put_ctx(struct scrub_ctx *sctx);

static inline int scrub_is_page_on_raid56(struct scrub_page *spage)
{
	return spage->recover &&
	       (spage->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
}

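/*
 * Account a bio as in flight, holding an extra reference on the scrub
 * context so that it cannot be freed until the bio has completed.
 */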
static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
{
	refcount_inc(&sctx->refs);
	atomic_inc(&sctx->bios_in_flight);
}

static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
{
	atomic_dec(&sctx->bios_in_flight);
	wake_up(&sctx->list_wait);
	scrub_put_ctx(sctx);
}

static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
{
	while (atomic_read(&fs_info->scrub_pause_req)) {
		mutex_unlock(&fs_info->scrub_lock);
		wait_event(fs_info->scrub_pause_wait,
			   atomic_read(&fs_info->scrub_pause_req) == 0);
		mutex_lock(&fs_info->scrub_lock);
	}
}

static void scrub_pause_on(struct btrfs_fs_info *fs_info)
{
	atomic_inc(&fs_info->scrubs_paused);
	wake_up(&fs_info->scrub_pause_wait);
}

static void scrub_pause_off(struct btrfs_fs_info *fs_info)
{
	mutex_lock(&fs_info->scrub_lock);
	__scrub_blocked_if_needed(fs_info);
	atomic_dec(&fs_info->scrubs_paused);
	mutex_unlock(&fs_info->scrub_lock);

	wake_up(&fs_info->scrub_pause_wait);
}

static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
{
	scrub_pause_on(fs_info);
	scrub_pause_off(fs_info);
}

/*
 * Insert new full stripe lock into full stripe locks tree
 *
 * Return pointer to existing or newly inserted full_stripe_lock structure if
 * everything works well.
 * Return ERR_PTR(-ENOMEM) if we failed to allocate memory
 *
 * NOTE: caller must hold full_stripe_locks_root->lock before calling this
 * function.
 */
static struct full_stripe_lock *insert_full_stripe_lock(
		struct btrfs_full_stripe_locks_tree *locks_root,
		u64 fstripe_logical)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct full_stripe_lock *entry;
	struct full_stripe_lock *ret;

	lockdep_assert_held(&locks_root->lock);

	p = &locks_root->root.rb_node;
	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct full_stripe_lock, node);
		if (fstripe_logical < entry->logical) {
			p = &(*p)->rb_left;
		} else if (fstripe_logical > entry->logical) {
			p = &(*p)->rb_right;
		} else {
			entry->refs++;
			return entry;
		}
	}

	/*
	 * Insert new lock.
	 */
	ret = kmalloc(sizeof(*ret), GFP_KERNEL);
	if (!ret)
		return ERR_PTR(-ENOMEM);
	ret->logical = fstripe_logical;
	ret->refs = 1;
	mutex_init(&ret->mutex);

	rb_link_node(&ret->node, parent, p);
	rb_insert_color(&ret->node, &locks_root->root);
	return ret;
}

/*
 * Search for a full stripe lock of a block group
 *
 * Return pointer to existing full stripe lock if found
 * Return NULL if not found
 */
static struct full_stripe_lock *search_full_stripe_lock(
		struct btrfs_full_stripe_locks_tree *locks_root,
		u64 fstripe_logical)
{
	struct rb_node *node;
	struct full_stripe_lock *entry;

	lockdep_assert_held(&locks_root->lock);

	node = locks_root->root.rb_node;
	while (node) {
		entry = rb_entry(node, struct full_stripe_lock, node);
		if (fstripe_logical < entry->logical)
			node = node->rb_left;
		else if (fstripe_logical > entry->logical)
			node = node->rb_right;
		else
			return entry;
	}
	return NULL;
}

/*
 * Helper to get full stripe logical from a normal bytenr.
 *
 * Caller must ensure @cache is a RAID56 block group.
 */
static u64 get_full_stripe_logical(struct btrfs_block_group *cache, u64 bytenr)
{
	u64 ret;

	/*
	 * Due to chunk item size limit, full stripe length should not be
	 * larger than U32_MAX. Just a sanity check here.
	 */
	WARN_ON_ONCE(cache->full_stripe_len >= U32_MAX);

	/*
	 * round_down() can only handle power of 2, while RAID56 full
	 * stripe length can be 64KiB * n, so we need to manually round down.
	 */
	ret = div64_u64(bytenr - cache->start, cache->full_stripe_len) *
			cache->full_stripe_len + cache->start;
	return ret;
}

/*
 * Lock a full stripe to avoid concurrency of recovery and read
 *
 * It's only used for profiles with parities (RAID5/6), for other profiles it
 * does nothing.
 *
 * Return 0 if we locked full stripe covering @bytenr, with a mutex held.
 * So caller must call unlock_full_stripe() at the same context.
 *
 * Return <0 if encounters error.
 */
static int lock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
			    bool *locked_ret)
{
	struct btrfs_block_group *bg_cache;
	struct btrfs_full_stripe_locks_tree *locks_root;
	struct full_stripe_lock *existing;
	u64 fstripe_start;
	int ret = 0;

	*locked_ret = false;
	bg_cache = btrfs_lookup_block_group(fs_info, bytenr);
	if (!bg_cache) {
		ASSERT(0);
		return -ENOENT;
	}

	/* Profiles not based on parity don't need full stripe lock */
	if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
		goto out;
	locks_root = &bg_cache->full_stripe_locks_root;

	fstripe_start = get_full_stripe_logical(bg_cache, bytenr);

	/* Now insert the full stripe lock */
	mutex_lock(&locks_root->lock);
	existing = insert_full_stripe_lock(locks_root, fstripe_start);
	mutex_unlock(&locks_root->lock);
	if (IS_ERR(existing)) {
		ret = PTR_ERR(existing);
		goto out;
	}
	mutex_lock(&existing->mutex);
	*locked_ret = true;
out:
	btrfs_put_block_group(bg_cache);
	return ret;
}

/*
 * Unlock a full stripe.
 *
 * NOTE: Caller must ensure it's the same context calling corresponding
 * lock_full_stripe().
 *
 * Return 0 if we unlock full stripe without problem.
 * Return <0 for error
 */
static int unlock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
			      bool locked)
{
	struct btrfs_block_group *bg_cache;
	struct btrfs_full_stripe_locks_tree *locks_root;
	struct full_stripe_lock *fstripe_lock;
	u64 fstripe_start;
	bool freeit = false;
	int ret = 0;

	/* If we didn't acquire full stripe lock, no need to continue */
	if (!locked)
		return 0;

	bg_cache = btrfs_lookup_block_group(fs_info, bytenr);
	if (!bg_cache) {
		ASSERT(0);
		return -ENOENT;
	}
	if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
		goto out;

	locks_root = &bg_cache->full_stripe_locks_root;
	fstripe_start = get_full_stripe_logical(bg_cache, bytenr);

	mutex_lock(&locks_root->lock);
	fstripe_lock = search_full_stripe_lock(locks_root, fstripe_start);
	/* Unpaired unlock_full_stripe() detected */
	if (!fstripe_lock) {
		WARN_ON(1);
		ret = -ENOENT;
		mutex_unlock(&locks_root->lock);
		goto out;
	}

	if (fstripe_lock->refs == 0) {
		WARN_ON(1);
		btrfs_warn(fs_info, "full stripe lock at %llu refcount underflow",
			fstripe_lock->logical);
	} else {
		fstripe_lock->refs--;
	}

	if (fstripe_lock->refs == 0) {
		rb_erase(&fstripe_lock->node, &locks_root->root);
		freeit = true;
	}
	mutex_unlock(&locks_root->lock);

	mutex_unlock(&fstripe_lock->mutex);
	if (freeit)
		kfree(fstripe_lock);
out:
	btrfs_put_block_group(bg_cache);
	return ret;
}

static void scrub_free_csums(struct scrub_ctx *sctx)
{
	while (!list_empty(&sctx->csum_list)) {
		struct btrfs_ordered_sum *sum;
		sum = list_first_entry(&sctx->csum_list,
				       struct btrfs_ordered_sum, list);
		list_del(&sum->list);
		kfree(sum);
	}
}

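/*
 * Free the scrub context, including any partially filled bio that is still
 * around (this can happen when a scrub is cancelled mid-flight).
 */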
static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
{
	int i;

	if (!sctx)
		return;

	/* this can happen when scrub is cancelled */
	if (sctx->curr != -1) {
		struct scrub_bio *sbio = sctx->bios[sctx->curr];

		for (i = 0; i < sbio->page_count; i++) {
			WARN_ON(!sbio->pagev[i]->page);
			scrub_block_put(sbio->pagev[i]->sblock);
		}
		bio_put(sbio->bio);
	}

	for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
		struct scrub_bio *sbio = sctx->bios[i];

		if (!sbio)
			break;
		kfree(sbio);
	}

	kfree(sctx->wr_curr_bio);
	scrub_free_csums(sctx);
	kfree(sctx);
}

static void scrub_put_ctx(struct scrub_ctx *sctx)
{
	if (refcount_dec_and_test(&sctx->refs))
		scrub_free_ctx(sctx);
}

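/*
 * Allocate a new scrub context and pre-allocate the fixed pool of read bios
 * (SCRUB_BIOS_PER_SCTX), linked together through a simple free list.
 */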
static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
		struct btrfs_fs_info *fs_info, int is_dev_replace)
{
	struct scrub_ctx *sctx;
	int i;

	sctx = kzalloc(sizeof(*sctx), GFP_KERNEL);
	if (!sctx)
		goto nomem;
	refcount_set(&sctx->refs, 1);
	sctx->is_dev_replace = is_dev_replace;
	sctx->pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
	sctx->curr = -1;
	sctx->fs_info = fs_info;
	INIT_LIST_HEAD(&sctx->csum_list);
	for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
		struct scrub_bio *sbio;

		sbio = kzalloc(sizeof(*sbio), GFP_KERNEL);
		if (!sbio)
			goto nomem;
		sctx->bios[i] = sbio;

		sbio->index = i;
		sbio->sctx = sctx;
		sbio->page_count = 0;
		btrfs_init_work(&sbio->work, scrub_bio_end_io_worker, NULL,
				NULL);

		if (i != SCRUB_BIOS_PER_SCTX - 1)
			sctx->bios[i]->next_free = i + 1;
		else
			sctx->bios[i]->next_free = -1;
	}
	sctx->first_free = 0;
	atomic_set(&sctx->bios_in_flight, 0);
	atomic_set(&sctx->workers_pending, 0);
	atomic_set(&sctx->cancel_req, 0);

	spin_lock_init(&sctx->list_lock);
	spin_lock_init(&sctx->stat_lock);
	init_waitqueue_head(&sctx->list_wait);
	sctx->throttle_deadline = 0;

	WARN_ON(sctx->wr_curr_bio != NULL);
	mutex_init(&sctx->wr_lock);
	sctx->wr_curr_bio = NULL;
	if (is_dev_replace) {
		WARN_ON(!fs_info->dev_replace.tgtdev);
		sctx->pages_per_wr_bio = SCRUB_PAGES_PER_WR_BIO;
		sctx->wr_tgtdev = fs_info->dev_replace.tgtdev;
		sctx->flush_all_writes = false;
	}

	return sctx;

nomem:
	scrub_free_ctx(sctx);
	return ERR_PTR(-ENOMEM);
}

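/*
 * Callback for iterate_extent_inodes(): resolve one inode that references
 * the corrupted extent and print a warning with the full file path(s).
 */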
static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
				     void *warn_ctx)
{
	u32 nlink;
	int ret;
	int i;
	unsigned nofs_flag;
	struct extent_buffer *eb;
	struct btrfs_inode_item *inode_item;
	struct scrub_warning *swarn = warn_ctx;
	struct btrfs_fs_info *fs_info = swarn->dev->fs_info;
	struct inode_fs_paths *ipath = NULL;
	struct btrfs_root *local_root;
	struct btrfs_key key;

	local_root = btrfs_get_fs_root(fs_info, root, true);
	if (IS_ERR(local_root)) {
		ret = PTR_ERR(local_root);
		goto err;
	}

	/*
	 * this makes the path point to (inum INODE_ITEM ioff)
	 */
	key.objectid = inum;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, local_root, &key, swarn->path, 0, 0);
	if (ret) {
		btrfs_put_root(local_root);
		btrfs_release_path(swarn->path);
		goto err;
	}

	eb = swarn->path->nodes[0];
	inode_item = btrfs_item_ptr(eb, swarn->path->slots[0],
					struct btrfs_inode_item);
	nlink = btrfs_inode_nlink(eb, inode_item);
	btrfs_release_path(swarn->path);

	/*
	 * init_ipath might indirectly call vmalloc, or use GFP_KERNEL. Scrub
	 * uses GFP_NOFS in this context, so we keep it consistent but it's
	 * not strictly necessary.
	 */
	nofs_flag = memalloc_nofs_save();
	ipath = init_ipath(4096, local_root, swarn->path);
	memalloc_nofs_restore(nofs_flag);
	if (IS_ERR(ipath)) {
		btrfs_put_root(local_root);
		ret = PTR_ERR(ipath);
		ipath = NULL;
		goto err;
	}
	ret = paths_from_inode(inum, ipath);

	if (ret < 0)
		goto err;

	/*
	 * we deliberately ignore the bit ipath might have been too small to
	 * hold all of the paths here
	 */
	for (i = 0; i < ipath->fspath->elem_cnt; ++i)
		btrfs_warn_in_rcu(fs_info,
"%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, length %u, links %u (path: %s)",
				  swarn->errstr, swarn->logical,
				  rcu_str_deref(swarn->dev->name),
				  swarn->physical,
				  root, inum, offset,
				  fs_info->sectorsize, nlink,
				  (char *)(unsigned long)ipath->fspath->val[i]);

	btrfs_put_root(local_root);
	free_ipath(ipath);
	return 0;

err:
	btrfs_warn_in_rcu(fs_info,
			  "%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d",
			  swarn->errstr, swarn->logical,
			  rcu_str_deref(swarn->dev->name),
			  swarn->physical,
			  root, inum, offset, ret);

	free_ipath(ipath);
	return 0;
}

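/*
 * Print a warning for a bad block, resolving the logical address back to
 * either the metadata tree that owns it or the file(s) that reference it.
 */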
static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
{
	struct btrfs_device *dev;
	struct btrfs_fs_info *fs_info;
	struct btrfs_path *path;
	struct btrfs_key found_key;
	struct extent_buffer *eb;
	struct btrfs_extent_item *ei;
	struct scrub_warning swarn;
	unsigned long ptr = 0;
	u64 extent_item_pos;
	u64 flags = 0;
	u64 ref_root;
	u32 item_size;
	u8 ref_level = 0;
	int ret;

	WARN_ON(sblock->page_count < 1);
	dev = sblock->pagev[0]->dev;
	fs_info = sblock->sctx->fs_info;

	path = btrfs_alloc_path();
	if (!path)
		return;

	swarn.physical = sblock->pagev[0]->physical;
	swarn.logical = sblock->pagev[0]->logical;
	swarn.errstr = errstr;
	swarn.dev = NULL;

	ret = extent_from_logical(fs_info, swarn.logical, path, &found_key,
				  &flags);
	if (ret < 0)
		goto out;

	extent_item_pos = swarn.logical - found_key.objectid;
	swarn.extent_item_size = found_key.offset;

	eb = path->nodes[0];
	ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
	item_size = btrfs_item_size_nr(eb, path->slots[0]);

	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		do {
			ret = tree_backref_for_extent(&ptr, eb, &found_key, ei,
						      item_size, &ref_root,
						      &ref_level);
			btrfs_warn_in_rcu(fs_info,
"%s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu",
				errstr, swarn.logical,
				rcu_str_deref(dev->name),
				swarn.physical,
				ref_level ? "node" : "leaf",
				ret < 0 ? -1 : ref_level,
				ret < 0 ? -1 : ref_root);
		} while (ret != 1);
		btrfs_release_path(path);
	} else {
		btrfs_release_path(path);
		swarn.path = path;
		swarn.dev = dev;
		iterate_extent_inodes(fs_info, found_key.objectid,
					extent_item_pos, 1,
					scrub_print_warning_inode, &swarn, false);
	}

out:
	btrfs_free_path(path);
}

static inline void scrub_get_recover(struct scrub_recover *recover)
{
	refcount_inc(&recover->refs);
}

static inline void scrub_put_recover(struct btrfs_fs_info *fs_info,
				     struct scrub_recover *recover)
{
	if (refcount_dec_and_test(&recover->refs)) {
		btrfs_bio_counter_dec(fs_info);
		btrfs_put_bbio(recover->bbio);
		kfree(recover);
	}
}

/*
 * scrub_handle_errored_block gets called when either verification of the
 * pages failed or the bio failed to read, e.g. with EIO. In the latter case,
 * this function handles all pages in the bio, even though only one may be
 * bad.
 * The goal of this function is to repair the errored block by using the
 * contents of one of the mirrors.
 */
static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
{
	struct scrub_ctx *sctx = sblock_to_check->sctx;
	struct btrfs_device *dev;
	struct btrfs_fs_info *fs_info;
	u64 logical;
	unsigned int failed_mirror_index;
	unsigned int is_metadata;
	unsigned int have_csum;
	struct scrub_block *sblocks_for_recheck; /* holds one for each mirror */
	struct scrub_block *sblock_bad;
	int ret;
	int mirror_index;
	int page_num;
	int success;
	bool full_stripe_locked;
	unsigned int nofs_flag;
	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	BUG_ON(sblock_to_check->page_count < 1);
	fs_info = sctx->fs_info;
	if (sblock_to_check->pagev[0]->flags & BTRFS_EXTENT_FLAG_SUPER) {
		/*
		 * if we find an error in a super block, we just report it.
		 * They will get written with the next transaction commit
		 * anyway
		 */
		spin_lock(&sctx->stat_lock);
		++sctx->stat.super_errors;
		spin_unlock(&sctx->stat_lock);
		return 0;
	}
	logical = sblock_to_check->pagev[0]->logical;
	BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1);
	failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1;
	is_metadata = !(sblock_to_check->pagev[0]->flags &
			BTRFS_EXTENT_FLAG_DATA);
	have_csum = sblock_to_check->pagev[0]->have_csum;
	dev = sblock_to_check->pagev[0]->dev;

	if (btrfs_is_zoned(fs_info) && !sctx->is_dev_replace)
		return btrfs_repair_one_zone(fs_info, logical);

	/*
	 * We must use GFP_NOFS because the scrub task might be waiting for a
	 * worker task executing this function and in turn a transaction commit
	 * might be waiting the scrub task to pause (which needs to wait for all
	 * the worker tasks to complete before pausing).
	 * We do allocations in the workers through insert_full_stripe_lock()
	 * and scrub_add_page_to_wr_bio(), which happens down the call chain of
	 * this function.
	 */
	nofs_flag = memalloc_nofs_save();
	/*
	 * For RAID5/6, race can happen for a different device scrub thread.
	 * For data corruption, Parity and Data threads will both try
	 * to recover the data.
	 * Race can lead to doubly added csum error, or even unrecoverable
	 * error.
	 */
	ret = lock_full_stripe(fs_info, logical, &full_stripe_locked);
	if (ret < 0) {
		memalloc_nofs_restore(nofs_flag);
		spin_lock(&sctx->stat_lock);
		if (ret == -ENOMEM)
			sctx->stat.malloc_errors++;
		sctx->stat.read_errors++;
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		return ret;
	}

	/*
	 * Read all mirrors one after the other. This includes to re-read the
	 * extent or metadata block that failed (that was the cause that this
	 * fixup code is called) another time, page by page this time in order
	 * to know which pages caused I/O errors and which ones are good (for
	 * all mirrors). It is the goal to handle the situation when more than
	 * one mirror contains I/O errors, but the errors do not overlap, i.e.
	 * the data can be repaired by selecting the pages from those mirrors
	 * without I/O error on the particular pages. One example (with blocks
	 * >= 2 * sectorsize) would be that mirror #1 has an I/O error on the
	 * first page, the second page is good, and mirror #2 has an I/O error
	 * on the second page, but the first page is good. Then the first page
	 * of the first mirror can be repaired by taking the first page of the
	 * second mirror, and the second page of the second mirror can be
	 * repaired by copying the contents of the 2nd page of the 1st mirror.
	 * One more note: if the pages of one mirror contain I/O errors, the
	 * checksum cannot be verified. In that case, processing is done page
	 * by page.
	 */
	sblocks_for_recheck = kcalloc(BTRFS_MAX_MIRRORS,
				      sizeof(*sblocks_for_recheck), GFP_KERNEL);
	if (!sblocks_for_recheck) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.malloc_errors++;
		sctx->stat.read_errors++;
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
		goto out;
	}

	/* setup the context, map the logical blocks and alloc the pages */
	ret = scrub_setup_recheck_block(sblock_to_check, sblocks_for_recheck);
	if (ret) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.read_errors++;
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
		goto out;
	}
	BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
	sblock_bad = sblocks_for_recheck + failed_mirror_index;

	/* build and submit the bios for the failed mirror, check checksums */
	scrub_recheck_block(fs_info, sblock_bad, 1);

	if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
	    sblock_bad->no_io_error_seen) {
		/*
		 * the error disappeared after reading page by page, or
		 * the area was part of a huge bio and other parts of the
		 * bio caused I/O errors, or the block layer merged with
		 * other bios and corrupted pages that belonged to other
		 * bios. Either way, the recheck succeeded, so just count
		 * the error as unverified.
		 */
		spin_lock(&sctx->stat_lock);
		sctx->stat.unverified_errors++;
		sblock_to_check->data_corrected = 1;
		spin_unlock(&sctx->stat_lock);

		if (sctx->is_dev_replace)
			scrub_write_block_to_dev_replace(sblock_bad);
		goto out;
	}

	if (!sblock_bad->no_io_error_seen) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.read_errors++;
		spin_unlock(&sctx->stat_lock);
		if (__ratelimit(&rs))
			scrub_print_warning("i/o error", sblock_to_check);
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
	} else if (sblock_bad->checksum_error) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.csum_errors++;
		spin_unlock(&sctx->stat_lock);
		if (__ratelimit(&rs))
			scrub_print_warning("checksum error", sblock_to_check);
		btrfs_dev_stat_inc_and_print(dev,
					     BTRFS_DEV_STAT_CORRUPTION_ERRS);
	} else if (sblock_bad->header_error) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.verify_errors++;
		spin_unlock(&sctx->stat_lock);
		if (__ratelimit(&rs))
			scrub_print_warning("checksum/header error",
					    sblock_to_check);
		if (sblock_bad->generation_error)
			btrfs_dev_stat_inc_and_print(dev,
				BTRFS_DEV_STAT_GENERATION_ERRS);
		else
			btrfs_dev_stat_inc_and_print(dev,
				BTRFS_DEV_STAT_CORRUPTION_ERRS);
	}

	if (sctx->readonly) {
		ASSERT(!sctx->is_dev_replace);
		goto out;
	}

	/*
	 * Now build and submit the bios for the other mirrors, check their
	 * checksums. First try to pick the mirror which is completely without
	 * I/O errors and also does not have a checksum error. If one is found,
	 * the full bad block is rewritten from it; otherwise repair continues
	 * page by page further below.
	 */
	for (mirror_index = 0; ; mirror_index++) {
		struct scrub_block *sblock_other;

		if (mirror_index == failed_mirror_index)
			continue;

		/* raid56's mirror can be more than BTRFS_MAX_MIRRORS */
		if (!scrub_is_page_on_raid56(sblock_bad->pagev[0])) {
			if (mirror_index >= BTRFS_MAX_MIRRORS)
				break;
			if (!sblocks_for_recheck[mirror_index].page_count)
				break;

			sblock_other = sblocks_for_recheck + mirror_index;
		} else {
			struct scrub_recover *r = sblock_bad->pagev[0]->recover;
			int max_allowed = r->bbio->num_stripes -
						r->bbio->num_tgtdevs;

			if (mirror_index >= max_allowed)
				break;
			if (!sblocks_for_recheck[1].page_count)
				break;

			ASSERT(failed_mirror_index == 0);
			sblock_other = sblocks_for_recheck + 1;
			sblock_other->pagev[0]->mirror_num = 1 + mirror_index;
		}

		/* build and submit the bios, check checksums */
		scrub_recheck_block(fs_info, sblock_other, 0);

		if (!sblock_other->header_error &&
		    !sblock_other->checksum_error &&
		    sblock_other->no_io_error_seen) {
			if (sctx->is_dev_replace) {
				scrub_write_block_to_dev_replace(sblock_other);
				goto corrected_error;
			} else {
				ret = scrub_repair_block_from_good_copy(
						sblock_bad, sblock_other);
				if (!ret)
					goto corrected_error;
			}
		}
	}

	if (sblock_bad->no_io_error_seen && !sctx->is_dev_replace)
		goto did_not_correct_error;

	/*
	 * In case of I/O errors in the area that is supposed to be repaired,
	 * continue by picking good copies of those pages. Select the good
	 * pages from mirrors to rewrite bad pages from the area to fix.
	 * Afterwards verify the checksum of the block that is supposed to be
	 * repaired. This verification step is only done for the purpose of
	 * statistic counting and for the final scrub report, whether errors
	 * remain.
	 * A perfect algorithm could make use of the checksum and try all
	 * possible combinations of pages from the different mirrors until the
	 * checksum verification succeeds. That would be a lot of effort for
	 * little gain, so here one good page per position is picked and the
	 * result is verified once at the end.
	 */
	success = 1;
	for (page_num = 0; page_num < sblock_bad->page_count;
	     page_num++) {
		struct scrub_page *spage_bad = sblock_bad->pagev[page_num];
		struct scrub_block *sblock_other = NULL;

		/* skip no-io-error page in scrub */
		if (!spage_bad->io_error && !sctx->is_dev_replace)
			continue;

		if (scrub_is_page_on_raid56(sblock_bad->pagev[0])) {
			/*
			 * In case of dev replace, if raid56 rebuild process
			 * didn't work out correct data, then copy the content
			 * in sblock_bad to make sure target device is identical
			 * to source device, instead of writing garbage data in
			 * sblock_for_recheck array to target device.
			 */
			sblock_other = NULL;
		} else if (spage_bad->io_error) {
			/* try to find no-io-error page in mirrors */
			for (mirror_index = 0;
			     mirror_index < BTRFS_MAX_MIRRORS &&
			     sblocks_for_recheck[mirror_index].page_count > 0;
			     mirror_index++) {
				if (!sblocks_for_recheck[mirror_index].
				    pagev[page_num]->io_error) {
					sblock_other = sblocks_for_recheck +
						       mirror_index;
					break;
				}
			}
			if (!sblock_other)
				success = 0;
		}

		if (sctx->is_dev_replace) {
			/*
			 * Did not find a mirror to fetch the page from.
			 * scrub_write_page_to_dev_replace() handles this case
			 * (page->io_error) by filling the block with zeros
			 * before submitting the write request.
			 */
			if (!sblock_other)
				sblock_other = sblock_bad;

			if (scrub_write_page_to_dev_replace(sblock_other,
							    page_num) != 0) {
				atomic64_inc(
					&fs_info->dev_replace.num_write_errors);
				success = 0;
			}
		} else if (sblock_other) {
			ret = scrub_repair_page_from_good_copy(sblock_bad,
							       sblock_other,
							       page_num, 0);
			if (!ret)
				spage_bad->io_error = 0;
			else
				success = 0;
		}
	}

	if (success && !sctx->is_dev_replace) {
		if (is_metadata || have_csum) {
			/*
			 * need to verify the checksum now that all
			 * sectors on disk are repaired (the write
			 * request for data to be repaired is on its way).
			 * Just be lazy and use scrub_recheck_block()
			 * which re-reads the data before the checksum
			 * is verified, but most likely the data comes out
			 * of the page cache.
			 */
			scrub_recheck_block(fs_info, sblock_bad, 1);
			if (!sblock_bad->header_error &&
			    !sblock_bad->checksum_error &&
			    sblock_bad->no_io_error_seen)
				goto corrected_error;
			else
				goto did_not_correct_error;
		} else {
corrected_error:
			spin_lock(&sctx->stat_lock);
			sctx->stat.corrected_errors++;
			sblock_to_check->data_corrected = 1;
			spin_unlock(&sctx->stat_lock);
			btrfs_err_rl_in_rcu(fs_info,
				"fixed up error at logical %llu on dev %s",
				logical, rcu_str_deref(dev->name));
		}
	} else {
did_not_correct_error:
		spin_lock(&sctx->stat_lock);
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		btrfs_err_rl_in_rcu(fs_info,
			"unable to fixup (regular) error at logical %llu on dev %s",
			logical, rcu_str_deref(dev->name));
	}

out:
	if (sblocks_for_recheck) {
		for (mirror_index = 0; mirror_index < BTRFS_MAX_MIRRORS;
		     mirror_index++) {
			struct scrub_block *sblock = sblocks_for_recheck +
						     mirror_index;
			struct scrub_recover *recover;
			int page_index;

			for (page_index = 0; page_index < sblock->page_count;
			     page_index++) {
				sblock->pagev[page_index]->sblock = NULL;
				recover = sblock->pagev[page_index]->recover;
				if (recover) {
					scrub_put_recover(fs_info, recover);
					sblock->pagev[page_index]->recover =
									NULL;
				}
				scrub_page_put(sblock->pagev[page_index]);
			}
		}
		kfree(sblocks_for_recheck);
	}

	ret = unlock_full_stripe(fs_info, logical, full_stripe_locked);
	memalloc_nofs_restore(nofs_flag);
	if (ret < 0)
		return ret;
	return 0;
}

static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio)
{
	if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
		return 2;
	else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
		return 3;
	else
		return (int)bbio->num_stripes;
}

static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
						 u64 *raid_map,
						 u64 mapped_length,
						 int nstripes, int mirror,
						 int *stripe_index,
						 u64 *stripe_offset)
{
	int i;

	if (map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
		/* RAID5/6 */
		for (i = 0; i < nstripes; i++) {
			if (raid_map[i] == RAID6_Q_STRIPE ||
			    raid_map[i] == RAID5_P_STRIPE)
				continue;

			if (logical >= raid_map[i] &&
			    logical < raid_map[i] + mapped_length)
				break;
		}

		*stripe_index = i;
		*stripe_offset = logical - raid_map[i];
	} else {
		/* The other RAID type */
		*stripe_index = mirror;
		*stripe_offset = 0;
	}
}

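/*
 * Build one scrub_block per mirror for the range covered by
 * @original_sblock, so that each copy can be re-read and verified page by
 * page during error handling.
 */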
static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
				     struct scrub_block *sblocks_for_recheck)
{
	struct scrub_ctx *sctx = original_sblock->sctx;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	u64 length = original_sblock->page_count * fs_info->sectorsize;
	u64 logical = original_sblock->pagev[0]->logical;
	u64 generation = original_sblock->pagev[0]->generation;
	u64 flags = original_sblock->pagev[0]->flags;
	u64 have_csum = original_sblock->pagev[0]->have_csum;
	struct scrub_recover *recover;
	struct btrfs_bio *bbio;
	u64 sublen;
	u64 mapped_length;
	u64 stripe_offset;
	int stripe_index;
	int page_index = 0;
	int mirror_index;
	int nmirrors;
	int ret;

	/*
	 * note: the two members refs and outstanding_pages
	 * are not used (and not set) in the blocks that are used for
	 * the recheck procedure
	 */
	while (length > 0) {
		sublen = min_t(u64, length, fs_info->sectorsize);
		mapped_length = sublen;
		bbio = NULL;

		/*
		 * With a length of sectorsize, each returned stripe represents
		 * one mirror
		 */
		btrfs_bio_counter_inc_blocked(fs_info);
		ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
				       logical, &mapped_length, &bbio);
		if (ret || !bbio || mapped_length < sublen) {
			btrfs_put_bbio(bbio);
			btrfs_bio_counter_dec(fs_info);
			return -EIO;
		}

		recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
		if (!recover) {
			btrfs_put_bbio(bbio);
			btrfs_bio_counter_dec(fs_info);
			return -ENOMEM;
		}

		refcount_set(&recover->refs, 1);
		recover->bbio = bbio;
		recover->map_length = mapped_length;

		BUG_ON(page_index >= SCRUB_MAX_PAGES_PER_BLOCK);

		nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS);

		for (mirror_index = 0; mirror_index < nmirrors;
		     mirror_index++) {
			struct scrub_block *sblock;
			struct scrub_page *spage;

			sblock = sblocks_for_recheck + mirror_index;
			sblock->sctx = sctx;

			spage = kzalloc(sizeof(*spage), GFP_NOFS);
			if (!spage) {
leave_nomem:
				spin_lock(&sctx->stat_lock);
				sctx->stat.malloc_errors++;
				spin_unlock(&sctx->stat_lock);
				scrub_put_recover(fs_info, recover);
				return -ENOMEM;
			}
			scrub_page_get(spage);
			sblock->pagev[page_index] = spage;
			spage->sblock = sblock;
			spage->flags = flags;
			spage->generation = generation;
			spage->logical = logical;
			spage->have_csum = have_csum;
			if (have_csum)
				memcpy(spage->csum,
				       original_sblock->pagev[0]->csum,
				       sctx->fs_info->csum_size);

			scrub_stripe_index_and_offset(logical,
						      bbio->map_type,
						      bbio->raid_map,
						      mapped_length,
						      bbio->num_stripes -
						      bbio->num_tgtdevs,
						      mirror_index,
						      &stripe_index,
						      &stripe_offset);
			spage->physical = bbio->stripes[stripe_index].physical +
					  stripe_offset;
			spage->dev = bbio->stripes[stripe_index].dev;

			BUG_ON(page_index >= original_sblock->page_count);
			spage->physical_for_dev_replace =
				original_sblock->pagev[page_index]->
				physical_for_dev_replace;
			/* for missing devices, dev->bdev is NULL */
			spage->mirror_num = mirror_index + 1;
			sblock->page_count++;
			spage->page = alloc_page(GFP_NOFS);
			if (!spage->page)
				goto leave_nomem;

			scrub_get_recover(recover);
			spage->recover = recover;
		}
		scrub_put_recover(fs_info, recover);
		length -= sublen;
		logical += sublen;
		page_index++;
	}

	return 0;
}

static void scrub_bio_wait_endio(struct bio *bio)
{
	complete(bio->bi_private);
}

static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
					struct bio *bio,
					struct scrub_page *spage)
{
	DECLARE_COMPLETION_ONSTACK(done);
	int ret;
	int mirror_num;

	bio->bi_iter.bi_sector = spage->logical >> 9;
	bio->bi_private = &done;
	bio->bi_end_io = scrub_bio_wait_endio;

	mirror_num = spage->sblock->pagev[0]->mirror_num;
	ret = raid56_parity_recover(fs_info, bio, spage->recover->bbio,
				    spage->recover->map_length,
				    mirror_num, 0);
	if (ret)
		return ret;

	wait_for_completion_io(&done);
	return blk_status_to_errno(bio->bi_status);
}

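/*
 * Re-read a RAID56 block through the parity recovery path: all pages of the
 * block are read with a single rebuilt bio and then checksummed.
 */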
static void scrub_recheck_block_on_raid56(struct btrfs_fs_info *fs_info,
					  struct scrub_block *sblock)
{
	struct scrub_page *first_page = sblock->pagev[0];
	struct bio *bio;
	int page_num;

	/* All pages in sblock belong to the same stripe on the same device. */
	ASSERT(first_page->dev);
	if (!first_page->dev->bdev)
		goto out;

	bio = btrfs_io_bio_alloc(BIO_MAX_VECS);
	bio_set_dev(bio, first_page->dev->bdev);

	for (page_num = 0; page_num < sblock->page_count; page_num++) {
		struct scrub_page *spage = sblock->pagev[page_num];

		WARN_ON(!spage->page);
		bio_add_page(bio, spage->page, PAGE_SIZE, 0);
	}

	if (scrub_submit_raid56_bio_wait(fs_info, bio, first_page)) {
		bio_put(bio);
		goto out;
	}

	bio_put(bio);

	scrub_recheck_block_checksum(sblock);

	return;
out:
	for (page_num = 0; page_num < sblock->page_count; page_num++)
		sblock->pagev[page_num]->io_error = 1;

	sblock->no_io_error_seen = 0;
}

/*
 * this function will check the on disk data for checksum errors, header
 * errors and read I/O errors. If any I/O errors happen, the exact pages
 * which are errored are marked as being bad.
 */
static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
				struct scrub_block *sblock,
				int retry_failed_mirror)
{
	int page_num;

	sblock->no_io_error_seen = 1;

	/* short cut for raid56 */
	if (!retry_failed_mirror && scrub_is_page_on_raid56(sblock->pagev[0]))
		return scrub_recheck_block_on_raid56(fs_info, sblock);

	for (page_num = 0; page_num < sblock->page_count; page_num++) {
		struct bio *bio;
		struct scrub_page *spage = sblock->pagev[page_num];

		if (spage->dev->bdev == NULL) {
			spage->io_error = 1;
			sblock->no_io_error_seen = 0;
			continue;
		}

		WARN_ON(!spage->page);
		bio = btrfs_io_bio_alloc(1);
		bio_set_dev(bio, spage->dev->bdev);

		bio_add_page(bio, spage->page, fs_info->sectorsize, 0);
		bio->bi_iter.bi_sector = spage->physical >> 9;
		bio->bi_opf = REQ_OP_READ;

		if (btrfsic_submit_bio_wait(bio)) {
			spage->io_error = 1;
			sblock->no_io_error_seen = 0;
		}

		bio_put(bio);
	}

	if (sblock->no_io_error_seen)
		scrub_recheck_block_checksum(sblock);
}

static inline int scrub_check_fsid(u8 fsid[], struct scrub_page *spage)
{
	struct btrfs_fs_devices *fs_devices = spage->dev->fs_devices;
	int ret;

	ret = memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
	return !ret;
}

static void scrub_recheck_block_checksum(struct scrub_block *sblock)
{
	sblock->header_error = 0;
	sblock->checksum_error = 0;
	sblock->generation_error = 0;

	if (sblock->pagev[0]->flags & BTRFS_EXTENT_FLAG_DATA)
		scrub_checksum_data(sblock);
	else
		scrub_checksum_tree_block(sblock);
}

static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
					     struct scrub_block *sblock_good)
{
	int page_num;
	int ret = 0;

	for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
		int ret_sub;

		ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
							   sblock_good,
							   page_num, 1);
		if (ret_sub)
			ret = ret_sub;
	}

	return ret;
}

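/*
 * Overwrite one sector of the bad mirror with the content of the good one.
 * With @force_write the sector is rewritten unconditionally, otherwise only
 * when the bad block showed a header, checksum or I/O error.
 */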
static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
					    struct scrub_block *sblock_good,
					    int page_num, int force_write)
{
	struct scrub_page *spage_bad = sblock_bad->pagev[page_num];
	struct scrub_page *spage_good = sblock_good->pagev[page_num];
	struct btrfs_fs_info *fs_info = sblock_bad->sctx->fs_info;
	const u32 sectorsize = fs_info->sectorsize;

	BUG_ON(spage_bad->page == NULL);
	BUG_ON(spage_good->page == NULL);
	if (force_write || sblock_bad->header_error ||
	    sblock_bad->checksum_error || spage_bad->io_error) {
		struct bio *bio;
		int ret;

		if (!spage_bad->dev->bdev) {
			btrfs_warn_rl(fs_info,
				"scrub_repair_page_from_good_copy(bdev == NULL) is unexpected");
			return -EIO;
		}

		bio = btrfs_io_bio_alloc(1);
		bio_set_dev(bio, spage_bad->dev->bdev);
		bio->bi_iter.bi_sector = spage_bad->physical >> 9;
		bio->bi_opf = REQ_OP_WRITE;

		ret = bio_add_page(bio, spage_good->page, sectorsize, 0);
		if (ret != sectorsize) {
			bio_put(bio);
			return -EIO;
		}

		if (btrfsic_submit_bio_wait(bio)) {
			btrfs_dev_stat_inc_and_print(spage_bad->dev,
				BTRFS_DEV_STAT_WRITE_ERRS);
			atomic64_inc(&fs_info->dev_replace.num_write_errors);
			bio_put(bio);
			return -EIO;
		}
		bio_put(bio);
	}

	return 0;
}

static void scrub_write_block_to_dev_replace(struct scrub_block *sblock)
{
	struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;
	int page_num;

	/*
	 * This block is used for the check of the parity on the source device,
	 * so the data needn't be written into the destination device.
	 */
	if (sblock->sparity)
		return;

	for (page_num = 0; page_num < sblock->page_count; page_num++) {
		int ret;

		ret = scrub_write_page_to_dev_replace(sblock, page_num);
		if (ret)
			atomic64_inc(&fs_info->dev_replace.num_write_errors);
	}
}

static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
					   int page_num)
{
	struct scrub_page *spage = sblock->pagev[page_num];

	BUG_ON(spage->page == NULL);
	if (spage->io_error)
		clear_page(page_address(spage->page));

	return scrub_add_page_to_wr_bio(sblock->sctx, spage);
}

static int fill_writer_pointer_gap(struct scrub_ctx *sctx, u64 physical)
{
	int ret = 0;
	u64 length;

	if (!btrfs_is_zoned(sctx->fs_info))
		return 0;

	if (!btrfs_dev_is_sequential(sctx->wr_tgtdev, physical))
		return 0;

	if (sctx->write_pointer < physical) {
		length = physical - sctx->write_pointer;

		ret = btrfs_zoned_issue_zeroout(sctx->wr_tgtdev,
						sctx->write_pointer, length);
		if (!ret)
			sctx->write_pointer = physical;
	}
	return ret;
}

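/*
 * Queue one sector for writing to the dev-replace target, batching
 * physically contiguous sectors into a single write bio and submitting the
 * bio once it is full or the contiguity is broken.
 */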
static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage)
{
	struct scrub_bio *sbio;
	int ret;
	const u32 sectorsize = sctx->fs_info->sectorsize;

	mutex_lock(&sctx->wr_lock);
again:
	if (!sctx->wr_curr_bio) {
		sctx->wr_curr_bio = kzalloc(sizeof(*sctx->wr_curr_bio),
					    GFP_KERNEL);
		if (!sctx->wr_curr_bio) {
			mutex_unlock(&sctx->wr_lock);
			return -ENOMEM;
		}
		sctx->wr_curr_bio->sctx = sctx;
		sctx->wr_curr_bio->page_count = 0;
	}
	sbio = sctx->wr_curr_bio;
	if (sbio->page_count == 0) {
		struct bio *bio;

		ret = fill_writer_pointer_gap(sctx,
					      spage->physical_for_dev_replace);
		if (ret) {
			mutex_unlock(&sctx->wr_lock);
			return ret;
		}

		sbio->physical = spage->physical_for_dev_replace;
		sbio->logical = spage->logical;
		sbio->dev = sctx->wr_tgtdev;
		bio = sbio->bio;
		if (!bio) {
			bio = btrfs_io_bio_alloc(sctx->pages_per_wr_bio);
			sbio->bio = bio;
		}

		bio->bi_private = sbio;
		bio->bi_end_io = scrub_wr_bio_end_io;
		bio_set_dev(bio, sbio->dev->bdev);
		bio->bi_iter.bi_sector = sbio->physical >> 9;
		bio->bi_opf = REQ_OP_WRITE;
		sbio->status = 0;
	} else if (sbio->physical + sbio->page_count * sectorsize !=
		   spage->physical_for_dev_replace ||
		   sbio->logical + sbio->page_count * sectorsize !=
		   spage->logical) {
		scrub_wr_submit(sctx);
		goto again;
	}

	ret = bio_add_page(sbio->bio, spage->page, sectorsize, 0);
	if (ret != sectorsize) {
		if (sbio->page_count < 1) {
			bio_put(sbio->bio);
			sbio->bio = NULL;
			mutex_unlock(&sctx->wr_lock);
			return -EIO;
		}
		scrub_wr_submit(sctx);
		goto again;
	}

	sbio->pagev[sbio->page_count] = spage;
	scrub_page_get(spage);
	sbio->page_count++;
	if (sbio->page_count == sctx->pages_per_wr_bio)
		scrub_wr_submit(sctx);
	mutex_unlock(&sctx->wr_lock);

	return 0;
}

static void scrub_wr_submit(struct scrub_ctx *sctx)
{
	struct scrub_bio *sbio;

	if (!sctx->wr_curr_bio)
		return;

	sbio = sctx->wr_curr_bio;
	sctx->wr_curr_bio = NULL;
	WARN_ON(!sbio->bio->bi_bdev);
	scrub_pending_bio_inc(sctx);
	/* process all writes in a single worker thread. Then the block layer
	 * orders the requests before sending them to the driver which
	 * doubles the write performance */
	btrfsic_submit_bio(sbio->bio);

	if (btrfs_is_zoned(sctx->fs_info))
		sctx->write_pointer = sbio->physical + sbio->page_count *
			sctx->fs_info->sectorsize;
}

static void scrub_wr_bio_end_io(struct bio *bio)
{
	struct scrub_bio *sbio = bio->bi_private;
	struct btrfs_fs_info *fs_info = sbio->dev->fs_info;

	sbio->status = bio->bi_status;
	sbio->bio = bio;

	btrfs_init_work(&sbio->work, scrub_wr_bio_end_io_worker, NULL, NULL);
	btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work);
}

static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
{
	struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
	struct scrub_ctx *sctx = sbio->sctx;
	int i;

	WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
	if (sbio->status) {
		struct btrfs_dev_replace *dev_replace =
			&sbio->sctx->fs_info->dev_replace;

		for (i = 0; i < sbio->page_count; i++) {
			struct scrub_page *spage = sbio->pagev[i];

			spage->io_error = 1;
			atomic64_inc(&dev_replace->num_write_errors);
		}
	}

	for (i = 0; i < sbio->page_count; i++)
		scrub_page_put(sbio->pagev[i]);

	bio_put(sbio->bio);
	kfree(sbio);
	scrub_pending_bio_dec(sctx);
}

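/*
 * Verify the checksum of one completed block, dispatching to the data,
 * tree block or super block variant based on the extent flags, and kick
 * off repair when the block turns out to be bad.
 */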
static int scrub_checksum(struct scrub_block *sblock)
{
	u64 flags;
	int ret;

	/*
	 * No need to initialize these stats currently,
	 * because this function only use return value
	 * instead of these stats value.
	 *
	 * Todo:
	 * check these stats and remove useless ones.
	 */
	sblock->header_error = 0;
	sblock->generation_error = 0;
	sblock->checksum_error = 0;

	WARN_ON(sblock->page_count < 1);
	flags = sblock->pagev[0]->flags;
	ret = 0;
	if (flags & BTRFS_EXTENT_FLAG_DATA)
		ret = scrub_checksum_data(sblock);
	else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
		ret = scrub_checksum_tree_block(sblock);
	else if (flags & BTRFS_EXTENT_FLAG_SUPER)
		(void)scrub_checksum_super(sblock);
	else
		WARN_ON(1);
	if (ret)
		scrub_handle_errored_block(sblock);

	return ret;
}

static int scrub_checksum_data(struct scrub_block *sblock)
{
	struct scrub_ctx *sctx = sblock->sctx;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
	u8 csum[BTRFS_CSUM_SIZE];
	struct scrub_page *spage;
	char *kaddr;

	BUG_ON(sblock->page_count < 1);
	spage = sblock->pagev[0];
	if (!spage->have_csum)
		return 0;

	kaddr = page_address(spage->page);

	shash->tfm = fs_info->csum_shash;
	crypto_shash_init(shash);

	/*
	 * In scrub_pages() and scrub_pages_for_parity() we ensure each spage
	 * only contains one sector of data.
	 */
	crypto_shash_digest(shash, kaddr, fs_info->sectorsize, csum);

	if (memcmp(csum, spage->csum, fs_info->csum_size))
		sblock->checksum_error = 1;
	return sblock->checksum_error;
}

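/*
 * Verify a tree block: besides the checksum over all sectors of the node,
 * the header fields (bytenr, generation, fsid and chunk tree uuid) must
 * match what scrub expected to find at this location.
 */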
static int scrub_checksum_tree_block(struct scrub_block *sblock)
{
	struct scrub_ctx *sctx = sblock->sctx;
	struct btrfs_header *h;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
	u8 calculated_csum[BTRFS_CSUM_SIZE];
	u8 on_disk_csum[BTRFS_CSUM_SIZE];
	/*
	 * This is done in sectorsize steps even for metadata as there's a
	 * constraint for nodesize and sectorsize, where for nodesize >=
	 * sectorsize, nodesize must be a multiple of sectorsize.
	 */
	const u32 sectorsize = sctx->fs_info->sectorsize;
	const int num_sectors = fs_info->nodesize >> fs_info->sectorsize_bits;
	int i;
	struct scrub_page *spage;
	char *kaddr;

	BUG_ON(sblock->page_count < 1);

	/* Each member in pagev is just one sector, not a full page */
	ASSERT(sblock->page_count == num_sectors);

	spage = sblock->pagev[0];
	kaddr = page_address(spage->page);
	h = (struct btrfs_header *)kaddr;
	memcpy(on_disk_csum, h->csum, sctx->fs_info->csum_size);

	/*
	 * we don't use the getter functions here, as we
	 * a) don't have an extent buffer and
	 * b) the page is already kmapped
	 */
	if (spage->logical != btrfs_stack_header_bytenr(h))
		sblock->header_error = 1;

	if (spage->generation != btrfs_stack_header_generation(h)) {
		sblock->header_error = 1;
		sblock->generation_error = 1;
	}

	if (!scrub_check_fsid(h->fsid, spage))
		sblock->header_error = 1;

	if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
		   BTRFS_UUID_SIZE))
		sblock->header_error = 1;

	shash->tfm = fs_info->csum_shash;
	crypto_shash_init(shash);
	crypto_shash_update(shash, kaddr + BTRFS_CSUM_SIZE,
			    sectorsize - BTRFS_CSUM_SIZE);

	for (i = 1; i < num_sectors; i++) {
		kaddr = page_address(sblock->pagev[i]->page);
		crypto_shash_update(shash, kaddr, sectorsize);
	}

	crypto_shash_final(shash, calculated_csum);
	if (memcmp(calculated_csum, on_disk_csum, sctx->fs_info->csum_size))
		sblock->checksum_error = 1;

	return sblock->header_error || sblock->checksum_error;
}

static int scrub_checksum_super(struct scrub_block *sblock)
{
	struct btrfs_super_block *s;
	struct scrub_ctx *sctx = sblock->sctx;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
	u8 calculated_csum[BTRFS_CSUM_SIZE];
	struct scrub_page *spage;
	char *kaddr;
	int fail_gen = 0;
	int fail_cor = 0;

	BUG_ON(sblock->page_count < 1);
	spage = sblock->pagev[0];
	kaddr = page_address(spage->page);
	s = (struct btrfs_super_block *)kaddr;

	if (spage->logical != btrfs_super_bytenr(s))
		++fail_cor;

	if (spage->generation != btrfs_super_generation(s))
		++fail_gen;

	if (!scrub_check_fsid(s->fsid, spage))
		++fail_cor;

	shash->tfm = fs_info->csum_shash;
	crypto_shash_init(shash);
	crypto_shash_digest(shash, kaddr + BTRFS_CSUM_SIZE,
			BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, calculated_csum);

	if (memcmp(calculated_csum, s->csum, sctx->fs_info->csum_size))
		++fail_cor;

	if (fail_cor + fail_gen) {
		/*
		 * if we find an error in a super block, we just report it.
		 * They will get written with the next transaction commit
		 * anyway
		 */
		spin_lock(&sctx->stat_lock);
		++sctx->stat.super_errors;
		spin_unlock(&sctx->stat_lock);
		if (fail_cor)
			btrfs_dev_stat_inc_and_print(spage->dev,
				BTRFS_DEV_STAT_CORRUPTION_ERRS);
		else
			btrfs_dev_stat_inc_and_print(spage->dev,
				BTRFS_DEV_STAT_GENERATION_ERRS);
	}

	return fail_cor + fail_gen;
}

static void scrub_block_get(struct scrub_block *sblock)
{
	refcount_inc(&sblock->refs);
}

static void scrub_block_put(struct scrub_block *sblock)
{
	if (refcount_dec_and_test(&sblock->refs)) {
		int i;

		if (sblock->sparity)
			scrub_parity_put(sblock->sparity);

		for (i = 0; i < sblock->page_count; i++)
			scrub_page_put(sblock->pagev[i]);
		kfree(sblock);
	}
}

static void scrub_page_get(struct scrub_page *spage)
{
	atomic_inc(&spage->refs);
}

static void scrub_page_put(struct scrub_page *spage)
{
	if (atomic_dec_and_test(&spage->refs)) {
		if (spage->page)
			__free_page(spage->page);
		kfree(spage);
	}
}

/*
 * Throttling of IO submission, bandwidth-limit based, the timeslice is 1
 * second.
 */
static void scrub_throttle(struct scrub_ctx *sctx)
{
	const int time_slice = 1000;
	struct scrub_bio *sbio;
	struct btrfs_device *device;
	s64 delta;
	ktime_t now;
	u32 div;
	u64 bwlimit;

	sbio = sctx->bios[sctx->curr];
	device = sbio->dev;
	bwlimit = READ_ONCE(device->scrub_speed_max);
	if (bwlimit == 0)
		return;

	/*
	 * Slice is divided into intervals when the IO is submitted, adjust by
	 * bwlimit and maximum of 64 intervals.
	 */
	div = max_t(u32, 1, (u32)(bwlimit / (16 * 1024 * 1024)));
	div = min_t(u32, 64, div);

	/* Start new epoch, set deadline */
	now = ktime_get();
	if (sctx->throttle_deadline == 0) {
		sctx->throttle_deadline = ktime_add_ms(now, time_slice / div);
		sctx->throttle_sent = 0;
	}

	/* Still in the time to send? */
	if (ktime_before(now, sctx->throttle_deadline)) {
		/* If current bio is within the limit, send it */
		sctx->throttle_sent += sbio->bio->bi_iter.bi_size;
		if (sctx->throttle_sent <= div_u64(bwlimit, div))
			return;

		/* We're over the limit, sleep until the rest of the slice */
		delta = ktime_ms_delta(sctx->throttle_deadline, now);
	} else {
		/* New request after deadline, start new epoch */
		delta = 0;
	}

	if (delta) {
		long timeout;

		timeout = div_u64(delta * HZ, 1000);
		schedule_timeout_interruptible(timeout);
	}

	/* Next call will start the deadline period */
	sctx->throttle_deadline = 0;
}

static void scrub_submit(struct scrub_ctx *sctx)
{
	struct scrub_bio *sbio;

	if (sctx->curr == -1)
		return;

	scrub_throttle(sctx);

	sbio = sctx->bios[sctx->curr];
	sctx->curr = -1;
	scrub_pending_bio_inc(sctx);
	btrfsic_submit_bio(sbio->bio);
}

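/*
 * Queue one sector in the current read bio, grabbing a free scrub_bio from
 * the pool first if needed. The bio is submitted as soon as it is full or
 * the next sector is not physically/logically contiguous on the same device.
 */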
static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage)
{
	struct scrub_block *sblock = spage->sblock;
	struct scrub_bio *sbio;
	const u32 sectorsize = sctx->fs_info->sectorsize;
	int ret;

again:
	/*
	 * grab a fresh bio or wait for one to become available
	 */
	while (sctx->curr == -1) {
		spin_lock(&sctx->list_lock);
		sctx->curr = sctx->first_free;
		if (sctx->curr != -1) {
			sctx->first_free = sctx->bios[sctx->curr]->next_free;
			sctx->bios[sctx->curr]->next_free = -1;
			sctx->bios[sctx->curr]->page_count = 0;
			spin_unlock(&sctx->list_lock);
		} else {
			spin_unlock(&sctx->list_lock);
			wait_event(sctx->list_wait, sctx->first_free != -1);
		}
	}
	sbio = sctx->bios[sctx->curr];
	if (sbio->page_count == 0) {
		struct bio *bio;

		sbio->physical = spage->physical;
		sbio->logical = spage->logical;
		sbio->dev = spage->dev;
		bio = sbio->bio;
		if (!bio) {
			bio = btrfs_io_bio_alloc(sctx->pages_per_rd_bio);
			sbio->bio = bio;
		}

		bio->bi_private = sbio;
		bio->bi_end_io = scrub_bio_end_io;
		bio_set_dev(bio, sbio->dev->bdev);
		bio->bi_iter.bi_sector = sbio->physical >> 9;
		bio->bi_opf = REQ_OP_READ;
		sbio->status = 0;
	} else if (sbio->physical + sbio->page_count * sectorsize !=
		   spage->physical ||
		   sbio->logical + sbio->page_count * sectorsize !=
		   spage->logical ||
		   sbio->dev != spage->dev) {
		scrub_submit(sctx);
		goto again;
	}

	sbio->pagev[sbio->page_count] = spage;
	ret = bio_add_page(sbio->bio, spage->page, sectorsize, 0);
	if (ret != sectorsize) {
		if (sbio->page_count < 1) {
			bio_put(sbio->bio);
			sbio->bio = NULL;
			return -EIO;
		}
		scrub_submit(sctx);
		goto again;
	}

	scrub_block_get(sblock); /* one for the page added to the bio */
	atomic_inc(&sblock->outstanding_pages);
	sbio->page_count++;
	if (sbio->page_count == sctx->pages_per_rd_bio)
		scrub_submit(sctx);

	return 0;
}

static void scrub_missing_raid56_end_io(struct bio *bio)
{
	struct scrub_block *sblock = bio->bi_private;
	struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;

	if (bio->bi_status)
		sblock->no_io_error_seen = 0;

	bio_put(bio);

	btrfs_queue_work(fs_info->scrub_workers, &sblock->work);
}

static void scrub_missing_raid56_worker(struct btrfs_work *work)
{
	struct scrub_block *sblock = container_of(work, struct scrub_block, work);
	struct scrub_ctx *sctx = sblock->sctx;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	u64 logical;
	struct btrfs_device *dev;

	logical = sblock->pagev[0]->logical;
	dev = sblock->pagev[0]->dev;

	if (sblock->no_io_error_seen)
		scrub_recheck_block_checksum(sblock);

	if (!sblock->no_io_error_seen) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.read_errors++;
		spin_unlock(&sctx->stat_lock);
		btrfs_err_rl_in_rcu(fs_info,
			"IO error rebuilding logical %llu for dev %s",
			logical, rcu_str_deref(dev->name));
	} else if (sblock->header_error || sblock->checksum_error) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		btrfs_err_rl_in_rcu(fs_info,
			"failed to rebuild valid logical %llu for dev %s",
			logical, rcu_str_deref(dev->name));
	} else {
		scrub_write_block_to_dev_replace(sblock);
	}

	if (sctx->is_dev_replace && sctx->flush_all_writes) {
		mutex_lock(&sctx->wr_lock);
		scrub_wr_submit(sctx);
		mutex_unlock(&sctx->wr_lock);
	}

	scrub_block_put(sblock);
	scrub_pending_bio_dec(sctx);
}

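/*
 * Rebuild the data of a block that lives on a missing device by reading the
 * remaining stripes through the RAID56 recovery code; the result is verified
 * and written to the dev-replace target from the worker above.
 */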
static void scrub_missing_raid56_pages(struct scrub_block *sblock)
{
	struct scrub_ctx *sctx = sblock->sctx;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	u64 length = sblock->page_count * PAGE_SIZE;
	u64 logical = sblock->pagev[0]->logical;
	struct btrfs_bio *bbio = NULL;
	struct bio *bio;
	struct btrfs_raid_bio *rbio;
	int ret;
	int i;

	btrfs_bio_counter_inc_blocked(fs_info);
	ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
			       &length, &bbio);
	if (ret || !bbio || !bbio->raid_map)
		goto bbio_out;

	if (WARN_ON(!sctx->is_dev_replace ||
		    !(bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK))) {
		/*
		 * We shouldn't be scrubbing a missing device. Even for dev
		 * replace, we should only get here for RAID 5/6. We either
		 * managed to mount something with no mirrors remaining or
		 * there's a bug in scrub_remap_extent()/btrfs_map_block().
		 */
		goto bbio_out;
	}

	bio = btrfs_io_bio_alloc(0);
	bio->bi_iter.bi_sector = logical >> 9;
	bio->bi_private = sblock;
	bio->bi_end_io = scrub_missing_raid56_end_io;

	rbio = raid56_alloc_missing_rbio(fs_info, bio, bbio, length);
	if (!rbio)
		goto rbio_out;

	for (i = 0; i < sblock->page_count; i++) {
		struct scrub_page *spage = sblock->pagev[i];

		raid56_add_scrub_pages(rbio, spage->page, spage->logical);
	}

	btrfs_init_work(&sblock->work, scrub_missing_raid56_worker, NULL, NULL);
	scrub_block_get(sblock);
	scrub_pending_bio_inc(sctx);
	raid56_submit_missing_rbio(rbio);
	return;

rbio_out:
	bio_put(bio);
bbio_out:
	btrfs_bio_counter_dec(fs_info);
	btrfs_put_bbio(bbio);
	spin_lock(&sctx->stat_lock);
	sctx->stat.malloc_errors++;
	spin_unlock(&sctx->stat_lock);
}

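/*
 * Split an extent into sector-sized scrub_pages, collect them in one
 * scrub_block and queue each sector for reading. The block is verified when
 * the last of its reads completes.
 */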
2260static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u32 len,
2261 u64 physical, struct btrfs_device *dev, u64 flags,
2262 u64 gen, int mirror_num, u8 *csum,
2263 u64 physical_for_dev_replace)
2264{
2265 struct scrub_block *sblock;
2266 const u32 sectorsize = sctx->fs_info->sectorsize;
2267 int index;
2268
2269 sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
2270 if (!sblock) {
2271 spin_lock(&sctx->stat_lock);
2272 sctx->stat.malloc_errors++;
2273 spin_unlock(&sctx->stat_lock);
2274 return -ENOMEM;
2275 }
2276
2277
2278
2279 refcount_set(&sblock->refs, 1);
2280 sblock->sctx = sctx;
2281 sblock->no_io_error_seen = 1;
2282
2283 for (index = 0; len > 0; index++) {
2284 struct scrub_page *spage;
2285
2286
2287
2288
2289
2290 u32 l = min(sectorsize, len);
2291
2292 spage = kzalloc(sizeof(*spage), GFP_KERNEL);
2293 if (!spage) {
2294leave_nomem:
2295 spin_lock(&sctx->stat_lock);
2296 sctx->stat.malloc_errors++;
2297 spin_unlock(&sctx->stat_lock);
2298 scrub_block_put(sblock);
2299 return -ENOMEM;
2300 }
2301 BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
2302 scrub_page_get(spage);
2303 sblock->pagev[index] = spage;
2304 spage->sblock = sblock;
2305 spage->dev = dev;
2306 spage->flags = flags;
2307 spage->generation = gen;
2308 spage->logical = logical;
2309 spage->physical = physical;
2310 spage->physical_for_dev_replace = physical_for_dev_replace;
2311 spage->mirror_num = mirror_num;
2312 if (csum) {
2313 spage->have_csum = 1;
2314 memcpy(spage->csum, csum, sctx->fs_info->csum_size);
2315 } else {
2316 spage->have_csum = 0;
2317 }
2318 sblock->page_count++;
2319 spage->page = alloc_page(GFP_KERNEL);
2320 if (!spage->page)
2321 goto leave_nomem;
2322 len -= l;
2323 logical += l;
2324 physical += l;
2325 physical_for_dev_replace += l;
2326 }
2327
2328 WARN_ON(sblock->page_count == 0);
2329 if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) {
		/*
		 * This case should only be hit for RAID 5/6 device replace.
		 * See the comment in scrub_missing_raid56_pages() for details.
		 */
2334 scrub_missing_raid56_pages(sblock);
2335 } else {
2336 for (index = 0; index < sblock->page_count; index++) {
2337 struct scrub_page *spage = sblock->pagev[index];
2338 int ret;
2339
2340 ret = scrub_add_page_to_rd_bio(sctx, spage);
2341 if (ret) {
2342 scrub_block_put(sblock);
2343 return ret;
2344 }
2345 }
2346
2347 if (flags & BTRFS_EXTENT_FLAG_SUPER)
2348 scrub_submit(sctx);
2349 }
2350
	/* Last one frees, either here or in bio completion for last page */
2352 scrub_block_put(sblock);
2353 return 0;
2354}
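/*
 * A worked example of the splitting above (illustrative numbers): with
 * sectorsize = 4K, a 16K metadata block is cut into four scrub_pages, each
 * backed by its own freshly allocated page. The sblock starts with one ref
 * owned by this function; every page queued into a read bio holds another
 * block ref (dropped per page in scrub_bio_end_io_worker()), so the final
 * scrub_block_put() here only frees the block once no bio references it.
 */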
2355
2356static void scrub_bio_end_io(struct bio *bio)
2357{
2358 struct scrub_bio *sbio = bio->bi_private;
2359 struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
2360
2361 sbio->status = bio->bi_status;
2362 sbio->bio = bio;
2363
2364 btrfs_queue_work(fs_info->scrub_workers, &sbio->work);
2365}
2366
2367static void scrub_bio_end_io_worker(struct btrfs_work *work)
2368{
2369 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
2370 struct scrub_ctx *sctx = sbio->sctx;
2371 int i;
2372
2373 BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
2374 if (sbio->status) {
2375 for (i = 0; i < sbio->page_count; i++) {
2376 struct scrub_page *spage = sbio->pagev[i];
2377
2378 spage->io_error = 1;
2379 spage->sblock->no_io_error_seen = 0;
2380 }
2381 }
2382
	/* Now complete the scrub_block items that have all pages completed */
2384 for (i = 0; i < sbio->page_count; i++) {
2385 struct scrub_page *spage = sbio->pagev[i];
2386 struct scrub_block *sblock = spage->sblock;
2387
2388 if (atomic_dec_and_test(&sblock->outstanding_pages))
2389 scrub_block_complete(sblock);
2390 scrub_block_put(sblock);
2391 }
2392
2393 bio_put(sbio->bio);
2394 sbio->bio = NULL;
2395 spin_lock(&sctx->list_lock);
2396 sbio->next_free = sctx->first_free;
2397 sctx->first_free = sbio->index;
2398 spin_unlock(&sctx->list_lock);
2399
2400 if (sctx->is_dev_replace && sctx->flush_all_writes) {
2401 mutex_lock(&sctx->wr_lock);
2402 scrub_wr_submit(sctx);
2403 mutex_unlock(&sctx->wr_lock);
2404 }
2405
2406 scrub_pending_bio_dec(sctx);
2407}
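/*
 * A sketch of the sbio recycling above: sctx->bios[] is a fixed pool whose
 * free entries form a LIFO list, chained by ->next_free and headed by
 * sctx->first_free. E.g. if first_free is 3 and the sbio at index 7
 * completes, the list becomes 7 -> 3 -> ..., so index 7 is handed out
 * first when the next read bio is assembled.
 */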
2408
2409static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
2410 unsigned long *bitmap,
2411 u64 start, u32 len)
2412{
2413 u64 offset;
2414 u32 nsectors;
2415 u32 sectorsize_bits = sparity->sctx->fs_info->sectorsize_bits;
2416
2417 if (len >= sparity->stripe_len) {
2418 bitmap_set(bitmap, 0, sparity->nsectors);
2419 return;
2420 }
2421
2422 start -= sparity->logic_start;
2423 start = div64_u64_rem(start, sparity->stripe_len, &offset);
2424 offset = offset >> sectorsize_bits;
2425 nsectors = len >> sectorsize_bits;
2426
2427 if (offset + nsectors <= sparity->nsectors) {
2428 bitmap_set(bitmap, offset, nsectors);
2429 return;
2430 }
2431
2432 bitmap_set(bitmap, offset, sparity->nsectors - offset);
2433 bitmap_set(bitmap, 0, nsectors - (sparity->nsectors - offset));
2434}
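/*
 * A worked example of the wrap-around above, assuming stripe_len = 64K and
 * sectorsize = 4K (so sparity->nsectors = 16): marking start = logic_start
 * + 56K with len = 16K yields offset = 14 for a run of 4 sectors, and
 * since 14 + 4 > 16 the range wraps, setting bits 14, 15, 0 and 1.
 */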
2435
2436static inline void scrub_parity_mark_sectors_error(struct scrub_parity *sparity,
2437 u64 start, u32 len)
2438{
2439 __scrub_mark_bitmap(sparity, sparity->ebitmap, start, len);
2440}
2441
2442static inline void scrub_parity_mark_sectors_data(struct scrub_parity *sparity,
2443 u64 start, u32 len)
2444{
2445 __scrub_mark_bitmap(sparity, sparity->dbitmap, start, len);
2446}
2447
2448static void scrub_block_complete(struct scrub_block *sblock)
2449{
2450 int corrupted = 0;
2451
2452 if (!sblock->no_io_error_seen) {
2453 corrupted = 1;
2454 scrub_handle_errored_block(sblock);
2455 } else {
		/*
		 * No IO errors were seen: verify the checksums. If the block
		 * turns out fine and we're replacing a device, it can be
		 * written to the target right here; blocks with checksum
		 * errors go through the repair machinery instead.
		 */
2461 corrupted = scrub_checksum(sblock);
2462 if (!corrupted && sblock->sctx->is_dev_replace)
2463 scrub_write_block_to_dev_replace(sblock);
2464 }
2465
2466 if (sblock->sparity && corrupted && !sblock->data_corrected) {
2467 u64 start = sblock->pagev[0]->logical;
2468 u64 end = sblock->pagev[sblock->page_count - 1]->logical +
2469 sblock->sctx->fs_info->sectorsize;
2470
2471 ASSERT(end - start <= U32_MAX);
2472 scrub_parity_mark_sectors_error(sblock->sparity,
2473 start, end - start);
2474 }
2475}
2476
2477static void drop_csum_range(struct scrub_ctx *sctx, struct btrfs_ordered_sum *sum)
2478{
2479 sctx->stat.csum_discards += sum->len >> sctx->fs_info->sectorsize_bits;
2480 list_del(&sum->list);
2481 kfree(sum);
2482}
2483
/*
 * Find the desired csum for range [logical, logical + sectorsize), and store
 * the csum into @csum.
 *
 * The search source is sctx->csum_list, which is a pre-populated list
 * storing bytenr-ordered csum ranges. We're responsible for cleaning up any
 * range that is before @logical.
 *
 * Return 0 if there is no csum for the range.
 * Return 1 if there is csum for the range and it was copied to @csum.
 */
2495static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum)
2496{
2497 bool found = false;
2498
2499 while (!list_empty(&sctx->csum_list)) {
2500 struct btrfs_ordered_sum *sum = NULL;
2501 unsigned long index;
2502 unsigned long num_sectors;
2503
2504 sum = list_first_entry(&sctx->csum_list,
2505 struct btrfs_ordered_sum, list);
2506
2507 if (sum->bytenr > logical)
2508 break;
2509
		/*
		 * The current sum is before our bytenr; since scrub is always
		 * done in bytenr order, the csum will never be used anymore.
		 * Clean it up so that later calls won't bother with the range,
		 * and continue the search for the next range.
		 */
2516 if (sum->bytenr + sum->len <= logical) {
2517 drop_csum_range(sctx, sum);
2518 continue;
2519 }
2520
		/* Found one, copy the csum for this sector */
2522 found = true;
2523 index = (logical - sum->bytenr) >> sctx->fs_info->sectorsize_bits;
2524 num_sectors = sum->len >> sctx->fs_info->sectorsize_bits;
2525
2526 memcpy(csum, sum->sums + index * sctx->fs_info->csum_size,
2527 sctx->fs_info->csum_size);
2528
		/* Cleanup the range if we're at the last sector of the range */
2530 if (index == num_sectors - 1)
2531 drop_csum_range(sctx, sum);
2532 break;
2533 }
2534 if (!found)
2535 return 0;
2536 return 1;
2537}
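/*
 * Example of the index math above, assuming crc32c (csum_size = 4),
 * sectorsize = 4K and a cached range with bytenr = 1M and len = 64K:
 * a lookup at logical = 1M + 8K yields index = 2 and num_sectors = 16,
 * so four bytes are copied from sum->sums + 8. The range is only dropped
 * once its last sector (index == 15) has been consumed, since scrub
 * advances in bytenr order.
 */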
2538
/*
 * Scrub one extent: split it into blocksize-sized units, look up the data
 * csum for each unit and queue it for reading.
 */
2540static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
2541 u64 logical, u32 len,
2542 u64 physical, struct btrfs_device *dev, u64 flags,
2543 u64 gen, int mirror_num, u64 physical_for_dev_replace)
2544{
2545 int ret;
2546 u8 csum[BTRFS_CSUM_SIZE];
2547 u32 blocksize;
2548
2549 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2550 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
2551 blocksize = map->stripe_len;
2552 else
2553 blocksize = sctx->fs_info->sectorsize;
2554 spin_lock(&sctx->stat_lock);
2555 sctx->stat.data_extents_scrubbed++;
2556 sctx->stat.data_bytes_scrubbed += len;
2557 spin_unlock(&sctx->stat_lock);
2558 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
2559 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
2560 blocksize = map->stripe_len;
2561 else
2562 blocksize = sctx->fs_info->nodesize;
2563 spin_lock(&sctx->stat_lock);
2564 sctx->stat.tree_extents_scrubbed++;
2565 sctx->stat.tree_bytes_scrubbed += len;
2566 spin_unlock(&sctx->stat_lock);
2567 } else {
2568 blocksize = sctx->fs_info->sectorsize;
2569 WARN_ON(1);
2570 }
2571
2572 while (len) {
2573 u32 l = min(len, blocksize);
2574 int have_csum = 0;
2575
2576 if (flags & BTRFS_EXTENT_FLAG_DATA) {
			/* Push csums to sbio */
2578 have_csum = scrub_find_csum(sctx, logical, csum);
2579 if (have_csum == 0)
2580 ++sctx->stat.no_csum;
2581 }
2582 ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen,
2583 mirror_num, have_csum ? csum : NULL,
2584 physical_for_dev_replace);
2585 if (ret)
2586 return ret;
2587 len -= l;
2588 logical += l;
2589 physical += l;
2590 physical_for_dev_replace += l;
2591 }
2592 return 0;
2593}
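/*
 * A sketch of the split above, with assumed numbers: a 192K data extent on
 * a RAID5 chunk (blocksize = stripe_len = 64K) becomes three scrub_pages()
 * calls of 64K each, while on a RAID1 chunk (blocksize = sectorsize = 4K)
 * the same extent becomes 48 sector-sized blocks, each preceded by its own
 * csum lookup.
 */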
2594
2595static int scrub_pages_for_parity(struct scrub_parity *sparity,
2596 u64 logical, u32 len,
2597 u64 physical, struct btrfs_device *dev,
2598 u64 flags, u64 gen, int mirror_num, u8 *csum)
2599{
2600 struct scrub_ctx *sctx = sparity->sctx;
2601 struct scrub_block *sblock;
2602 const u32 sectorsize = sctx->fs_info->sectorsize;
2603 int index;
2604
2605 ASSERT(IS_ALIGNED(len, sectorsize));
2606
2607 sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
2608 if (!sblock) {
2609 spin_lock(&sctx->stat_lock);
2610 sctx->stat.malloc_errors++;
2611 spin_unlock(&sctx->stat_lock);
2612 return -ENOMEM;
2613 }
2614
	/* One ref inside this function, plus one for each page added to
	 * a bio later on */
2617 refcount_set(&sblock->refs, 1);
2618 sblock->sctx = sctx;
2619 sblock->no_io_error_seen = 1;
2620 sblock->sparity = sparity;
2621 scrub_parity_get(sparity);
2622
2623 for (index = 0; len > 0; index++) {
2624 struct scrub_page *spage;
2625
2626 spage = kzalloc(sizeof(*spage), GFP_KERNEL);
2627 if (!spage) {
2628leave_nomem:
2629 spin_lock(&sctx->stat_lock);
2630 sctx->stat.malloc_errors++;
2631 spin_unlock(&sctx->stat_lock);
2632 scrub_block_put(sblock);
2633 return -ENOMEM;
2634 }
2635 BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
		/* For scrub block */
2637 scrub_page_get(spage);
2638 sblock->pagev[index] = spage;
		/* For scrub parity */
2640 scrub_page_get(spage);
2641 list_add_tail(&spage->list, &sparity->spages);
2642 spage->sblock = sblock;
2643 spage->dev = dev;
2644 spage->flags = flags;
2645 spage->generation = gen;
2646 spage->logical = logical;
2647 spage->physical = physical;
2648 spage->mirror_num = mirror_num;
2649 if (csum) {
2650 spage->have_csum = 1;
2651 memcpy(spage->csum, csum, sctx->fs_info->csum_size);
2652 } else {
2653 spage->have_csum = 0;
2654 }
2655 sblock->page_count++;
2656 spage->page = alloc_page(GFP_KERNEL);
2657 if (!spage->page)
2658 goto leave_nomem;

		/* Iterate over the stripe range in sectorsize steps */
2662 len -= sectorsize;
2663 logical += sectorsize;
2664 physical += sectorsize;
2665 }
2666
2667 WARN_ON(sblock->page_count == 0);
2668 for (index = 0; index < sblock->page_count; index++) {
2669 struct scrub_page *spage = sblock->pagev[index];
2670 int ret;
2671
2672 ret = scrub_add_page_to_rd_bio(sctx, spage);
2673 if (ret) {
2674 scrub_block_put(sblock);
2675 return ret;
2676 }
2677 }
2678
	/* Last one frees, either here or in bio completion for last page */
2680 scrub_block_put(sblock);
2681 return 0;
2682}
2683
2684static int scrub_extent_for_parity(struct scrub_parity *sparity,
2685 u64 logical, u32 len,
2686 u64 physical, struct btrfs_device *dev,
2687 u64 flags, u64 gen, int mirror_num)
2688{
2689 struct scrub_ctx *sctx = sparity->sctx;
2690 int ret;
2691 u8 csum[BTRFS_CSUM_SIZE];
2692 u32 blocksize;
2693
2694 if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) {
2695 scrub_parity_mark_sectors_error(sparity, logical, len);
2696 return 0;
2697 }
2698
2699 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2700 blocksize = sparity->stripe_len;
2701 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
2702 blocksize = sparity->stripe_len;
2703 } else {
2704 blocksize = sctx->fs_info->sectorsize;
2705 WARN_ON(1);
2706 }
2707
2708 while (len) {
2709 u32 l = min(len, blocksize);
2710 int have_csum = 0;
2711
2712 if (flags & BTRFS_EXTENT_FLAG_DATA) {
			/* Push csums to sbio */
2714 have_csum = scrub_find_csum(sctx, logical, csum);
2715 if (have_csum == 0)
2716 goto skip;
2717 }
2718 ret = scrub_pages_for_parity(sparity, logical, l, physical, dev,
2719 flags, gen, mirror_num,
2720 have_csum ? csum : NULL);
2721 if (ret)
2722 return ret;
2723skip:
2724 len -= l;
2725 logical += l;
2726 physical += l;
2727 }
2728 return 0;
2729}
2730
/*
 * Given a physical address, this will calculate its logical offset.
 * If this is a parity stripe, it will return the logical offset of the
 * leftmost data stripe of the same full stripe.
 *
 * Return 0 if it is a data stripe, 1 means parity stripe.
 */
2738static int get_raid56_logic_offset(u64 physical, int num,
2739 struct map_lookup *map, u64 *offset,
2740 u64 *stripe_start)
2741{
2742 int i;
2743 int j = 0;
2744 u64 stripe_nr;
2745 u64 last_offset;
2746 u32 stripe_index;
2747 u32 rot;
2748 const int data_stripes = nr_data_stripes(map);
2749
2750 last_offset = (physical - map->stripes[num].physical) * data_stripes;
2751 if (stripe_start)
2752 *stripe_start = last_offset;
2753
2754 *offset = last_offset;
2755 for (i = 0; i < data_stripes; i++) {
2756 *offset = last_offset + i * map->stripe_len;
2757
2758 stripe_nr = div64_u64(*offset, map->stripe_len);
2759 stripe_nr = div_u64(stripe_nr, data_stripes);

		/* Work out the disk rotation on this stripe-set */
2762 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &rot);
		/* Calculate which stripe this data is located on */
2764 rot += i;
2765 stripe_index = rot % map->num_stripes;
2766 if (stripe_index == num)
2767 return 0;
2768 if (stripe_index < num)
2769 j++;
2770 }
2771 *offset = last_offset + j * map->stripe_len;
2772 return 1;
2773}
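/*
 * A worked example of the rotation math above (illustrative, assuming
 * RAID5 with num_stripes = 3, data_stripes = 2, stripe_len = 64K, and
 * scrubbing stripe num = 0):
 *
 * - physical 64K into the device extent: last_offset = 128K, full stripe
 *   row 1, rot = 1, so the row's data stripes live on devices 1 and 2 and
 *   device 0 holds parity. No iteration matches stripe_index == num, so
 *   we return 1 with *offset = 128K (the row's leftmost data offset).
 *
 * - physical 128K: row 2, rot = 2, data stripes on devices 2 and 0.
 *   The i = 1 iteration matches, so we return 0 with *offset = 320K.
 */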
2774
2775static void scrub_free_parity(struct scrub_parity *sparity)
2776{
2777 struct scrub_ctx *sctx = sparity->sctx;
2778 struct scrub_page *curr, *next;
2779 int nbits;
2780
2781 nbits = bitmap_weight(sparity->ebitmap, sparity->nsectors);
2782 if (nbits) {
2783 spin_lock(&sctx->stat_lock);
2784 sctx->stat.read_errors += nbits;
2785 sctx->stat.uncorrectable_errors += nbits;
2786 spin_unlock(&sctx->stat_lock);
2787 }
2788
2789 list_for_each_entry_safe(curr, next, &sparity->spages, list) {
2790 list_del_init(&curr->list);
2791 scrub_page_put(curr);
2792 }
2793
2794 kfree(sparity);
2795}
2796
2797static void scrub_parity_bio_endio_worker(struct btrfs_work *work)
2798{
2799 struct scrub_parity *sparity = container_of(work, struct scrub_parity,
2800 work);
2801 struct scrub_ctx *sctx = sparity->sctx;
2802
2803 scrub_free_parity(sparity);
2804 scrub_pending_bio_dec(sctx);
2805}
2806
2807static void scrub_parity_bio_endio(struct bio *bio)
2808{
2809 struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
2810 struct btrfs_fs_info *fs_info = sparity->sctx->fs_info;
2811
2812 if (bio->bi_status)
2813 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
2814 sparity->nsectors);
2815
2816 bio_put(bio);
2817
2818 btrfs_init_work(&sparity->work, scrub_parity_bio_endio_worker, NULL,
2819 NULL);
2820 btrfs_queue_work(fs_info->scrub_parity_workers, &sparity->work);
2821}
2822
2823static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
2824{
2825 struct scrub_ctx *sctx = sparity->sctx;
2826 struct btrfs_fs_info *fs_info = sctx->fs_info;
2827 struct bio *bio;
2828 struct btrfs_raid_bio *rbio;
2829 struct btrfs_bio *bbio = NULL;
2830 u64 length;
2831 int ret;
2832
2833 if (!bitmap_andnot(sparity->dbitmap, sparity->dbitmap, sparity->ebitmap,
2834 sparity->nsectors))
2835 goto out;
2836
2837 length = sparity->logic_end - sparity->logic_start;
2838
2839 btrfs_bio_counter_inc_blocked(fs_info);
2840 ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, sparity->logic_start,
2841 &length, &bbio);
2842 if (ret || !bbio || !bbio->raid_map)
2843 goto bbio_out;
2844
2845 bio = btrfs_io_bio_alloc(0);
2846 bio->bi_iter.bi_sector = sparity->logic_start >> 9;
2847 bio->bi_private = sparity;
2848 bio->bi_end_io = scrub_parity_bio_endio;
2849
2850 rbio = raid56_parity_alloc_scrub_rbio(fs_info, bio, bbio,
2851 length, sparity->scrub_dev,
2852 sparity->dbitmap,
2853 sparity->nsectors);
2854 if (!rbio)
2855 goto rbio_out;
2856
2857 scrub_pending_bio_inc(sctx);
2858 raid56_parity_submit_scrub_rbio(rbio);
2859 return;
2860
2861rbio_out:
2862 bio_put(bio);
2863bbio_out:
2864 btrfs_bio_counter_dec(fs_info);
2865 btrfs_put_bbio(bbio);
2866 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
2867 sparity->nsectors);
2868 spin_lock(&sctx->stat_lock);
2869 sctx->stat.malloc_errors++;
2870 spin_unlock(&sctx->stat_lock);
2871out:
2872 scrub_free_parity(sparity);
2873}
2874
2875static inline int scrub_calc_parity_bitmap_len(int nsectors)
2876{
2877 return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * sizeof(long);
2878}
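/*
 * E.g. with stripe_len = 64K and sectorsize = 4K, nsectors is 16 and the
 * bitmap length rounds up to a single long (8 bytes on 64-bit). The caller
 * allocates twice this length, since dbitmap and ebitmap both live in the
 * trailing bitmap[] array of struct scrub_parity.
 */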
2879
2880static void scrub_parity_get(struct scrub_parity *sparity)
2881{
2882 refcount_inc(&sparity->refs);
2883}
2884
2885static void scrub_parity_put(struct scrub_parity *sparity)
2886{
2887 if (!refcount_dec_and_test(&sparity->refs))
2888 return;
2889
2890 scrub_parity_check_and_repair(sparity);
2891}
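/*
 * A lifetime sketch: the refcount starts at 1 in scrub_raid56_parity() and
 * every sblock created by scrub_pages_for_parity() takes an extra reference,
 * so the final put, and with it scrub_parity_check_and_repair(), runs only
 * after the parity stripe's last data block has completed.
 */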
2892
2893static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
2894 struct map_lookup *map,
2895 struct btrfs_device *sdev,
2896 struct btrfs_path *path,
2897 u64 logic_start,
2898 u64 logic_end)
2899{
2900 struct btrfs_fs_info *fs_info = sctx->fs_info;
2901 struct btrfs_root *root = fs_info->extent_root;
2902 struct btrfs_root *csum_root = fs_info->csum_root;
2903 struct btrfs_extent_item *extent;
2904 struct btrfs_bio *bbio = NULL;
2905 u64 flags;
2906 int ret;
2907 int slot;
2908 struct extent_buffer *l;
2909 struct btrfs_key key;
2910 u64 generation;
2911 u64 extent_logical;
2912 u64 extent_physical;
	/* See the comment in scrub_stripe() for why u32 is enough here */
2914 u32 extent_len;
2915 u64 mapped_length;
2916 struct btrfs_device *extent_dev;
2917 struct scrub_parity *sparity;
2918 int nsectors;
2919 int bitmap_len;
2920 int extent_mirror_num;
2921 int stop_loop = 0;
2922
2923 ASSERT(map->stripe_len <= U32_MAX);
2924 nsectors = map->stripe_len >> fs_info->sectorsize_bits;
2925 bitmap_len = scrub_calc_parity_bitmap_len(nsectors);
2926 sparity = kzalloc(sizeof(struct scrub_parity) + 2 * bitmap_len,
2927 GFP_NOFS);
2928 if (!sparity) {
2929 spin_lock(&sctx->stat_lock);
2930 sctx->stat.malloc_errors++;
2931 spin_unlock(&sctx->stat_lock);
2932 return -ENOMEM;
2933 }
2934
2935 ASSERT(map->stripe_len <= U32_MAX);
2936 sparity->stripe_len = map->stripe_len;
2937 sparity->nsectors = nsectors;
2938 sparity->sctx = sctx;
2939 sparity->scrub_dev = sdev;
2940 sparity->logic_start = logic_start;
2941 sparity->logic_end = logic_end;
2942 refcount_set(&sparity->refs, 1);
2943 INIT_LIST_HEAD(&sparity->spages);
2944 sparity->dbitmap = sparity->bitmap;
2945 sparity->ebitmap = (void *)sparity->bitmap + bitmap_len;
2946
2947 ret = 0;
2948 while (logic_start < logic_end) {
2949 if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
2950 key.type = BTRFS_METADATA_ITEM_KEY;
2951 else
2952 key.type = BTRFS_EXTENT_ITEM_KEY;
2953 key.objectid = logic_start;
2954 key.offset = (u64)-1;
2955
2956 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2957 if (ret < 0)
2958 goto out;
2959
2960 if (ret > 0) {
2961 ret = btrfs_previous_extent_item(root, path, 0);
2962 if (ret < 0)
2963 goto out;
2964 if (ret > 0) {
2965 btrfs_release_path(path);
2966 ret = btrfs_search_slot(NULL, root, &key,
2967 path, 0, 0);
2968 if (ret < 0)
2969 goto out;
2970 }
2971 }
2972
2973 stop_loop = 0;
2974 while (1) {
2975 u64 bytes;
2976
2977 l = path->nodes[0];
2978 slot = path->slots[0];
2979 if (slot >= btrfs_header_nritems(l)) {
2980 ret = btrfs_next_leaf(root, path);
2981 if (ret == 0)
2982 continue;
2983 if (ret < 0)
2984 goto out;
2985
2986 stop_loop = 1;
2987 break;
2988 }
2989 btrfs_item_key_to_cpu(l, &key, slot);
2990
2991 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
2992 key.type != BTRFS_METADATA_ITEM_KEY)
2993 goto next;
2994
2995 if (key.type == BTRFS_METADATA_ITEM_KEY)
2996 bytes = fs_info->nodesize;
2997 else
2998 bytes = key.offset;
2999
3000 if (key.objectid + bytes <= logic_start)
3001 goto next;
3002
3003 if (key.objectid >= logic_end) {
3004 stop_loop = 1;
3005 break;
3006 }
3007
3008 while (key.objectid >= logic_start + map->stripe_len)
3009 logic_start += map->stripe_len;
3010
3011 extent = btrfs_item_ptr(l, slot,
3012 struct btrfs_extent_item);
3013 flags = btrfs_extent_flags(l, extent);
3014 generation = btrfs_extent_generation(l, extent);
3015
3016 if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
3017 (key.objectid < logic_start ||
3018 key.objectid + bytes >
3019 logic_start + map->stripe_len)) {
3020 btrfs_err(fs_info,
3021 "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
3022 key.objectid, logic_start);
3023 spin_lock(&sctx->stat_lock);
3024 sctx->stat.uncorrectable_errors++;
3025 spin_unlock(&sctx->stat_lock);
3026 goto next;
3027 }
3028again:
3029 extent_logical = key.objectid;
3030 ASSERT(bytes <= U32_MAX);
3031 extent_len = bytes;
3032
3033 if (extent_logical < logic_start) {
3034 extent_len -= logic_start - extent_logical;
3035 extent_logical = logic_start;
3036 }
3037
3038 if (extent_logical + extent_len >
3039 logic_start + map->stripe_len)
3040 extent_len = logic_start + map->stripe_len -
3041 extent_logical;
3042
3043 scrub_parity_mark_sectors_data(sparity, extent_logical,
3044 extent_len);
3045
3046 mapped_length = extent_len;
3047 bbio = NULL;
3048 ret = btrfs_map_block(fs_info, BTRFS_MAP_READ,
3049 extent_logical, &mapped_length, &bbio,
3050 0);
3051 if (!ret) {
3052 if (!bbio || mapped_length < extent_len)
3053 ret = -EIO;
3054 }
3055 if (ret) {
3056 btrfs_put_bbio(bbio);
3057 goto out;
3058 }
3059 extent_physical = bbio->stripes[0].physical;
3060 extent_mirror_num = bbio->mirror_num;
3061 extent_dev = bbio->stripes[0].dev;
3062 btrfs_put_bbio(bbio);
3063
3064 ret = btrfs_lookup_csums_range(csum_root,
3065 extent_logical,
3066 extent_logical + extent_len - 1,
3067 &sctx->csum_list, 1);
3068 if (ret)
3069 goto out;
3070
3071 ret = scrub_extent_for_parity(sparity, extent_logical,
3072 extent_len,
3073 extent_physical,
3074 extent_dev, flags,
3075 generation,
3076 extent_mirror_num);
3077
3078 scrub_free_csums(sctx);
3079
3080 if (ret)
3081 goto out;
3082
3083 if (extent_logical + extent_len <
3084 key.objectid + bytes) {
3085 logic_start += map->stripe_len;
3086
3087 if (logic_start >= logic_end) {
3088 stop_loop = 1;
3089 break;
3090 }
3091
3092 if (logic_start < key.objectid + bytes) {
3093 cond_resched();
3094 goto again;
3095 }
3096 }
3097next:
3098 path->slots[0]++;
3099 }
3100
3101 btrfs_release_path(path);
3102
3103 if (stop_loop)
3104 break;
3105
3106 logic_start += map->stripe_len;
3107 }
3108out:
3109 if (ret < 0) {
3110 ASSERT(logic_end - logic_start <= U32_MAX);
3111 scrub_parity_mark_sectors_error(sparity, logic_start,
3112 logic_end - logic_start);
3113 }
3114 scrub_parity_put(sparity);
3115 scrub_submit(sctx);
3116 mutex_lock(&sctx->wr_lock);
3117 scrub_wr_submit(sctx);
3118 mutex_unlock(&sctx->wr_lock);
3119
3120 btrfs_release_path(path);
3121 return ret < 0 ? ret : 0;
3122}
3123
3124static void sync_replace_for_zoned(struct scrub_ctx *sctx)
3125{
3126 if (!btrfs_is_zoned(sctx->fs_info))
3127 return;
3128
3129 sctx->flush_all_writes = true;
3130 scrub_submit(sctx);
3131 mutex_lock(&sctx->wr_lock);
3132 scrub_wr_submit(sctx);
3133 mutex_unlock(&sctx->wr_lock);
3134
3135 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
3136}
3137
3138static int sync_write_pointer_for_zoned(struct scrub_ctx *sctx, u64 logical,
3139 u64 physical, u64 physical_end)
3140{
3141 struct btrfs_fs_info *fs_info = sctx->fs_info;
3142 int ret = 0;
3143
3144 if (!btrfs_is_zoned(fs_info))
3145 return 0;
3146
3147 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
3148
3149 mutex_lock(&sctx->wr_lock);
3150 if (sctx->write_pointer < physical_end) {
3151 ret = btrfs_sync_zone_write_pointer(sctx->wr_tgtdev, logical,
3152 physical,
3153 sctx->write_pointer);
3154 if (ret)
3155 btrfs_err(fs_info,
3156 "zoned: failed to recover write pointer");
3157 }
3158 mutex_unlock(&sctx->wr_lock);
3159 btrfs_dev_clear_zone_empty(sctx->wr_tgtdev, physical);
3160
3161 return ret;
3162}
3163
3164static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
3165 struct map_lookup *map,
3166 struct btrfs_device *scrub_dev,
3167 int num, u64 base, u64 length,
3168 struct btrfs_block_group *cache)
3169{
3170 struct btrfs_path *path, *ppath;
3171 struct btrfs_fs_info *fs_info = sctx->fs_info;
3172 struct btrfs_root *root = fs_info->extent_root;
3173 struct btrfs_root *csum_root = fs_info->csum_root;
3174 struct btrfs_extent_item *extent;
3175 struct blk_plug plug;
3176 u64 flags;
3177 int ret;
3178 int slot;
3179 u64 nstripes;
3180 struct extent_buffer *l;
3181 u64 physical;
3182 u64 logical;
3183 u64 logic_end;
3184 u64 physical_end;
3185 u64 generation;
3186 int mirror_num;
3187 struct reada_control *reada1;
3188 struct reada_control *reada2;
3189 struct btrfs_key key;
3190 struct btrfs_key key_end;
3191 u64 increment = map->stripe_len;
3192 u64 offset;
3193 u64 extent_logical;
3194 u64 extent_physical;
	/*
	 * Unlike chunk length, extent length should never go beyond
	 * BTRFS_STRIPE_LEN, thus u32 is enough here.
	 */
3199 u32 extent_len;
3200 u64 stripe_logical;
3201 u64 stripe_end;
3202 struct btrfs_device *extent_dev;
3203 int extent_mirror_num;
3204 int stop_loop = 0;
3205
3206 physical = map->stripes[num].physical;
3207 offset = 0;
3208 nstripes = div64_u64(length, map->stripe_len);
3209 mirror_num = 1;
3210 increment = map->stripe_len;
3211 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
3212 offset = map->stripe_len * num;
3213 increment = map->stripe_len * map->num_stripes;
3214 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
3215 int factor = map->num_stripes / map->sub_stripes;
3216 offset = map->stripe_len * (num / map->sub_stripes);
3217 increment = map->stripe_len * factor;
3218 mirror_num = num % map->sub_stripes + 1;
3219 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
3220 mirror_num = num % map->num_stripes + 1;
3221 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
3222 mirror_num = num % map->num_stripes + 1;
3223 } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3224 get_raid56_logic_offset(physical, num, map, &offset, NULL);
3225 increment = map->stripe_len * nr_data_stripes(map);
3226 }
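	/*
	 * Illustrative values for the setup above, assuming stripe_len = 64K
	 * and RAID10 with num_stripes = 4 and sub_stripes = 2: stripe num = 3
	 * yields offset = 64K, increment = 128K and mirror_num = 2, i.e. this
	 * device holds the second copy of every other 64K slice of the chunk.
	 */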
3227
3228 path = btrfs_alloc_path();
3229 if (!path)
3230 return -ENOMEM;
3231
3232 ppath = btrfs_alloc_path();
3233 if (!ppath) {
3234 btrfs_free_path(path);
3235 return -ENOMEM;
3236 }
3237
	/*
	 * Work on the commit root: the related disk blocks are static as
	 * long as COW is applied, so it is safe to read them without
	 * taking tree locks, and reading does not hold off transaction
	 * commits.
	 */
3243 path->search_commit_root = 1;
3244 path->skip_locking = 1;
3245
3246 ppath->search_commit_root = 1;
3247 ppath->skip_locking = 1;
3248
	/*
	 * Trigger the readahead for the extent tree and csum tree and wait
	 * for completion. During readahead, the scrub is officially paused
	 * to not hold off transaction commits.
	 */
3253 logical = base + offset;
3254 physical_end = physical + nstripes * map->stripe_len;
3255 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3256 get_raid56_logic_offset(physical_end, num,
3257 map, &logic_end, NULL);
3258 logic_end += base;
3259 } else {
3260 logic_end = logical + increment * nstripes;
3261 }
3262 wait_event(sctx->list_wait,
3263 atomic_read(&sctx->bios_in_flight) == 0);
3264 scrub_blocked_if_needed(fs_info);
3265
	/* FIXME it might be better to start readahead at commit root */
3267 key.objectid = logical;
3268 key.type = BTRFS_EXTENT_ITEM_KEY;
3269 key.offset = (u64)0;
3270 key_end.objectid = logic_end;
3271 key_end.type = BTRFS_METADATA_ITEM_KEY;
3272 key_end.offset = (u64)-1;
3273 reada1 = btrfs_reada_add(root, &key, &key_end);
3274
3275 if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
3276 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
3277 key.type = BTRFS_EXTENT_CSUM_KEY;
3278 key.offset = logical;
3279 key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
3280 key_end.type = BTRFS_EXTENT_CSUM_KEY;
3281 key_end.offset = logic_end;
3282 reada2 = btrfs_reada_add(csum_root, &key, &key_end);
3283 } else {
3284 reada2 = NULL;
3285 }
3286
3287 if (!IS_ERR(reada1))
3288 btrfs_reada_wait(reada1);
3289 if (!IS_ERR_OR_NULL(reada2))
3290 btrfs_reada_wait(reada2);
3291

	/*
	 * Collect all data csums for the stripe to avoid seeking during
	 * the scrub. This might currently (crc32) end up to be about 1MB.
	 */
3297 blk_start_plug(&plug);
3298
3299 if (sctx->is_dev_replace &&
3300 btrfs_dev_is_sequential(sctx->wr_tgtdev, physical)) {
3301 mutex_lock(&sctx->wr_lock);
3302 sctx->write_pointer = physical;
3303 mutex_unlock(&sctx->wr_lock);
3304 sctx->flush_all_writes = true;
3305 }
3306
	/*
	 * Now find all extents for each stripe and scrub them.
	 */
3310 ret = 0;
3311 while (physical < physical_end) {
		/*
		 * Canceled?
		 */
3315 if (atomic_read(&fs_info->scrub_cancel_req) ||
3316 atomic_read(&sctx->cancel_req)) {
3317 ret = -ECANCELED;
3318 goto out;
3319 }
3320
		/* Check to see if we have to pause */
3323 if (atomic_read(&fs_info->scrub_pause_req)) {
			/* Push queued extents */
3325 sctx->flush_all_writes = true;
3326 scrub_submit(sctx);
3327 mutex_lock(&sctx->wr_lock);
3328 scrub_wr_submit(sctx);
3329 mutex_unlock(&sctx->wr_lock);
3330 wait_event(sctx->list_wait,
3331 atomic_read(&sctx->bios_in_flight) == 0);
3332 sctx->flush_all_writes = false;
3333 scrub_blocked_if_needed(fs_info);
3334 }
3335
3336 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3337 ret = get_raid56_logic_offset(physical, num, map,
3338 &logical,
3339 &stripe_logical);
3340 logical += base;
3341 if (ret) {
				/* It is a parity stripe */
3343 stripe_logical += base;
3344 stripe_end = stripe_logical + increment;
3345 ret = scrub_raid56_parity(sctx, map, scrub_dev,
3346 ppath, stripe_logical,
3347 stripe_end);
3348 if (ret)
3349 goto out;
3350 goto skip;
3351 }
3352 }
3353
3354 if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
3355 key.type = BTRFS_METADATA_ITEM_KEY;
3356 else
3357 key.type = BTRFS_EXTENT_ITEM_KEY;
3358 key.objectid = logical;
3359 key.offset = (u64)-1;
3360
3361 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3362 if (ret < 0)
3363 goto out;
3364
3365 if (ret > 0) {
3366 ret = btrfs_previous_extent_item(root, path, 0);
3367 if (ret < 0)
3368 goto out;
3369 if (ret > 0) {
				/* No extent item precedes @key, search
				 * forward from @key itself instead */
3372 btrfs_release_path(path);
3373 ret = btrfs_search_slot(NULL, root, &key,
3374 path, 0, 0);
3375 if (ret < 0)
3376 goto out;
3377 }
3378 }
3379
3380 stop_loop = 0;
3381 while (1) {
3382 u64 bytes;
3383
3384 l = path->nodes[0];
3385 slot = path->slots[0];
3386 if (slot >= btrfs_header_nritems(l)) {
3387 ret = btrfs_next_leaf(root, path);
3388 if (ret == 0)
3389 continue;
3390 if (ret < 0)
3391 goto out;
3392
3393 stop_loop = 1;
3394 break;
3395 }
3396 btrfs_item_key_to_cpu(l, &key, slot);
3397
3398 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
3399 key.type != BTRFS_METADATA_ITEM_KEY)
3400 goto next;
3401
3402 if (key.type == BTRFS_METADATA_ITEM_KEY)
3403 bytes = fs_info->nodesize;
3404 else
3405 bytes = key.offset;
3406
3407 if (key.objectid + bytes <= logical)
3408 goto next;
3409
3410 if (key.objectid >= logical + map->stripe_len) {
				/* Out of this device extent */
3412 if (key.objectid >= logic_end)
3413 stop_loop = 1;
3414 break;
3415 }
3416
			/*
			 * If our block group was removed in the meanwhile,
			 * just stop scrubbing since there is no point in
			 * continuing. Continuing would prevent reusing its
			 * device extents for new block groups for a long
			 * time.
			 */
3423 spin_lock(&cache->lock);
3424 if (cache->removed) {
3425 spin_unlock(&cache->lock);
3426 ret = 0;
3427 goto out;
3428 }
3429 spin_unlock(&cache->lock);
3430
3431 extent = btrfs_item_ptr(l, slot,
3432 struct btrfs_extent_item);
3433 flags = btrfs_extent_flags(l, extent);
3434 generation = btrfs_extent_generation(l, extent);
3435
3436 if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
3437 (key.objectid < logical ||
3438 key.objectid + bytes >
3439 logical + map->stripe_len)) {
3440 btrfs_err(fs_info,
3441 "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
3442 key.objectid, logical);
3443 spin_lock(&sctx->stat_lock);
3444 sctx->stat.uncorrectable_errors++;
3445 spin_unlock(&sctx->stat_lock);
3446 goto next;
3447 }
3448
3449again:
3450 extent_logical = key.objectid;
3451 ASSERT(bytes <= U32_MAX);
3452 extent_len = bytes;
3453
			/*
			 * Trim the extent to this stripe.
			 */
3457 if (extent_logical < logical) {
3458 extent_len -= logical - extent_logical;
3459 extent_logical = logical;
3460 }
3461 if (extent_logical + extent_len >
3462 logical + map->stripe_len) {
3463 extent_len = logical + map->stripe_len -
3464 extent_logical;
3465 }
3466
3467 extent_physical = extent_logical - logical + physical;
3468 extent_dev = scrub_dev;
3469 extent_mirror_num = mirror_num;
3470 if (sctx->is_dev_replace)
3471 scrub_remap_extent(fs_info, extent_logical,
3472 extent_len, &extent_physical,
3473 &extent_dev,
3474 &extent_mirror_num);
3475
3476 if (flags & BTRFS_EXTENT_FLAG_DATA) {
3477 ret = btrfs_lookup_csums_range(csum_root,
3478 extent_logical,
3479 extent_logical + extent_len - 1,
3480 &sctx->csum_list, 1);
3481 if (ret)
3482 goto out;
3483 }
3484
3485 ret = scrub_extent(sctx, map, extent_logical, extent_len,
3486 extent_physical, extent_dev, flags,
3487 generation, extent_mirror_num,
3488 extent_logical - logical + physical);
3489
3490 scrub_free_csums(sctx);
3491
3492 if (ret)
3493 goto out;
3494
3495 if (sctx->is_dev_replace)
3496 sync_replace_for_zoned(sctx);
3497
3498 if (extent_logical + extent_len <
3499 key.objectid + bytes) {
3500 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
					/*
					 * Loop until we find the next data
					 * stripe or we have finished all
					 * stripes.
					 */
3505loop:
3506 physical += map->stripe_len;
3507 ret = get_raid56_logic_offset(physical,
3508 num, map, &logical,
3509 &stripe_logical);
3510 logical += base;
3511
3512 if (ret && physical < physical_end) {
3513 stripe_logical += base;
3514 stripe_end = stripe_logical +
3515 increment;
3516 ret = scrub_raid56_parity(sctx,
3517 map, scrub_dev, ppath,
3518 stripe_logical,
3519 stripe_end);
3520 if (ret)
3521 goto out;
3522 goto loop;
3523 }
3524 } else {
3525 physical += map->stripe_len;
3526 logical += increment;
3527 }
3528 if (logical < key.objectid + bytes) {
3529 cond_resched();
3530 goto again;
3531 }
3532
3533 if (physical >= physical_end) {
3534 stop_loop = 1;
3535 break;
3536 }
3537 }
3538next:
3539 path->slots[0]++;
3540 }
3541 btrfs_release_path(path);
3542skip:
3543 logical += increment;
3544 physical += map->stripe_len;
3545 spin_lock(&sctx->stat_lock);
3546 if (stop_loop)
3547 sctx->stat.last_physical = map->stripes[num].physical +
3548 length;
3549 else
3550 sctx->stat.last_physical = physical;
3551 spin_unlock(&sctx->stat_lock);
3552 if (stop_loop)
3553 break;
3554 }
3555out:
	/* Push queued extents */
3557 scrub_submit(sctx);
3558 mutex_lock(&sctx->wr_lock);
3559 scrub_wr_submit(sctx);
3560 mutex_unlock(&sctx->wr_lock);
3561
3562 blk_finish_plug(&plug);
3563 btrfs_free_path(path);
3564 btrfs_free_path(ppath);
3565
3566 if (sctx->is_dev_replace && ret >= 0) {
3567 int ret2;
3568
3569 ret2 = sync_write_pointer_for_zoned(sctx, base + offset,
3570 map->stripes[num].physical,
3571 physical_end);
3572 if (ret2)
3573 ret = ret2;
3574 }
3575
3576 return ret < 0 ? ret : 0;
3577}
3578
3579static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
3580 struct btrfs_device *scrub_dev,
3581 u64 chunk_offset, u64 length,
3582 u64 dev_offset,
3583 struct btrfs_block_group *cache)
3584{
3585 struct btrfs_fs_info *fs_info = sctx->fs_info;
3586 struct extent_map_tree *map_tree = &fs_info->mapping_tree;
3587 struct map_lookup *map;
3588 struct extent_map *em;
3589 int i;
3590 int ret = 0;
3591
3592 read_lock(&map_tree->lock);
3593 em = lookup_extent_mapping(map_tree, chunk_offset, 1);
3594 read_unlock(&map_tree->lock);
3595
3596 if (!em) {
		/*
		 * Might have been an unused block group deleted by the
		 * cleaner kthread or relocation.
		 */
3601 spin_lock(&cache->lock);
3602 if (!cache->removed)
3603 ret = -EINVAL;
3604 spin_unlock(&cache->lock);
3605
3606 return ret;
3607 }
3608
3609 map = em->map_lookup;
3610 if (em->start != chunk_offset)
3611 goto out;
3612
3613 if (em->len < length)
3614 goto out;
3615
3616 for (i = 0; i < map->num_stripes; ++i) {
3617 if (map->stripes[i].dev->bdev == scrub_dev->bdev &&
3618 map->stripes[i].physical == dev_offset) {
3619 ret = scrub_stripe(sctx, map, scrub_dev, i,
3620 chunk_offset, length, cache);
3621 if (ret)
3622 goto out;
3623 }
3624 }
3625out:
3626 free_extent_map(em);
3627
3628 return ret;
3629}
3630
3631static int finish_extent_writes_for_zoned(struct btrfs_root *root,
3632 struct btrfs_block_group *cache)
3633{
3634 struct btrfs_fs_info *fs_info = cache->fs_info;
3635 struct btrfs_trans_handle *trans;
3636
3637 if (!btrfs_is_zoned(fs_info))
3638 return 0;
3639
3640 btrfs_wait_block_group_reservations(cache);
3641 btrfs_wait_nocow_writers(cache);
3642 btrfs_wait_ordered_roots(fs_info, U64_MAX, cache->start, cache->length);
3643
3644 trans = btrfs_join_transaction(root);
3645 if (IS_ERR(trans))
3646 return PTR_ERR(trans);
3647 return btrfs_commit_transaction(trans);
3648}
3649
3650static noinline_for_stack
3651int scrub_enumerate_chunks(struct scrub_ctx *sctx,
3652 struct btrfs_device *scrub_dev, u64 start, u64 end)
3653{
3654 struct btrfs_dev_extent *dev_extent = NULL;
3655 struct btrfs_path *path;
3656 struct btrfs_fs_info *fs_info = sctx->fs_info;
3657 struct btrfs_root *root = fs_info->dev_root;
3658 u64 length;
3659 u64 chunk_offset;
3660 int ret = 0;
3661 int ro_set;
3662 int slot;
3663 struct extent_buffer *l;
3664 struct btrfs_key key;
3665 struct btrfs_key found_key;
3666 struct btrfs_block_group *cache;
3667 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
3668
3669 path = btrfs_alloc_path();
3670 if (!path)
3671 return -ENOMEM;
3672
3673 path->reada = READA_FORWARD;
3674 path->search_commit_root = 1;
3675 path->skip_locking = 1;
3676
3677 key.objectid = scrub_dev->devid;
3678 key.offset = 0ull;
3679 key.type = BTRFS_DEV_EXTENT_KEY;
3680
3681 while (1) {
3682 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3683 if (ret < 0)
3684 break;
3685 if (ret > 0) {
3686 if (path->slots[0] >=
3687 btrfs_header_nritems(path->nodes[0])) {
3688 ret = btrfs_next_leaf(root, path);
3689 if (ret < 0)
3690 break;
3691 if (ret > 0) {
3692 ret = 0;
3693 break;
3694 }
3695 } else {
3696 ret = 0;
3697 }
3698 }
3699
3700 l = path->nodes[0];
3701 slot = path->slots[0];
3702
3703 btrfs_item_key_to_cpu(l, &found_key, slot);
3704
3705 if (found_key.objectid != scrub_dev->devid)
3706 break;
3707
3708 if (found_key.type != BTRFS_DEV_EXTENT_KEY)
3709 break;
3710
3711 if (found_key.offset >= end)
3712 break;
3713
3714 if (found_key.offset < key.offset)
3715 break;
3716
3717 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
3718 length = btrfs_dev_extent_length(l, dev_extent);
3719
3720 if (found_key.offset + length <= start)
3721 goto skip;
3722
3723 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
3724
		/*
		 * Get a reference on the corresponding block group to prevent
		 * the chunk from going away while we scrub it.
		 */
3729 cache = btrfs_lookup_block_group(fs_info, chunk_offset);

		/* Some chunks are removed but not committed to disk yet,
		 * continue scrubbing */
3733 if (!cache)
3734 goto skip;
3735
3736 if (sctx->is_dev_replace && btrfs_is_zoned(fs_info)) {
3737 spin_lock(&cache->lock);
3738 if (!cache->to_copy) {
3739 spin_unlock(&cache->lock);
3740 btrfs_put_block_group(cache);
3741 goto skip;
3742 }
3743 spin_unlock(&cache->lock);
3744 }
3745
		/*
		 * Make sure that while we are scrubbing the corresponding
		 * block group doesn't get its logical address and its device
		 * extents reused for another block group, which can possibly
		 * be of a different type and different profile. We do this to
		 * prevent false error detections and crashes due to bogus
		 * attempts to repair extents.
		 */
3754 spin_lock(&cache->lock);
3755 if (cache->removed) {
3756 spin_unlock(&cache->lock);
3757 btrfs_put_block_group(cache);
3758 goto skip;
3759 }
3760 btrfs_freeze_block_group(cache);
3761 spin_unlock(&cache->lock);
3762
		/*
		 * We need to call btrfs_inc_block_group_ro() with scrub
		 * paused, to avoid a deadlock caused by:
		 * btrfs_inc_block_group_ro()
		 * -> btrfs_wait_for_commit()
		 * -> btrfs_commit_transaction()
		 * -> btrfs_scrub_pause()
		 */
3771 scrub_pause_on(fs_info);

		/*
		 * Don't do chunk preallocation for scrub.
		 *
		 * This is especially important for SYSTEM block groups, or
		 * we can hit -EFBIG from btrfs_finish_chunk_alloc() like:
		 * 1. The only SYSTEM block group is marked RO.
		 *    Since SYSTEM block groups are small, that's pretty rare.
		 * 2. The RO SYSTEM block group gets scrubbed: we go here.
		 * 3. A new SYSTEM block group gets allocated, as the regular
		 *    path would allocate a new chunk.
		 * 4. The new SYSTEM block group gets scrubbed: we go here
		 *    again, and so on.
		 * This can easily boost the amount of SYSTEM chunks if the
		 * cleaner thread can't be triggered fast enough, and use up
		 * all the space of btrfs_super_block::sys_chunk_array.
		 *
		 * For dev replace, on the other hand, we need to try our
		 * best to mark the block group RO, to prevent a race between:
		 * - Write duplication: contains the latest data.
		 * - Scrub copy: contains data from the commit tree.
		 *
		 * If the target block group is not marked RO, nocow writes
		 * can be overwritten by the scrub copy, causing data
		 * corruption. So for dev-replace, it's not allowed to
		 * continue if a block group is not RO.
		 */
3803 ret = btrfs_inc_block_group_ro(cache, sctx->is_dev_replace);
3804 if (!ret && sctx->is_dev_replace) {
3805 ret = finish_extent_writes_for_zoned(root, cache);
3806 if (ret) {
3807 btrfs_dec_block_group_ro(cache);
3808 scrub_pause_off(fs_info);
3809 btrfs_put_block_group(cache);
3810 break;
3811 }
3812 }
3813
3814 if (ret == 0) {
3815 ro_set = 1;
3816 } else if (ret == -ENOSPC && !sctx->is_dev_replace) {
			/*
			 * btrfs_inc_block_group_ro() returns -ENOSPC when it
			 * failed to create a new chunk for metadata.
			 * This is not a problem for scrub, because metadata
			 * is always COWed, and our scrub pauses transaction
			 * commits.
			 */
3824 ro_set = 0;
3825 } else if (ret == -ETXTBSY) {
3826 btrfs_warn(fs_info,
3827 "skipping scrub of block group %llu due to active swapfile",
3828 cache->start);
3829 scrub_pause_off(fs_info);
3830 ret = 0;
3831 goto skip_unfreeze;
3832 } else {
3833 btrfs_warn(fs_info,
3834 "failed setting block group ro: %d", ret);
3835 btrfs_unfreeze_block_group(cache);
3836 btrfs_put_block_group(cache);
3837 scrub_pause_off(fs_info);
3838 break;
3839 }
3840
		/*
		 * Now the target block is marked RO, wait for nocow writes to
		 * finish before dev-replace.
		 * COW is fine, as COW never overwrites extents in commit tree.
		 */
3846 if (sctx->is_dev_replace) {
3847 btrfs_wait_nocow_writers(cache);
3848 btrfs_wait_ordered_roots(fs_info, U64_MAX, cache->start,
3849 cache->length);
3850 }
3851
3852 scrub_pause_off(fs_info);
3853 down_write(&dev_replace->rwsem);
3854 dev_replace->cursor_right = found_key.offset + length;
3855 dev_replace->cursor_left = found_key.offset;
3856 dev_replace->item_needs_writeback = 1;
3857 up_write(&dev_replace->rwsem);
3858
3859 ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
3860 found_key.offset, cache);

		/*
		 * Flush, submit all pending read and write bios, afterwards
		 * wait for them.
		 * Note that in the dev replace case, a read request causes
		 * write requests that are submitted in the read completion
		 * worker. Therefore in the current situation, it is required
		 * that all write requests are flushed, so that all read and
		 * write requests are really completed when bios_in_flight
		 * changes to 0.
		 */
3872 sctx->flush_all_writes = true;
3873 scrub_submit(sctx);
3874 mutex_lock(&sctx->wr_lock);
3875 scrub_wr_submit(sctx);
3876 mutex_unlock(&sctx->wr_lock);
3877
3878 wait_event(sctx->list_wait,
3879 atomic_read(&sctx->bios_in_flight) == 0);
3880
3881 scrub_pause_on(fs_info);

		/*
		 * Must be called before we decrease @scrub_paused.
		 * Make sure we don't block transaction commit while
		 * we are waiting for pending workers to finish.
		 */
3888 wait_event(sctx->list_wait,
3889 atomic_read(&sctx->workers_pending) == 0);
3890 sctx->flush_all_writes = false;
3891
3892 scrub_pause_off(fs_info);
3893
3894 if (sctx->is_dev_replace &&
3895 !btrfs_finish_block_group_to_copy(dev_replace->srcdev,
3896 cache, found_key.offset))
3897 ro_set = 0;
3898
3899 down_write(&dev_replace->rwsem);
3900 dev_replace->cursor_left = dev_replace->cursor_right;
3901 dev_replace->item_needs_writeback = 1;
3902 up_write(&dev_replace->rwsem);
3903
3904 if (ro_set)
3905 btrfs_dec_block_group_ro(cache);
3906
		/*
		 * We might have prevented the cleaner kthread from deleting
		 * this block group if it was already unused because we raced
		 * and set it to RO mode first. So add it back to the unused
		 * list, otherwise it might not ever be deleted unless a
		 * manual balance is triggered or it becomes used and unused
		 * again.
		 */
3914 spin_lock(&cache->lock);
3915 if (!cache->removed && !cache->ro && cache->reserved == 0 &&
3916 cache->used == 0) {
3917 spin_unlock(&cache->lock);
3918 if (btrfs_test_opt(fs_info, DISCARD_ASYNC))
3919 btrfs_discard_queue_work(&fs_info->discard_ctl,
3920 cache);
3921 else
3922 btrfs_mark_bg_unused(cache);
3923 } else {
3924 spin_unlock(&cache->lock);
3925 }
3926skip_unfreeze:
3927 btrfs_unfreeze_block_group(cache);
3928 btrfs_put_block_group(cache);
3929 if (ret)
3930 break;
3931 if (sctx->is_dev_replace &&
3932 atomic64_read(&dev_replace->num_write_errors) > 0) {
3933 ret = -EIO;
3934 break;
3935 }
3936 if (sctx->stat.malloc_errors > 0) {
3937 ret = -ENOMEM;
3938 break;
3939 }
3940skip:
3941 key.offset = found_key.offset + length;
3942 btrfs_release_path(path);
3943 }
3944
3945 btrfs_free_path(path);
3946
3947 return ret;
3948}
3949
3950static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
3951 struct btrfs_device *scrub_dev)
3952{
3953 int i;
3954 u64 bytenr;
3955 u64 gen;
3956 int ret;
3957 struct btrfs_fs_info *fs_info = sctx->fs_info;
3958
3959 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
3960 return -EROFS;
3961
	/* Seed devices of a new filesystem have their own generation */
3963 if (scrub_dev->fs_devices != fs_info->fs_devices)
3964 gen = scrub_dev->generation;
3965 else
3966 gen = fs_info->last_trans_committed;
3967
3968 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
3969 bytenr = btrfs_sb_offset(i);
3970 if (bytenr + BTRFS_SUPER_INFO_SIZE >
3971 scrub_dev->commit_total_bytes)
3972 break;
3973 if (!btrfs_check_super_location(scrub_dev, bytenr))
3974 continue;
3975
3976 ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
3977 scrub_dev, BTRFS_EXTENT_FLAG_SUPER, gen, i,
3978 NULL, bytenr);
3979 if (ret)
3980 return ret;
3981 }
3982 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
3983
3984 return 0;
3985}
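/*
 * For reference: btrfs_sb_offset() places the superblock mirrors at the
 * fixed device offsets 64K, 64M and 256G, and the commit_total_bytes check
 * above is what skips the mirrors that do not fit on a small device.
 */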
3986
3987static void scrub_workers_put(struct btrfs_fs_info *fs_info)
3988{
3989 if (refcount_dec_and_mutex_lock(&fs_info->scrub_workers_refcnt,
3990 &fs_info->scrub_lock)) {
3991 struct btrfs_workqueue *scrub_workers = NULL;
3992 struct btrfs_workqueue *scrub_wr_comp = NULL;
3993 struct btrfs_workqueue *scrub_parity = NULL;
3994
3995 scrub_workers = fs_info->scrub_workers;
3996 scrub_wr_comp = fs_info->scrub_wr_completion_workers;
3997 scrub_parity = fs_info->scrub_parity_workers;
3998
3999 fs_info->scrub_workers = NULL;
4000 fs_info->scrub_wr_completion_workers = NULL;
4001 fs_info->scrub_parity_workers = NULL;
4002 mutex_unlock(&fs_info->scrub_lock);
4003
4004 btrfs_destroy_workqueue(scrub_workers);
4005 btrfs_destroy_workqueue(scrub_wr_comp);
4006 btrfs_destroy_workqueue(scrub_parity);
4007 }
4008}
4009
/*
 * Get a reference count on fs_info->scrub_workers. Start workers if necessary.
 */
4013static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
4014 int is_dev_replace)
4015{
4016 struct btrfs_workqueue *scrub_workers = NULL;
4017 struct btrfs_workqueue *scrub_wr_comp = NULL;
4018 struct btrfs_workqueue *scrub_parity = NULL;
4019 unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
4020 int max_active = fs_info->thread_pool_size;
4021 int ret = -ENOMEM;
4022
4023 if (refcount_inc_not_zero(&fs_info->scrub_workers_refcnt))
4024 return 0;
4025
4026 scrub_workers = btrfs_alloc_workqueue(fs_info, "scrub", flags,
4027 is_dev_replace ? 1 : max_active, 4);
4028 if (!scrub_workers)
4029 goto fail_scrub_workers;
4030
4031 scrub_wr_comp = btrfs_alloc_workqueue(fs_info, "scrubwrc", flags,
4032 max_active, 2);
4033 if (!scrub_wr_comp)
4034 goto fail_scrub_wr_completion_workers;
4035
4036 scrub_parity = btrfs_alloc_workqueue(fs_info, "scrubparity", flags,
4037 max_active, 2);
4038 if (!scrub_parity)
4039 goto fail_scrub_parity_workers;
4040
4041 mutex_lock(&fs_info->scrub_lock);
4042 if (refcount_read(&fs_info->scrub_workers_refcnt) == 0) {
4043 ASSERT(fs_info->scrub_workers == NULL &&
4044 fs_info->scrub_wr_completion_workers == NULL &&
4045 fs_info->scrub_parity_workers == NULL);
4046 fs_info->scrub_workers = scrub_workers;
4047 fs_info->scrub_wr_completion_workers = scrub_wr_comp;
4048 fs_info->scrub_parity_workers = scrub_parity;
4049 refcount_set(&fs_info->scrub_workers_refcnt, 1);
4050 mutex_unlock(&fs_info->scrub_lock);
4051 return 0;
4052 }
	/* Other thread raced in and created the workers for us */
4054 refcount_inc(&fs_info->scrub_workers_refcnt);
4055 mutex_unlock(&fs_info->scrub_lock);
4056
4057 ret = 0;
4058 btrfs_destroy_workqueue(scrub_parity);
4059fail_scrub_parity_workers:
4060 btrfs_destroy_workqueue(scrub_wr_comp);
4061fail_scrub_wr_completion_workers:
4062 btrfs_destroy_workqueue(scrub_workers);
4063fail_scrub_workers:
4064 return ret;
4065}
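/*
 * A sketch of the race tolerated above: two scrubs starting concurrently
 * may both see scrub_workers_refcnt == 0 and both allocate workqueues.
 * Whichever takes scrub_lock first installs its set and sets the refcount
 * to 1; the loser merely bumps the refcount and falls through the error
 * labels to destroy its now-unneeded allocations.
 */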
4066
4067int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
4068 u64 end, struct btrfs_scrub_progress *progress,
4069 int readonly, int is_dev_replace)
4070{
4071 struct scrub_ctx *sctx;
4072 int ret;
4073 struct btrfs_device *dev;
4074 unsigned int nofs_flag;
4075
4076 if (btrfs_fs_closing(fs_info))
4077 return -EAGAIN;
4078
4079 if (fs_info->nodesize > BTRFS_STRIPE_LEN) {
		/*
		 * In this case scrub is unable to calculate the checksum the
		 * way it is implemented. Do not handle this situation at all
		 * because it won't ever happen.
		 */
4085 btrfs_err(fs_info,
4086 "scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails",
4087 fs_info->nodesize,
4088 BTRFS_STRIPE_LEN);
4089 return -EINVAL;
4090 }
4091
4092 if (fs_info->nodesize >
4093 PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK ||
4094 fs_info->sectorsize > PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK) {
		/*
		 * Would exhaust the array bounds of the pagev member in
		 * struct scrub_block.
		 */
4099 btrfs_err(fs_info,
4100 "scrub: size assumption nodesize and sectorsize <= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails",
4101 fs_info->nodesize,
4102 SCRUB_MAX_PAGES_PER_BLOCK,
4103 fs_info->sectorsize,
4104 SCRUB_MAX_PAGES_PER_BLOCK);
4105 return -EINVAL;
4106 }
4107
	/* Allocate outside of device_list_mutex */
4109 sctx = scrub_setup_ctx(fs_info, is_dev_replace);
4110 if (IS_ERR(sctx))
4111 return PTR_ERR(sctx);
4112
4113 ret = scrub_workers_get(fs_info, is_dev_replace);
4114 if (ret)
4115 goto out_free_ctx;
4116
4117 mutex_lock(&fs_info->fs_devices->device_list_mutex);
4118 dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL);
4119 if (!dev || (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) &&
4120 !is_dev_replace)) {
4121 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
4122 ret = -ENODEV;
4123 goto out;
4124 }
4125
4126 if (!is_dev_replace && !readonly &&
4127 !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
4128 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
4129 btrfs_err_in_rcu(fs_info,
4130 "scrub on devid %llu: filesystem on %s is not writable",
4131 devid, rcu_str_deref(dev->name));
4132 ret = -EROFS;
4133 goto out;
4134 }
4135
4136 mutex_lock(&fs_info->scrub_lock);
4137 if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
4138 test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &dev->dev_state)) {
4139 mutex_unlock(&fs_info->scrub_lock);
4140 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
4141 ret = -EIO;
4142 goto out;
4143 }
4144
4145 down_read(&fs_info->dev_replace.rwsem);
4146 if (dev->scrub_ctx ||
4147 (!is_dev_replace &&
4148 btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
4149 up_read(&fs_info->dev_replace.rwsem);
4150 mutex_unlock(&fs_info->scrub_lock);
4151 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
4152 ret = -EINPROGRESS;
4153 goto out;
4154 }
4155 up_read(&fs_info->dev_replace.rwsem);
4156
4157 sctx->readonly = readonly;
4158 dev->scrub_ctx = sctx;
4159 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
4160
	/*
	 * Checking @scrub_pause_req here, we can avoid a race between
	 * committing the transaction and scrubbing.
	 */
4165 __scrub_blocked_if_needed(fs_info);
4166 atomic_inc(&fs_info->scrubs_running);
4167 mutex_unlock(&fs_info->scrub_lock);
4168
	/*
	 * In order to avoid deadlock with reclaim when there is a transaction
	 * trying to pause scrub, make sure we use GFP_NOFS for all the
	 * allocations done by our callees while scrub is running. The pausing
	 * request is done when the transaction commit starts, and it blocks
	 * the transaction until scrub is paused; if an allocation entered
	 * direct reclaim and recursed into the filesystem, it could deadlock
	 * against that commit.
	 */
4178 nofs_flag = memalloc_nofs_save();
4179 if (!is_dev_replace) {
4180 btrfs_info(fs_info, "scrub: started on devid %llu", devid);
		/*
		 * By holding the device list mutex, we can kick off writing
		 * the super blocks in sync with the log tree sync.
		 */
4185 mutex_lock(&fs_info->fs_devices->device_list_mutex);
4186 ret = scrub_supers(sctx, dev);
4187 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
4188 }
4189
4190 if (!ret)
4191 ret = scrub_enumerate_chunks(sctx, dev, start, end);
4192 memalloc_nofs_restore(nofs_flag);
4193
4194 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
4195 atomic_dec(&fs_info->scrubs_running);
4196 wake_up(&fs_info->scrub_pause_wait);
4197
4198 wait_event(sctx->list_wait, atomic_read(&sctx->workers_pending) == 0);
4199
4200 if (progress)
4201 memcpy(progress, &sctx->stat, sizeof(*progress));
4202
4203 if (!is_dev_replace)
4204 btrfs_info(fs_info, "scrub: %s on devid %llu with status: %d",
4205 ret ? "not finished" : "finished", devid, ret);
4206
4207 mutex_lock(&fs_info->scrub_lock);
4208 dev->scrub_ctx = NULL;
4209 mutex_unlock(&fs_info->scrub_lock);
4210
4211 scrub_workers_put(fs_info);
4212 scrub_put_ctx(sctx);
4213
4214 return ret;
4215out:
4216 scrub_workers_put(fs_info);
4217out_free_ctx:
4218 scrub_free_ctx(sctx);
4219
4220 return ret;
4221}
4222
4223void btrfs_scrub_pause(struct btrfs_fs_info *fs_info)
4224{
4225 mutex_lock(&fs_info->scrub_lock);
4226 atomic_inc(&fs_info->scrub_pause_req);
4227 while (atomic_read(&fs_info->scrubs_paused) !=
4228 atomic_read(&fs_info->scrubs_running)) {
4229 mutex_unlock(&fs_info->scrub_lock);
4230 wait_event(fs_info->scrub_pause_wait,
4231 atomic_read(&fs_info->scrubs_paused) ==
4232 atomic_read(&fs_info->scrubs_running));
4233 mutex_lock(&fs_info->scrub_lock);
4234 }
4235 mutex_unlock(&fs_info->scrub_lock);
4236}
4237
4238void btrfs_scrub_continue(struct btrfs_fs_info *fs_info)
4239{
4240 atomic_dec(&fs_info->scrub_pause_req);
4241 wake_up(&fs_info->scrub_pause_wait);
4242}
4243
4244int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
4245{
4246 mutex_lock(&fs_info->scrub_lock);
4247 if (!atomic_read(&fs_info->scrubs_running)) {
4248 mutex_unlock(&fs_info->scrub_lock);
4249 return -ENOTCONN;
4250 }
4251
4252 atomic_inc(&fs_info->scrub_cancel_req);
4253 while (atomic_read(&fs_info->scrubs_running)) {
4254 mutex_unlock(&fs_info->scrub_lock);
4255 wait_event(fs_info->scrub_pause_wait,
4256 atomic_read(&fs_info->scrubs_running) == 0);
4257 mutex_lock(&fs_info->scrub_lock);
4258 }
4259 atomic_dec(&fs_info->scrub_cancel_req);
4260 mutex_unlock(&fs_info->scrub_lock);
4261
4262 return 0;
4263}
4264
4265int btrfs_scrub_cancel_dev(struct btrfs_device *dev)
4266{
4267 struct btrfs_fs_info *fs_info = dev->fs_info;
4268 struct scrub_ctx *sctx;
4269
4270 mutex_lock(&fs_info->scrub_lock);
4271 sctx = dev->scrub_ctx;
4272 if (!sctx) {
4273 mutex_unlock(&fs_info->scrub_lock);
4274 return -ENOTCONN;
4275 }
4276 atomic_inc(&sctx->cancel_req);
4277 while (dev->scrub_ctx) {
4278 mutex_unlock(&fs_info->scrub_lock);
4279 wait_event(fs_info->scrub_pause_wait,
4280 dev->scrub_ctx == NULL);
4281 mutex_lock(&fs_info->scrub_lock);
4282 }
4283 mutex_unlock(&fs_info->scrub_lock);
4284
4285 return 0;
4286}
4287
4288int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
4289 struct btrfs_scrub_progress *progress)
4290{
4291 struct btrfs_device *dev;
4292 struct scrub_ctx *sctx = NULL;
4293
4294 mutex_lock(&fs_info->fs_devices->device_list_mutex);
4295 dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL);
4296 if (dev)
4297 sctx = dev->scrub_ctx;
4298 if (sctx)
4299 memcpy(progress, &sctx->stat, sizeof(*progress));
4300 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
4301
4302 return dev ? (sctx ? 0 : -ENOTCONN) : -ENODEV;
4303}
4304
4305static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
4306 u64 extent_logical, u32 extent_len,
4307 u64 *extent_physical,
4308 struct btrfs_device **extent_dev,
4309 int *extent_mirror_num)
4310{
4311 u64 mapped_length;
4312 struct btrfs_bio *bbio = NULL;
4313 int ret;
4314
4315 mapped_length = extent_len;
4316 ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, extent_logical,
4317 &mapped_length, &bbio, 0);
4318 if (ret || !bbio || mapped_length < extent_len ||
4319 !bbio->stripes[0].dev->bdev) {
4320 btrfs_put_bbio(bbio);
4321 return;
4322 }
4323
4324 *extent_physical = bbio->stripes[0].physical;
4325 *extent_mirror_num = bbio->mirror_num;
4326 *extent_dev = bbio->stripes[0].dev;
4327 btrfs_put_bbio(bbio);
4328}
4329