// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2011, 2012 STRATO.  AG.
 * All rights reserved.
 */

#include <linux/blkdev.h>
#include <linux/ratelimit.h>
#include <linux/sched/mm.h>
#include <crypto/hash.h>
#include "ctree.h"
#include "discard.h"
#include "volumes.h"
#include "disk-io.h"
#include "ordered-data.h"
#include "transaction.h"
#include "backref.h"
#include "extent_io.h"
#include "dev-replace.h"
#include "check-integrity.h"
#include "rcu-string.h"
#include "raid56.h"
#include "block-group.h"
#include "zoned.h"

/*
 * This is only the first step towards a full-featured scrub. It reads all
 * extent and super block and verifies the checksums. In case a bad checksum
 * is found or the extent cannot be read, good data will be written back if
 * any can be found.
 *
 * Future enhancements:
 *  - In case an unrepairable extent is encountered, track which files are
 *    affected and report them
 *  - track and record media errors, throw out bad devices
 *  - add a mode to also read unallocated space
 */

struct scrub_block;
struct scrub_ctx;

/*
 * The following three values only influence the performance.
 *
 * The last one configures the number of parallel and outstanding I/O
 * operations. The first two values configure an upper limit for the number
 * of (dynamically allocated) pages that are added to a bio.
 */
#define SCRUB_PAGES_PER_RD_BIO	32	/* 128KiB per bio for 4KiB pages */
#define SCRUB_PAGES_PER_WR_BIO	32	/* 128KiB per bio for 4KiB pages */
#define SCRUB_BIOS_PER_SCTX	64	/* 8MiB per device in flight for 4KiB pages */

/*
 * The following value times PAGE_SIZE needs to be large enough to match the
 * largest node/leaf/sector size that shall be supported.
 */
#define SCRUB_MAX_PAGES_PER_BLOCK	16	/* 64KiB (for sectors of 4KiB) */

struct scrub_recover {
	refcount_t		refs;
	struct btrfs_io_context	*bioc;
	u64			map_length;
};

struct scrub_page {
	struct scrub_block	*sblock;
	struct page		*page;
	struct btrfs_device	*dev;
	struct list_head	list;
	u64			flags;
	u64			generation;
	u64			logical;
	u64			physical;
	u64			physical_for_dev_replace;
	atomic_t		refs;
	u8			mirror_num;
	unsigned int		have_csum:1;
	unsigned int		io_error:1;
	u8			csum[BTRFS_CSUM_SIZE];

	struct scrub_recover	*recover;
};

struct scrub_bio {
	int			index;
	struct scrub_ctx	*sctx;
	struct btrfs_device	*dev;
	struct bio		*bio;
	blk_status_t		status;
	u64			logical;
	u64			physical;
#if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
	struct scrub_page	*pagev[SCRUB_PAGES_PER_WR_BIO];
#else
	struct scrub_page	*pagev[SCRUB_PAGES_PER_RD_BIO];
#endif
	int			page_count;
	int			next_free;
	struct btrfs_work	work;
};

struct scrub_block {
	struct scrub_page	*pagev[SCRUB_MAX_PAGES_PER_BLOCK];
	int			page_count;
	atomic_t		outstanding_pages;
	refcount_t		refs; /* free mem on transition to zero */
	struct scrub_ctx	*sctx;
	struct scrub_parity	*sparity;
	struct {
		unsigned int	header_error:1;
		unsigned int	checksum_error:1;
		unsigned int	no_io_error_seen:1;
		unsigned int	generation_error:1; /* also sets header_error */

		/* The following is for the data used to check parity */
		/* It is for the data with checksum */
		unsigned int	data_corrected:1;
	};
	struct btrfs_work	work;
};

/* Used for the chunks with parity stripe such RAID5/6 */
struct scrub_parity {
	struct scrub_ctx	*sctx;

	struct btrfs_device	*scrub_dev;

	u64			logic_start;

	u64			logic_end;

	int			nsectors;

	u32			stripe_len;

	refcount_t		refs;

	struct list_head	spages;

	/* Work of parity check and repair */
	struct btrfs_work	work;

	/* Mark the parity blocks which have data */
	unsigned long		*dbitmap;

	/*
	 * Mark the parity blocks which have data, but errors happen when
	 * read data or check data
	 */
	unsigned long		*ebitmap;

	unsigned long		bitmap[];
};

struct scrub_ctx {
	struct scrub_bio	*bios[SCRUB_BIOS_PER_SCTX];
	struct btrfs_fs_info	*fs_info;
	int			first_free;
	int			curr;
	atomic_t		bios_in_flight;
	atomic_t		workers_pending;
	spinlock_t		list_lock;
	wait_queue_head_t	list_wait;
	struct list_head	csum_list;
	atomic_t		cancel_req;
	int			readonly;
	int			pages_per_rd_bio;

	/* State of IO submission throttling affecting the associated device */
	ktime_t			throttle_deadline;
	u64			throttle_sent;

	int			is_dev_replace;
	u64			write_pointer;

	struct scrub_bio	*wr_curr_bio;
	struct mutex		wr_lock;
	int			pages_per_wr_bio; /* <= SCRUB_PAGES_PER_WR_BIO */
	struct btrfs_device	*wr_tgtdev;
	bool			flush_all_writes;

	/*
	 * statistics
	 */
	struct btrfs_scrub_progress stat;
	spinlock_t		stat_lock;

	/*
	 * Use a ref counter to avoid use-after-free issues. Scrub workers
	 * decrement bios_in_flight and workers_pending and then do a wakeup
	 * on the list_wait wait queue. We must ensure the main scrub task
	 * doesn't free the scrub context before or while the workers are
	 * finishing all bios.
	 */
	refcount_t		refs;
};

struct scrub_warning {
	struct btrfs_path	*path;
	u64			extent_item_size;
	const char		*errstr;
	u64			physical;
	u64			logical;
	struct btrfs_device	*dev;
};

struct full_stripe_lock {
	struct rb_node		node;
	u64			logical;
	u64			refs;
	struct mutex		mutex;
};

static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
				     struct scrub_block *sblocks_for_recheck);
static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
				struct scrub_block *sblock,
				int retry_failed_mirror);
static void scrub_recheck_block_checksum(struct scrub_block *sblock);
static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
					     struct scrub_block *sblock_good);
static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
					    struct scrub_block *sblock_good,
					    int page_num, int force_write);
static void scrub_write_block_to_dev_replace(struct scrub_block *sblock);
static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
					   int page_num);
static int scrub_checksum_data(struct scrub_block *sblock);
static int scrub_checksum_tree_block(struct scrub_block *sblock);
static int scrub_checksum_super(struct scrub_block *sblock);
static void scrub_block_put(struct scrub_block *sblock);
static void scrub_page_get(struct scrub_page *spage);
static void scrub_page_put(struct scrub_page *spage);
static void scrub_parity_get(struct scrub_parity *sparity);
static void scrub_parity_put(struct scrub_parity *sparity);
static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u32 len,
		       u64 physical, struct btrfs_device *dev, u64 flags,
		       u64 gen, int mirror_num, u8 *csum,
		       u64 physical_for_dev_replace);
static void scrub_bio_end_io(struct bio *bio);
static void scrub_bio_end_io_worker(struct btrfs_work *work);
static void scrub_block_complete(struct scrub_block *sblock);
static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
			       u64 extent_logical, u32 extent_len,
			       u64 *extent_physical,
			       struct btrfs_device **extent_dev,
			       int *extent_mirror_num);
static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage);
static void scrub_wr_submit(struct scrub_ctx *sctx);
static void scrub_wr_bio_end_io(struct bio *bio);
static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
static void scrub_put_ctx(struct scrub_ctx *sctx);

static inline int scrub_is_page_on_raid56(struct scrub_page *spage)
{
	return spage->recover &&
	       (spage->recover->bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
}

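/*
 * Track the number of bios in flight for this context. The context itself is
 * pinned with an extra reference while any bio it issued is outstanding, and
 * waiters on list_wait are woken once a bio completes.
 */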
static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
{
	refcount_inc(&sctx->refs);
	atomic_inc(&sctx->bios_in_flight);
}

static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
{
	atomic_dec(&sctx->bios_in_flight);
	wake_up(&sctx->list_wait);
	scrub_put_ctx(sctx);
}

static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
{
	while (atomic_read(&fs_info->scrub_pause_req)) {
		mutex_unlock(&fs_info->scrub_lock);
		wait_event(fs_info->scrub_pause_wait,
			   atomic_read(&fs_info->scrub_pause_req) == 0);
		mutex_lock(&fs_info->scrub_lock);
	}
}

static void scrub_pause_on(struct btrfs_fs_info *fs_info)
{
	atomic_inc(&fs_info->scrubs_paused);
	wake_up(&fs_info->scrub_pause_wait);
}

static void scrub_pause_off(struct btrfs_fs_info *fs_info)
{
	mutex_lock(&fs_info->scrub_lock);
	__scrub_blocked_if_needed(fs_info);
	atomic_dec(&fs_info->scrubs_paused);
	mutex_unlock(&fs_info->scrub_lock);

	wake_up(&fs_info->scrub_pause_wait);
}

static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
{
	scrub_pause_on(fs_info);
	scrub_pause_off(fs_info);
}

/*
 * Insert new full stripe lock into full stripe locks tree
 *
 * Return pointer to existing or newly inserted full_stripe_lock structure if
 * everything works well.
 * Return ERR_PTR(-ENOMEM) if we failed to allocate memory
 *
 * NOTE: caller must hold full_stripe_locks_root->lock before calling this
 * function
 */
static struct full_stripe_lock *insert_full_stripe_lock(
		struct btrfs_full_stripe_locks_tree *locks_root,
		u64 fstripe_logical)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct full_stripe_lock *entry;
	struct full_stripe_lock *ret;

	lockdep_assert_held(&locks_root->lock);

	p = &locks_root->root.rb_node;
	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct full_stripe_lock, node);
		if (fstripe_logical < entry->logical) {
			p = &(*p)->rb_left;
		} else if (fstripe_logical > entry->logical) {
			p = &(*p)->rb_right;
		} else {
			entry->refs++;
			return entry;
		}
	}

	/*
	 * Insert new lock.
	 */
	ret = kmalloc(sizeof(*ret), GFP_KERNEL);
	if (!ret)
		return ERR_PTR(-ENOMEM);
	ret->logical = fstripe_logical;
	ret->refs = 1;
	mutex_init(&ret->mutex);

	rb_link_node(&ret->node, parent, p);
	rb_insert_color(&ret->node, &locks_root->root);
	return ret;
}

/*
 * Search for a full stripe lock of a block group
 *
 * Return pointer to existing full stripe lock if found
 * Return NULL if not found
 */
static struct full_stripe_lock *search_full_stripe_lock(
		struct btrfs_full_stripe_locks_tree *locks_root,
		u64 fstripe_logical)
{
	struct rb_node *node;
	struct full_stripe_lock *entry;

	lockdep_assert_held(&locks_root->lock);

	node = locks_root->root.rb_node;
	while (node) {
		entry = rb_entry(node, struct full_stripe_lock, node);
		if (fstripe_logical < entry->logical)
			node = node->rb_left;
		else if (fstripe_logical > entry->logical)
			node = node->rb_right;
		else
			return entry;
	}
	return NULL;
}

/*
 * Helper to get full stripe logical from a normal bytenr.
 *
 * Caller must ensure @cache is a RAID56 block group.
 */
static u64 get_full_stripe_logical(struct btrfs_block_group *cache, u64 bytenr)
{
	u64 ret;

	/*
	 * Due to chunk item size limit, full stripe length should not be
	 * larger than U32_MAX. Just a sanity check here.
	 */
	WARN_ON_ONCE(cache->full_stripe_len >= U32_MAX);

	/*
	 * round_down() can only handle power of 2, while RAID56 full
	 * stripe length can be 64KiB * n, so we need to manually round down.
	 */
	ret = div64_u64(bytenr - cache->start, cache->full_stripe_len) *
			cache->full_stripe_len + cache->start;
	return ret;
}

/*
 * Lock a full stripe to avoid concurrency of recovery and read
 *
 * It's only used for profiles with parities (RAID5/6), for other profiles it
 * does nothing.
 *
 * Return 0 if we locked full stripe covering @bytenr, with a mutex held.
 * So caller must call unlock_full_stripe() at the same context.
 *
 * Return <0 if we failed to wait for the full stripe to be locked.
 */
static int lock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
			    bool *locked_ret)
{
	struct btrfs_block_group *bg_cache;
	struct btrfs_full_stripe_locks_tree *locks_root;
	struct full_stripe_lock *existing;
	u64 fstripe_start;
	int ret = 0;

	*locked_ret = false;
	bg_cache = btrfs_lookup_block_group(fs_info, bytenr);
	if (!bg_cache) {
		ASSERT(0);
		return -ENOENT;
	}

	/* Profiles not based on parity don't need full stripe lock */
	if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
		goto out;
	locks_root = &bg_cache->full_stripe_locks_root;

	fstripe_start = get_full_stripe_logical(bg_cache, bytenr);

	/* Now insert the full stripe lock */
	mutex_lock(&locks_root->lock);
	existing = insert_full_stripe_lock(locks_root, fstripe_start);
	mutex_unlock(&locks_root->lock);
	if (IS_ERR(existing)) {
		ret = PTR_ERR(existing);
		goto out;
	}
	mutex_lock(&existing->mutex);
	*locked_ret = true;
out:
	btrfs_put_block_group(bg_cache);
	return ret;
}

/*
 * Unlock a full stripe.
 *
 * NOTE: Caller must ensure it's the same context calling corresponding
 * lock_full_stripe().
 *
 * Return 0 if we unlock full stripe without problem.
 * Return <0 for error
 */
static int unlock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
			      bool locked)
{
	struct btrfs_block_group *bg_cache;
	struct btrfs_full_stripe_locks_tree *locks_root;
	struct full_stripe_lock *fstripe_lock;
	u64 fstripe_start;
	bool freeit = false;
	int ret = 0;

	/* If we didn't acquire full stripe lock, no need to continue */
	if (!locked)
		return 0;

	bg_cache = btrfs_lookup_block_group(fs_info, bytenr);
	if (!bg_cache) {
		ASSERT(0);
		return -ENOENT;
	}
	if (!(bg_cache->flags & BTRFS_BLOCK_GROUP_RAID56_MASK))
		goto out;

	locks_root = &bg_cache->full_stripe_locks_root;
	fstripe_start = get_full_stripe_logical(bg_cache, bytenr);

	mutex_lock(&locks_root->lock);
	fstripe_lock = search_full_stripe_lock(locks_root, fstripe_start);
	/* Unpaired unlock_full_stripe() detected */
	if (!fstripe_lock) {
		WARN_ON(1);
		ret = -ENOENT;
		mutex_unlock(&locks_root->lock);
		goto out;
	}

	if (fstripe_lock->refs == 0) {
		WARN_ON(1);
		btrfs_warn(fs_info, "full stripe lock at %llu refcount underflow",
			fstripe_lock->logical);
	} else {
		fstripe_lock->refs--;
	}

	if (fstripe_lock->refs == 0) {
		rb_erase(&fstripe_lock->node, &locks_root->root);
		freeit = true;
	}
	mutex_unlock(&locks_root->lock);

	mutex_unlock(&fstripe_lock->mutex);
	if (freeit)
		kfree(fstripe_lock);
out:
	btrfs_put_block_group(bg_cache);
	return ret;
}

static void scrub_free_csums(struct scrub_ctx *sctx)
{
	while (!list_empty(&sctx->csum_list)) {
		struct btrfs_ordered_sum *sum;
		sum = list_first_entry(&sctx->csum_list,
				       struct btrfs_ordered_sum, list);
		list_del(&sum->list);
		kfree(sum);
	}
}

static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
{
	int i;

	if (!sctx)
		return;

	/* this can happen when scrub is cancelled */
	if (sctx->curr != -1) {
		struct scrub_bio *sbio = sctx->bios[sctx->curr];

		for (i = 0; i < sbio->page_count; i++) {
			WARN_ON(!sbio->pagev[i]->page);
			scrub_block_put(sbio->pagev[i]->sblock);
		}
		bio_put(sbio->bio);
	}

	for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
		struct scrub_bio *sbio = sctx->bios[i];

		if (!sbio)
			break;
		kfree(sbio);
	}

	kfree(sctx->wr_curr_bio);
	scrub_free_csums(sctx);
	kfree(sctx);
}

static void scrub_put_ctx(struct scrub_ctx *sctx)
{
	if (refcount_dec_and_test(&sctx->refs))
		scrub_free_ctx(sctx);
}

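/*
 * Allocate a scrub context and its pool of SCRUB_BIOS_PER_SCTX bios. The
 * bios are threaded onto a free list via next_free/first_free and are reused
 * as read requests are built up and submitted.
 */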
static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
		struct btrfs_fs_info *fs_info, int is_dev_replace)
{
	struct scrub_ctx *sctx;
	int i;

	sctx = kzalloc(sizeof(*sctx), GFP_KERNEL);
	if (!sctx)
		goto nomem;
	refcount_set(&sctx->refs, 1);
	sctx->is_dev_replace = is_dev_replace;
	sctx->pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
	sctx->curr = -1;
	sctx->fs_info = fs_info;
	INIT_LIST_HEAD(&sctx->csum_list);
	for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
		struct scrub_bio *sbio;

		sbio = kzalloc(sizeof(*sbio), GFP_KERNEL);
		if (!sbio)
			goto nomem;
		sctx->bios[i] = sbio;

		sbio->index = i;
		sbio->sctx = sctx;
		sbio->page_count = 0;
		btrfs_init_work(&sbio->work, scrub_bio_end_io_worker, NULL,
				NULL);

		if (i != SCRUB_BIOS_PER_SCTX - 1)
			sctx->bios[i]->next_free = i + 1;
		else
			sctx->bios[i]->next_free = -1;
	}
	sctx->first_free = 0;
	atomic_set(&sctx->bios_in_flight, 0);
	atomic_set(&sctx->workers_pending, 0);
	atomic_set(&sctx->cancel_req, 0);

	spin_lock_init(&sctx->list_lock);
	spin_lock_init(&sctx->stat_lock);
	init_waitqueue_head(&sctx->list_wait);
	sctx->throttle_deadline = 0;

	WARN_ON(sctx->wr_curr_bio != NULL);
	mutex_init(&sctx->wr_lock);
	sctx->wr_curr_bio = NULL;
	if (is_dev_replace) {
		WARN_ON(!fs_info->dev_replace.tgtdev);
		sctx->pages_per_wr_bio = SCRUB_PAGES_PER_WR_BIO;
		sctx->wr_tgtdev = fs_info->dev_replace.tgtdev;
		sctx->flush_all_writes = false;
	}

	return sctx;

nomem:
	scrub_free_ctx(sctx);
	return ERR_PTR(-ENOMEM);
}

static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
				     void *warn_ctx)
{
	u32 nlink;
	int ret;
	int i;
	unsigned nofs_flag;
	struct extent_buffer *eb;
	struct btrfs_inode_item *inode_item;
	struct scrub_warning *swarn = warn_ctx;
	struct btrfs_fs_info *fs_info = swarn->dev->fs_info;
	struct inode_fs_paths *ipath = NULL;
	struct btrfs_root *local_root;
	struct btrfs_key key;

	local_root = btrfs_get_fs_root(fs_info, root, true);
	if (IS_ERR(local_root)) {
		ret = PTR_ERR(local_root);
		goto err;
	}

	/*
	 * this makes the path point to (inum INODE_ITEM ioff)
	 */
	key.objectid = inum;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, local_root, &key, swarn->path, 0, 0);
	if (ret) {
		btrfs_put_root(local_root);
		btrfs_release_path(swarn->path);
		goto err;
	}

	eb = swarn->path->nodes[0];
	inode_item = btrfs_item_ptr(eb, swarn->path->slots[0],
					struct btrfs_inode_item);
	nlink = btrfs_inode_nlink(eb, inode_item);
	btrfs_release_path(swarn->path);

	/*
	 * init_ipath() allocates memory, so switch to GFP_NOFS here: this
	 * code can run from a worker that a transaction commit may be
	 * waiting on, so reclaim must not recurse into the filesystem.
	 */
	nofs_flag = memalloc_nofs_save();
	ipath = init_ipath(4096, local_root, swarn->path);
	memalloc_nofs_restore(nofs_flag);
	if (IS_ERR(ipath)) {
		btrfs_put_root(local_root);
		ret = PTR_ERR(ipath);
		ipath = NULL;
		goto err;
	}
	ret = paths_from_inode(inum, ipath);

	if (ret < 0)
		goto err;

	/*
	 * we deliberately ignore the bit ipath might have been too small to
	 * hold all of the paths here
	 */
	for (i = 0; i < ipath->fspath->elem_cnt; ++i)
		btrfs_warn_in_rcu(fs_info,
"%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, length %u, links %u (path: %s)",
				  swarn->errstr, swarn->logical,
				  rcu_str_deref(swarn->dev->name),
				  swarn->physical,
				  root, inum, offset,
				  fs_info->sectorsize, nlink,
				  (char *)(unsigned long)ipath->fspath->val[i]);

	btrfs_put_root(local_root);
	free_ipath(ipath);
	return 0;

err:
	btrfs_warn_in_rcu(fs_info,
			  "%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d",
			  swarn->errstr, swarn->logical,
			  rcu_str_deref(swarn->dev->name),
			  swarn->physical,
			  root, inum, offset, ret);

	free_ipath(ipath);
	return 0;
}

static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
{
	struct btrfs_device *dev;
	struct btrfs_fs_info *fs_info;
	struct btrfs_path *path;
	struct btrfs_key found_key;
	struct extent_buffer *eb;
	struct btrfs_extent_item *ei;
	struct scrub_warning swarn;
	unsigned long ptr = 0;
	u64 extent_item_pos;
	u64 flags = 0;
	u64 ref_root;
	u32 item_size;
	u8 ref_level = 0;
	int ret;

	WARN_ON(sblock->page_count < 1);
	dev = sblock->pagev[0]->dev;
	fs_info = sblock->sctx->fs_info;

	path = btrfs_alloc_path();
	if (!path)
		return;

	swarn.physical = sblock->pagev[0]->physical;
	swarn.logical = sblock->pagev[0]->logical;
	swarn.errstr = errstr;
	swarn.dev = NULL;

	ret = extent_from_logical(fs_info, swarn.logical, path, &found_key,
				  &flags);
	if (ret < 0)
		goto out;

	extent_item_pos = swarn.logical - found_key.objectid;
	swarn.extent_item_size = found_key.offset;

	eb = path->nodes[0];
	ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
	item_size = btrfs_item_size_nr(eb, path->slots[0]);

	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		do {
			ret = tree_backref_for_extent(&ptr, eb, &found_key, ei,
						      item_size, &ref_root,
						      &ref_level);
			btrfs_warn_in_rcu(fs_info,
"%s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu",
				errstr, swarn.logical,
				rcu_str_deref(dev->name),
				swarn.physical,
				ref_level ? "node" : "leaf",
				ret < 0 ? -1 : ref_level,
				ret < 0 ? -1 : ref_root);
		} while (ret != 1);
		btrfs_release_path(path);
	} else {
		btrfs_release_path(path);
		swarn.path = path;
		swarn.dev = dev;
		iterate_extent_inodes(fs_info, found_key.objectid,
				      extent_item_pos, 1,
				      scrub_print_warning_inode, &swarn, false);
	}

out:
	btrfs_free_path(path);
}

static inline void scrub_get_recover(struct scrub_recover *recover)
{
	refcount_inc(&recover->refs);
}

static inline void scrub_put_recover(struct btrfs_fs_info *fs_info,
				     struct scrub_recover *recover)
{
	if (refcount_dec_and_test(&recover->refs)) {
		btrfs_bio_counter_dec(fs_info);
		btrfs_put_bioc(recover->bioc);
		kfree(recover);
	}
}

/*
 * scrub_handle_errored_block gets called when either verification of the
 * pages failed or the bio failed to read, e.g. with EIO. In the latter
 * case, this function handles all pages in the bio, even though only one
 * may be bad.
 * The goal of this function is to repair the errored block by using the
 * contents of one of the mirrors.
 */
static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
{
	struct scrub_ctx *sctx = sblock_to_check->sctx;
	struct btrfs_device *dev;
	struct btrfs_fs_info *fs_info;
	u64 logical;
	unsigned int failed_mirror_index;
	unsigned int is_metadata;
	unsigned int have_csum;
	struct scrub_block *sblocks_for_recheck; /* holds one for each mirror */
	struct scrub_block *sblock_bad;
	int ret;
	int mirror_index;
	int page_num;
	int success;
	bool full_stripe_locked;
	unsigned int nofs_flag;
	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	BUG_ON(sblock_to_check->page_count < 1);
	fs_info = sctx->fs_info;
	if (sblock_to_check->pagev[0]->flags & BTRFS_EXTENT_FLAG_SUPER) {
		/*
		 * An error in a super block is only counted here; the
		 * super block gets rewritten with every transaction
		 * commit anyway, so no repair is attempted.
		 */
		spin_lock(&sctx->stat_lock);
		++sctx->stat.super_errors;
		spin_unlock(&sctx->stat_lock);
		return 0;
	}
	logical = sblock_to_check->pagev[0]->logical;
	BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1);
	failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1;
	is_metadata = !(sblock_to_check->pagev[0]->flags &
			BTRFS_EXTENT_FLAG_DATA);
	have_csum = sblock_to_check->pagev[0]->have_csum;
	dev = sblock_to_check->pagev[0]->dev;

	if (btrfs_is_zoned(fs_info) && !sctx->is_dev_replace)
		return btrfs_repair_one_zone(fs_info, logical);

	/*
	 * We must use GFP_NOFS because the scrub task might be waiting for a
	 * worker task executing this function and in turn a transaction commit
	 * might be waiting the scrub task to pause (which needs to wait for all
	 * the worker tasks to complete before pausing).
	 * We do allocations in the workers through insert_full_stripe_lock()
	 * and scrub_add_page_to_wr_bio(), which happens down the call chain of
	 * this function.
	 */
	nofs_flag = memalloc_nofs_save();
	/*
	 * For RAID5/6, race can happen for a different device scrub thread.
	 * For data corruption, Parity and Data threads will both try
	 * to recovery the data.
	 * Race can lead to doubly added csum error, or even unrecoverable
	 * error.
	 */
	ret = lock_full_stripe(fs_info, logical, &full_stripe_locked);
	if (ret < 0) {
		memalloc_nofs_restore(nofs_flag);
		spin_lock(&sctx->stat_lock);
		if (ret == -ENOMEM)
			sctx->stat.malloc_errors++;
		sctx->stat.read_errors++;
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		return ret;
	}

	/*
	 * Read all mirrors one after the other. This includes to
	 * re-read the extent or metadata block that failed (that was
	 * the cause that this fixup code is called) another time,
	 * sector by sector this time in order to know which sectors
	 * caused I/O errors and which ones are good (for all mirrors).
	 * It is the goal to handle the situation when more than one
	 * mirror contains I/O errors, but the errors do not
	 * overlap, i.e. the data can be repaired by selecting the
	 * sectors from those mirrors without I/O error on the
	 * particular sectors. One example (with blocks >= 2 * sectorsize)
	 * would be that mirror #1 has an I/O error on the first sector,
	 * the second sector is good, and mirror #2 has an I/O error on
	 * the second sector, but the first sector is good.
	 * Then the first sector of the first mirror can be repaired by
	 * taking the first sector of the second mirror, and the
	 * second sector of the second mirror can be repaired by
	 * copying the contents of the 2nd sector of the 1st mirror.
	 * One more note: if the sectors of one mirror contain I/O
	 * errors, the checksum cannot be verified. In that case,
	 * processing is continued, the sector contents are verified
	 * as is.
	 */
	sblocks_for_recheck = kcalloc(BTRFS_MAX_MIRRORS,
				      sizeof(*sblocks_for_recheck), GFP_KERNEL);
	if (!sblocks_for_recheck) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.malloc_errors++;
		sctx->stat.read_errors++;
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
		goto out;
	}

	/* setup the context, map the logical blocks and alloc the pages */
	ret = scrub_setup_recheck_block(sblock_to_check, sblocks_for_recheck);
	if (ret) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.read_errors++;
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
		goto out;
	}
	BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
	sblock_bad = sblocks_for_recheck + failed_mirror_index;

	/* build and submit the bios for the failed mirror, check checksums */
	scrub_recheck_block(fs_info, sblock_bad, 1);

	if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
	    sblock_bad->no_io_error_seen) {
		/*
		 * the error disappeared after reading page by page, or
		 * the area was part of a huge bio and other parts of the
		 * bio caused I/O errors, or the block layer merged several
		 * read requests into one and the error is caused by a
		 * different bio (shorter than the original bio)
		 */
		spin_lock(&sctx->stat_lock);
		sctx->stat.unverified_errors++;
		sblock_to_check->data_corrected = 1;
		spin_unlock(&sctx->stat_lock);

		if (sctx->is_dev_replace)
			scrub_write_block_to_dev_replace(sblock_bad);
		goto out;
	}

	if (!sblock_bad->no_io_error_seen) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.read_errors++;
		spin_unlock(&sctx->stat_lock);
		if (__ratelimit(&rs))
			scrub_print_warning("i/o error", sblock_to_check);
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
	} else if (sblock_bad->checksum_error) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.csum_errors++;
		spin_unlock(&sctx->stat_lock);
		if (__ratelimit(&rs))
			scrub_print_warning("checksum error", sblock_to_check);
		btrfs_dev_stat_inc_and_print(dev,
					     BTRFS_DEV_STAT_CORRUPTION_ERRS);
	} else if (sblock_bad->header_error) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.verify_errors++;
		spin_unlock(&sctx->stat_lock);
		if (__ratelimit(&rs))
			scrub_print_warning("checksum/header error",
					    sblock_to_check);
		if (sblock_bad->generation_error)
			btrfs_dev_stat_inc_and_print(dev,
				BTRFS_DEV_STAT_GENERATION_ERRS);
		else
			btrfs_dev_stat_inc_and_print(dev,
				BTRFS_DEV_STAT_CORRUPTION_ERRS);
	}

	if (sctx->readonly) {
		ASSERT(!sctx->is_dev_replace);
		goto out;
	}

	/*
	 * now build and submit the bios for the other mirrors, check
	 * checksums.
	 * First try to pick the mirror which is completely without I/O
	 * errors.
	 */
	for (mirror_index = 0; ; mirror_index++) {
		struct scrub_block *sblock_other;

		if (mirror_index == failed_mirror_index)
			continue;

		/* raid56's mirror can be more than BTRFS_MAX_MIRRORS */
		if (!scrub_is_page_on_raid56(sblock_bad->pagev[0])) {
			if (mirror_index >= BTRFS_MAX_MIRRORS)
				break;
			if (!sblocks_for_recheck[mirror_index].page_count)
				break;

			sblock_other = sblocks_for_recheck + mirror_index;
		} else {
			struct scrub_recover *r = sblock_bad->pagev[0]->recover;
			int max_allowed = r->bioc->num_stripes - r->bioc->num_tgtdevs;

			if (mirror_index >= max_allowed)
				break;
			if (!sblocks_for_recheck[1].page_count)
				break;

			ASSERT(failed_mirror_index == 0);
			sblock_other = sblocks_for_recheck + 1;
			sblock_other->pagev[0]->mirror_num = 1 + mirror_index;
		}

		/* build and submit the bios, check checksums */
		scrub_recheck_block(fs_info, sblock_other, 0);

		if (!sblock_other->header_error &&
		    !sblock_other->checksum_error &&
		    sblock_other->no_io_error_seen) {
			if (sctx->is_dev_replace) {
				scrub_write_block_to_dev_replace(sblock_other);
				goto corrected_error;
			} else {
				ret = scrub_repair_block_from_good_copy(
						sblock_bad, sblock_other);
				if (!ret)
					goto corrected_error;
			}
		}
	}

	if (sblock_bad->no_io_error_seen && !sctx->is_dev_replace)
		goto did_not_correct_error;

	/*
	 * In case of I/O errors in the area that is supposed to be
	 * repaired, continue by picking good copies of those sectors.
	 * Select the good sectors from mirrors to rewrite bad sectors from
	 * the area to fix. Afterwards verify the checksum of the block
	 * that is supposed to be repaired. This verification step is
	 * only done for the purpose of statistic counting and for the
	 * final scrub report, whether errors remain.
	 * A perfect algorithm could make use of the checksum and try
	 * all possible combinations of sectors from the different mirrors
	 * until the checksum verification succeeds. For example, when
	 * the 2nd sector of mirror #1 faces I/O errors, and the 2nd sector
	 * of mirror #2 is good but its checksum cannot be verified in
	 * isolation, the 2nd sector of further mirrors could be tried as
	 * well. Such an exhaustive search is not implemented; instead the
	 * first mirror without an I/O error is picked for each sector.
	 */
	success = 1;
	for (page_num = 0; page_num < sblock_bad->page_count;
	     page_num++) {
		struct scrub_page *spage_bad = sblock_bad->pagev[page_num];
		struct scrub_block *sblock_other = NULL;

		/* skip no-io-error page in scrub */
		if (!spage_bad->io_error && !sctx->is_dev_replace)
			continue;

		if (scrub_is_page_on_raid56(sblock_bad->pagev[0])) {
			/*
			 * In case of dev replace, if raid56 rebuild process
			 * didn't work out correct data, then copy the content
			 * in sblock_bad to make sure target device is identical
			 * to source device, instead of writing garbage data in
			 * sblock_for_recheck array to target device.
			 */
			sblock_other = NULL;
		} else if (spage_bad->io_error) {
			/* try to find no-io-error page in mirrors */
			for (mirror_index = 0;
			     mirror_index < BTRFS_MAX_MIRRORS &&
			     sblocks_for_recheck[mirror_index].page_count > 0;
			     mirror_index++) {
				if (!sblocks_for_recheck[mirror_index].
				    pagev[page_num]->io_error) {
					sblock_other = sblocks_for_recheck +
						       mirror_index;
					break;
				}
			}
			if (!sblock_other)
				success = 0;
		}

		if (sctx->is_dev_replace) {
			/*
			 * did not find a mirror to fetch the page
			 * from. scrub_write_page_to_dev_replace()
			 * handles this case (page->io_error), by
			 * filling the block with zeros before
			 * submitting the write request
			 */
			if (!sblock_other)
				sblock_other = sblock_bad;

			if (scrub_write_page_to_dev_replace(sblock_other,
							    page_num) != 0) {
				atomic64_inc(
					&fs_info->dev_replace.num_write_errors);
				success = 0;
			}
		} else if (sblock_other) {
			ret = scrub_repair_page_from_good_copy(sblock_bad,
							       sblock_other,
							       page_num, 0);
			if (ret == 0)
				spage_bad->io_error = 0;
			else
				success = 0;
		}
	}

	if (success && !sctx->is_dev_replace) {
		if (is_metadata || have_csum) {
			/*
			 * need to verify the checksum now that all
			 * sectors on disk are repaired (the write
			 * request for data to be repaired is on its way).
			 * Just be lazy and use scrub_recheck_block()
			 * which re-reads the data before the checksum
			 * is verified, but most likely the data comes out
			 * of the page cache.
			 */
			scrub_recheck_block(fs_info, sblock_bad, 1);
			if (!sblock_bad->header_error &&
			    !sblock_bad->checksum_error &&
			    sblock_bad->no_io_error_seen)
				goto corrected_error;
			else
				goto did_not_correct_error;
		} else {
corrected_error:
			spin_lock(&sctx->stat_lock);
			sctx->stat.corrected_errors++;
			sblock_to_check->data_corrected = 1;
			spin_unlock(&sctx->stat_lock);
			btrfs_err_rl_in_rcu(fs_info,
				"fixed up error at logical %llu on dev %s",
				logical, rcu_str_deref(dev->name));
		}
	} else {
did_not_correct_error:
		spin_lock(&sctx->stat_lock);
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		btrfs_err_rl_in_rcu(fs_info,
			"unable to fixup (regular) error at logical %llu on dev %s",
			logical, rcu_str_deref(dev->name));
	}

out:
	if (sblocks_for_recheck) {
		for (mirror_index = 0; mirror_index < BTRFS_MAX_MIRRORS;
		     mirror_index++) {
			struct scrub_block *sblock = sblocks_for_recheck +
						     mirror_index;
			struct scrub_recover *recover;
			int page_index;

			for (page_index = 0; page_index < sblock->page_count;
			     page_index++) {
				sblock->pagev[page_index]->sblock = NULL;
				recover = sblock->pagev[page_index]->recover;
				if (recover) {
					scrub_put_recover(fs_info, recover);
					sblock->pagev[page_index]->recover =
									NULL;
				}
				scrub_page_put(sblock->pagev[page_index]);
			}
		}
		kfree(sblocks_for_recheck);
	}

	ret = unlock_full_stripe(fs_info, logical, full_stripe_locked);
	memalloc_nofs_restore(nofs_flag);
	if (ret < 0)
		return ret;
	return 0;
}

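/*
 * For RAID5 a block can be read back two ways (the data stripe itself, or a
 * rebuild from the remaining data plus P), and for RAID6 three ways (data,
 * rebuild via P, rebuild via Q), which is why those profiles report 2 and 3
 * "mirrors" here.
 */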
static inline int scrub_nr_raid_mirrors(struct btrfs_io_context *bioc)
{
	if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID5)
		return 2;
	else if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID6)
		return 3;
	else
		return (int)bioc->num_stripes;
}

static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
						 u64 *raid_map,
						 u64 mapped_length,
						 int nstripes, int mirror,
						 int *stripe_index,
						 u64 *stripe_offset)
{
	int i;

	if (map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
		/* RAID5/6 */
		for (i = 0; i < nstripes; i++) {
			if (raid_map[i] == RAID6_Q_STRIPE ||
			    raid_map[i] == RAID5_P_STRIPE)
				continue;

			if (logical >= raid_map[i] &&
			    logical < raid_map[i] + mapped_length)
				break;
		}

		*stripe_index = i;
		*stripe_offset = logical - raid_map[i];
	} else {
		/* The other RAID type */
		*stripe_index = mirror;
		*stripe_offset = 0;
	}
}

static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
				     struct scrub_block *sblocks_for_recheck)
{
	struct scrub_ctx *sctx = original_sblock->sctx;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	u64 length = original_sblock->page_count * fs_info->sectorsize;
	u64 logical = original_sblock->pagev[0]->logical;
	u64 generation = original_sblock->pagev[0]->generation;
	u64 flags = original_sblock->pagev[0]->flags;
	u64 have_csum = original_sblock->pagev[0]->have_csum;
	struct scrub_recover *recover;
	struct btrfs_io_context *bioc;
	u64 sublen;
	u64 mapped_length;
	u64 stripe_offset;
	int stripe_index;
	int page_index = 0;
	int mirror_index;
	int nmirrors;
	int ret;

	/*
	 * note: the two members refs and outstanding_pages
	 * are not used (and not set) in the blocks that are used for
	 * the recheck procedure
	 */
	while (length > 0) {
		sublen = min_t(u64, length, fs_info->sectorsize);
		mapped_length = sublen;
		bioc = NULL;

		/*
		 * with a length of sectorsize, each returned stripe
		 * represents one mirror
		 */
		btrfs_bio_counter_inc_blocked(fs_info);
		ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
				       logical, &mapped_length, &bioc);
		if (ret || !bioc || mapped_length < sublen) {
			btrfs_put_bioc(bioc);
			btrfs_bio_counter_dec(fs_info);
			return -EIO;
		}

		recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
		if (!recover) {
			btrfs_put_bioc(bioc);
			btrfs_bio_counter_dec(fs_info);
			return -ENOMEM;
		}

		refcount_set(&recover->refs, 1);
		recover->bioc = bioc;
		recover->map_length = mapped_length;

		BUG_ON(page_index >= SCRUB_MAX_PAGES_PER_BLOCK);

		nmirrors = min(scrub_nr_raid_mirrors(bioc), BTRFS_MAX_MIRRORS);

		for (mirror_index = 0; mirror_index < nmirrors;
		     mirror_index++) {
			struct scrub_block *sblock;
			struct scrub_page *spage;

			sblock = sblocks_for_recheck + mirror_index;
			sblock->sctx = sctx;

			spage = kzalloc(sizeof(*spage), GFP_NOFS);
			if (!spage) {
leave_nomem:
				spin_lock(&sctx->stat_lock);
				sctx->stat.malloc_errors++;
				spin_unlock(&sctx->stat_lock);
				scrub_put_recover(fs_info, recover);
				return -ENOMEM;
			}
			scrub_page_get(spage);
			sblock->pagev[page_index] = spage;
			spage->sblock = sblock;
			spage->flags = flags;
			spage->generation = generation;
			spage->logical = logical;
			spage->have_csum = have_csum;
			if (have_csum)
				memcpy(spage->csum,
				       original_sblock->pagev[0]->csum,
				       sctx->fs_info->csum_size);

			scrub_stripe_index_and_offset(logical,
						      bioc->map_type,
						      bioc->raid_map,
						      mapped_length,
						      bioc->num_stripes -
						      bioc->num_tgtdevs,
						      mirror_index,
						      &stripe_index,
						      &stripe_offset);
			spage->physical = bioc->stripes[stripe_index].physical +
					 stripe_offset;
			spage->dev = bioc->stripes[stripe_index].dev;

			BUG_ON(page_index >= original_sblock->page_count);
			spage->physical_for_dev_replace =
				original_sblock->pagev[page_index]->
				physical_for_dev_replace;
			/* for missing devices, dev->bdev is NULL */
			spage->mirror_num = mirror_index + 1;
			sblock->page_count++;
			spage->page = alloc_page(GFP_NOFS);
			if (!spage->page)
				goto leave_nomem;

			scrub_get_recover(recover);
			spage->recover = recover;
		}
		scrub_put_recover(fs_info, recover);
		length -= sublen;
		logical += sublen;
		page_index++;
	}

	return 0;
}

static void scrub_bio_wait_endio(struct bio *bio)
{
	complete(bio->bi_private);
}

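/*
 * Submit a read of one block through the RAID56 parity recovery path and
 * wait for it to complete: the data is rebuilt from the surviving stripes
 * instead of being read directly from the failed one.
 */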
static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
					struct bio *bio,
					struct scrub_page *spage)
{
	DECLARE_COMPLETION_ONSTACK(done);
	int ret;
	int mirror_num;

	bio->bi_iter.bi_sector = spage->logical >> 9;
	bio->bi_private = &done;
	bio->bi_end_io = scrub_bio_wait_endio;

	mirror_num = spage->sblock->pagev[0]->mirror_num;
	ret = raid56_parity_recover(bio, spage->recover->bioc,
				    spage->recover->map_length,
				    mirror_num, 0);
	if (ret)
		return ret;

	wait_for_completion_io(&done);
	return blk_status_to_errno(bio->bi_status);
}

static void scrub_recheck_block_on_raid56(struct btrfs_fs_info *fs_info,
					  struct scrub_block *sblock)
{
	struct scrub_page *first_page = sblock->pagev[0];
	struct bio *bio;
	int page_num;

	/* All pages in sblock belong to the same stripe on the same device. */
	ASSERT(first_page->dev);
	if (!first_page->dev->bdev)
		goto out;

	bio = btrfs_bio_alloc(BIO_MAX_VECS);
	bio_set_dev(bio, first_page->dev->bdev);

	for (page_num = 0; page_num < sblock->page_count; page_num++) {
		struct scrub_page *spage = sblock->pagev[page_num];

		WARN_ON(!spage->page);
		bio_add_page(bio, spage->page, PAGE_SIZE, 0);
	}

	if (scrub_submit_raid56_bio_wait(fs_info, bio, first_page)) {
		bio_put(bio);
		goto out;
	}

	bio_put(bio);

	scrub_recheck_block_checksum(sblock);

	return;
out:
	for (page_num = 0; page_num < sblock->page_count; page_num++)
		sblock->pagev[page_num]->io_error = 1;

	sblock->no_io_error_seen = 0;
}

/*
 * this function will check the on disk data for checksum errors, header
 * errors and read I/O errors. If any I/O errors happen, the exact pages
 * which are errored are marked as being bad. The goal of this function
 * is to know which pages are errored in the different mirrors, in order
 * to decide which mirrors can be used to repair them.
 */
static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
				struct scrub_block *sblock,
				int retry_failed_mirror)
{
	int page_num;

	sblock->no_io_error_seen = 1;

	/* short circuit for raid56 */
	if (!retry_failed_mirror && scrub_is_page_on_raid56(sblock->pagev[0]))
		return scrub_recheck_block_on_raid56(fs_info, sblock);

	for (page_num = 0; page_num < sblock->page_count; page_num++) {
		struct bio *bio;
		struct scrub_page *spage = sblock->pagev[page_num];

		if (spage->dev->bdev == NULL) {
			spage->io_error = 1;
			sblock->no_io_error_seen = 0;
			continue;
		}

		WARN_ON(!spage->page);
		bio = btrfs_bio_alloc(1);
		bio_set_dev(bio, spage->dev->bdev);

		bio_add_page(bio, spage->page, fs_info->sectorsize, 0);
		bio->bi_iter.bi_sector = spage->physical >> 9;
		bio->bi_opf = REQ_OP_READ;

		if (btrfsic_submit_bio_wait(bio)) {
			spage->io_error = 1;
			sblock->no_io_error_seen = 0;
		}

		bio_put(bio);
	}

	if (sblock->no_io_error_seen)
		scrub_recheck_block_checksum(sblock);
}

static inline int scrub_check_fsid(u8 fsid[],
				   struct scrub_page *spage)
{
	struct btrfs_fs_devices *fs_devices = spage->dev->fs_devices;
	int ret;

	ret = memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
	return !ret;
}

static void scrub_recheck_block_checksum(struct scrub_block *sblock)
{
	sblock->header_error = 0;
	sblock->checksum_error = 0;
	sblock->generation_error = 0;

	if (sblock->pagev[0]->flags & BTRFS_EXTENT_FLAG_DATA)
		scrub_checksum_data(sblock);
	else
		scrub_checksum_tree_block(sblock);
}

static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
					     struct scrub_block *sblock_good)
{
	int page_num;
	int ret = 0;

	for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
		int ret_sub;

		ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
							   sblock_good,
							   page_num, 1);
		if (ret_sub)
			ret = ret_sub;
	}

	return ret;
}

static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
					    struct scrub_block *sblock_good,
					    int page_num, int force_write)
{
	struct scrub_page *spage_bad = sblock_bad->pagev[page_num];
	struct scrub_page *spage_good = sblock_good->pagev[page_num];
	struct btrfs_fs_info *fs_info = sblock_bad->sctx->fs_info;
	const u32 sectorsize = fs_info->sectorsize;

	BUG_ON(spage_bad->page == NULL);
	BUG_ON(spage_good->page == NULL);
	if (force_write || sblock_bad->header_error ||
	    sblock_bad->checksum_error || spage_bad->io_error) {
		struct bio *bio;
		int ret;

		if (!spage_bad->dev->bdev) {
			btrfs_warn_rl(fs_info,
				"scrub_repair_page_from_good_copy(bdev == NULL) is unexpected");
			return -EIO;
		}

		bio = btrfs_bio_alloc(1);
		bio_set_dev(bio, spage_bad->dev->bdev);
		bio->bi_iter.bi_sector = spage_bad->physical >> 9;
		bio->bi_opf = REQ_OP_WRITE;

		ret = bio_add_page(bio, spage_good->page, sectorsize, 0);
		if (ret != sectorsize) {
			bio_put(bio);
			return -EIO;
		}

		if (btrfsic_submit_bio_wait(bio)) {
			btrfs_dev_stat_inc_and_print(spage_bad->dev,
				BTRFS_DEV_STAT_WRITE_ERRS);
			atomic64_inc(&fs_info->dev_replace.num_write_errors);
			bio_put(bio);
			return -EIO;
		}
		bio_put(bio);
	}

	return 0;
}

static void scrub_write_block_to_dev_replace(struct scrub_block *sblock)
{
	struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;
	int page_num;

	/*
	 * This block is used for the check of the parity on the source device,
	 * so the data needn't be written into the destination device.
	 */
	if (sblock->sparity)
		return;

	for (page_num = 0; page_num < sblock->page_count; page_num++) {
		int ret;

		ret = scrub_write_page_to_dev_replace(sblock, page_num);
		if (ret)
			atomic64_inc(&fs_info->dev_replace.num_write_errors);
	}
}

static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
					   int page_num)
{
	struct scrub_page *spage = sblock->pagev[page_num];

	BUG_ON(spage->page == NULL);
	if (spage->io_error)
		clear_page(page_address(spage->page));

	return scrub_add_page_to_wr_bio(sblock->sctx, spage);
}

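/*
 * On zoned filesystems, writes to the replace target must be sequential. If
 * the next write would land beyond the current write pointer, zero out the
 * gap first so the device's write pointer catches up to @physical.
 */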
static int fill_writer_pointer_gap(struct scrub_ctx *sctx, u64 physical)
{
	int ret = 0;
	u64 length;

	if (!btrfs_is_zoned(sctx->fs_info))
		return 0;

	if (!btrfs_dev_is_sequential(sctx->wr_tgtdev, physical))
		return 0;

	if (sctx->write_pointer < physical) {
		length = physical - sctx->write_pointer;

		ret = btrfs_zoned_issue_zeroout(sctx->wr_tgtdev,
						sctx->write_pointer, length);
		if (!ret)
			sctx->write_pointer = physical;
	}
	return ret;
}

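/*
 * Queue one sector for writing to the dev-replace target. Sectors are
 * batched into wr_curr_bio as long as they are physically and logically
 * contiguous; a non-contiguous sector or a full bio triggers submission.
 */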
static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage)
{
	struct scrub_bio *sbio;
	int ret;
	const u32 sectorsize = sctx->fs_info->sectorsize;

	mutex_lock(&sctx->wr_lock);
again:
	if (!sctx->wr_curr_bio) {
		sctx->wr_curr_bio = kzalloc(sizeof(*sctx->wr_curr_bio),
					    GFP_KERNEL);
		if (!sctx->wr_curr_bio) {
			mutex_unlock(&sctx->wr_lock);
			return -ENOMEM;
		}
		sctx->wr_curr_bio->sctx = sctx;
		sctx->wr_curr_bio->page_count = 0;
	}
	sbio = sctx->wr_curr_bio;
	if (sbio->page_count == 0) {
		struct bio *bio;

		ret = fill_writer_pointer_gap(sctx,
					      spage->physical_for_dev_replace);
		if (ret) {
			mutex_unlock(&sctx->wr_lock);
			return ret;
		}

		sbio->physical = spage->physical_for_dev_replace;
		sbio->logical = spage->logical;
		sbio->dev = sctx->wr_tgtdev;
		bio = sbio->bio;
		if (!bio) {
			bio = btrfs_bio_alloc(sctx->pages_per_wr_bio);
			sbio->bio = bio;
		}

		bio->bi_private = sbio;
		bio->bi_end_io = scrub_wr_bio_end_io;
		bio_set_dev(bio, sbio->dev->bdev);
		bio->bi_iter.bi_sector = sbio->physical >> 9;
		bio->bi_opf = REQ_OP_WRITE;
		sbio->status = 0;
	} else if (sbio->physical + sbio->page_count * sectorsize !=
		   spage->physical_for_dev_replace ||
		   sbio->logical + sbio->page_count * sectorsize !=
		   spage->logical) {
		scrub_wr_submit(sctx);
		goto again;
	}

	ret = bio_add_page(sbio->bio, spage->page, sectorsize, 0);
	if (ret != sectorsize) {
		if (sbio->page_count < 1) {
			bio_put(sbio->bio);
			sbio->bio = NULL;
			mutex_unlock(&sctx->wr_lock);
			return -EIO;
		}
		scrub_wr_submit(sctx);
		goto again;
	}

	sbio->pagev[sbio->page_count] = spage;
	scrub_page_get(spage);
	sbio->page_count++;
	if (sbio->page_count == sctx->pages_per_wr_bio)
		scrub_wr_submit(sctx);
	mutex_unlock(&sctx->wr_lock);

	return 0;
}

static void scrub_wr_submit(struct scrub_ctx *sctx)
{
	struct scrub_bio *sbio;

	if (!sctx->wr_curr_bio)
		return;

	sbio = sctx->wr_curr_bio;
	sctx->wr_curr_bio = NULL;
	WARN_ON(!sbio->bio->bi_bdev);
	scrub_pending_bio_inc(sctx);
	/* process all writes in a single worker thread. Then the block layer
	 * orders the requests before sending them to the driver which
	 * doubles the write performance */
	btrfsic_submit_bio(sbio->bio);

	if (btrfs_is_zoned(sctx->fs_info))
		sctx->write_pointer = sbio->physical + sbio->page_count *
			sctx->fs_info->sectorsize;
}

static void scrub_wr_bio_end_io(struct bio *bio)
{
	struct scrub_bio *sbio = bio->bi_private;
	struct btrfs_fs_info *fs_info = sbio->dev->fs_info;

	sbio->status = bio->bi_status;
	sbio->bio = bio;

	btrfs_init_work(&sbio->work, scrub_wr_bio_end_io_worker, NULL, NULL);
	btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work);
}

static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
{
	struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
	struct scrub_ctx *sctx = sbio->sctx;
	int i;

	WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
	if (sbio->status) {
		struct btrfs_dev_replace *dev_replace =
			&sbio->sctx->fs_info->dev_replace;

		for (i = 0; i < sbio->page_count; i++) {
			struct scrub_page *spage = sbio->pagev[i];

			spage->io_error = 1;
			atomic64_inc(&dev_replace->num_write_errors);
		}
	}

	for (i = 0; i < sbio->page_count; i++)
		scrub_page_put(sbio->pagev[i]);

	bio_put(sbio->bio);
	kfree(sbio);
	scrub_pending_bio_dec(sctx);
}

static int scrub_checksum(struct scrub_block *sblock)
{
	u64 flags;
	int ret;

	/*
	 * No need to initialize these stats currently,
	 * because this function only use return value
	 * instead of these stats value.
	 *
	 * Todo:
	 * always use stats
	 */
	sblock->header_error = 0;
	sblock->generation_error = 0;
	sblock->checksum_error = 0;

	WARN_ON(sblock->page_count < 1);
	flags = sblock->pagev[0]->flags;
	ret = 0;
	if (flags & BTRFS_EXTENT_FLAG_DATA)
		ret = scrub_checksum_data(sblock);
	else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
		ret = scrub_checksum_tree_block(sblock);
	else if (flags & BTRFS_EXTENT_FLAG_SUPER)
		(void)scrub_checksum_super(sblock);
	else
		WARN_ON(1);
	if (ret)
		scrub_handle_errored_block(sblock);

	return ret;
}

static int scrub_checksum_data(struct scrub_block *sblock)
{
	struct scrub_ctx *sctx = sblock->sctx;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
	u8 csum[BTRFS_CSUM_SIZE];
	struct scrub_page *spage;
	char *kaddr;

	BUG_ON(sblock->page_count < 1);
	spage = sblock->pagev[0];
	if (!spage->have_csum)
		return 0;

	kaddr = page_address(spage->page);

	shash->tfm = fs_info->csum_shash;
	crypto_shash_init(shash);

	/*
	 * In scrub_pages() and scrub_pages_for_parity() we ensure each spage
	 * only contains one sector of data.
	 */
	crypto_shash_digest(shash, kaddr, fs_info->sectorsize, csum);

	if (memcmp(csum, spage->csum, fs_info->csum_size))
		sblock->checksum_error = 1;
	return sblock->checksum_error;
}

static int scrub_checksum_tree_block(struct scrub_block *sblock)
{
	struct scrub_ctx *sctx = sblock->sctx;
	struct btrfs_header *h;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
	u8 calculated_csum[BTRFS_CSUM_SIZE];
	u8 on_disk_csum[BTRFS_CSUM_SIZE];
	/*
	 * This is done in sectorsize steps even for metadata as there's a
	 * constraint for nodesize and PAGE_SIZE in current setup.
	 * The work here is done in whole sector steps, which is fine for all
	 * supported combinations of sectorsize and nodesize.
	 */
	const u32 sectorsize = sctx->fs_info->sectorsize;
	const int num_sectors = fs_info->nodesize >> fs_info->sectorsize_bits;
	int i;
	struct scrub_page *spage;
	char *kaddr;

	BUG_ON(sblock->page_count < 1);

	/* Each member in pagev is just one sector, not a full page */
	ASSERT(sblock->page_count == num_sectors);

	spage = sblock->pagev[0];
	kaddr = page_address(spage->page);
	h = (struct btrfs_header *)kaddr;
	memcpy(on_disk_csum, h->csum, sctx->fs_info->csum_size);

	/*
	 * we don't use the getter functions here, as we
	 * a) don't have an extent buffer and
	 * b) the page is already kmapped
	 */
	if (spage->logical != btrfs_stack_header_bytenr(h))
		sblock->header_error = 1;

	if (spage->generation != btrfs_stack_header_generation(h)) {
		sblock->header_error = 1;
		sblock->generation_error = 1;
	}

	if (!scrub_check_fsid(h->fsid, spage))
		sblock->header_error = 1;

	if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
		   BTRFS_UUID_SIZE))
		sblock->header_error = 1;

	shash->tfm = fs_info->csum_shash;
	crypto_shash_init(shash);
	crypto_shash_update(shash, kaddr + BTRFS_CSUM_SIZE,
			    sectorsize - BTRFS_CSUM_SIZE);

	for (i = 1; i < num_sectors; i++) {
		kaddr = page_address(sblock->pagev[i]->page);
		crypto_shash_update(shash, kaddr, sectorsize);
	}

	crypto_shash_final(shash, calculated_csum);
	if (memcmp(calculated_csum, on_disk_csum, sctx->fs_info->csum_size))
		sblock->checksum_error = 1;

	return sblock->header_error || sblock->checksum_error;
}

static int scrub_checksum_super(struct scrub_block *sblock)
{
	struct btrfs_super_block *s;
	struct scrub_ctx *sctx = sblock->sctx;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
	u8 calculated_csum[BTRFS_CSUM_SIZE];
	struct scrub_page *spage;
	char *kaddr;
	int fail_gen = 0;
	int fail_cor = 0;

	BUG_ON(sblock->page_count < 1);
	spage = sblock->pagev[0];
	kaddr = page_address(spage->page);
	s = (struct btrfs_super_block *)kaddr;

	if (spage->logical != btrfs_super_bytenr(s))
		++fail_cor;

	if (spage->generation != btrfs_super_generation(s))
		++fail_gen;

	if (!scrub_check_fsid(s->fsid, spage))
		++fail_cor;

	shash->tfm = fs_info->csum_shash;
	crypto_shash_init(shash);
	crypto_shash_digest(shash, kaddr + BTRFS_CSUM_SIZE,
			BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, calculated_csum);

	if (memcmp(calculated_csum, s->csum, sctx->fs_info->csum_size))
		++fail_cor;

	if (fail_cor + fail_gen) {
		/*
		 * if we find an error in a super block, we just report it.
		 * They will get written with the next transaction commit
		 * anyway
		 */
		spin_lock(&sctx->stat_lock);
		++sctx->stat.super_errors;
		spin_unlock(&sctx->stat_lock);
		if (fail_cor)
			btrfs_dev_stat_inc_and_print(spage->dev,
				BTRFS_DEV_STAT_CORRUPTION_ERRS);
		else
			btrfs_dev_stat_inc_and_print(spage->dev,
				BTRFS_DEV_STAT_GENERATION_ERRS);
	}

	return fail_cor + fail_gen;
}

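/*
 * A scrub_block holds a reference on each of its pages; the final
 * scrub_block_put() drops those page references, releases the parity context
 * if one is attached and frees the block itself.
 */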
static void scrub_block_get(struct scrub_block *sblock)
{
	refcount_inc(&sblock->refs);
}

static void scrub_block_put(struct scrub_block *sblock)
{
	if (refcount_dec_and_test(&sblock->refs)) {
		int i;

		if (sblock->sparity)
			scrub_parity_put(sblock->sparity);

		for (i = 0; i < sblock->page_count; i++)
			scrub_page_put(sblock->pagev[i]);
		kfree(sblock);
	}
}

static void scrub_page_get(struct scrub_page *spage)
{
	atomic_inc(&spage->refs);
}

static void scrub_page_put(struct scrub_page *spage)
{
	if (atomic_dec_and_test(&spage->refs)) {
		if (spage->page)
			__free_page(spage->page);
		kfree(spage);
	}
}

/*
 * Throttling of IO submission, bandwidth-limit based, the timeslice is 1
 * second
 */
static void scrub_throttle(struct scrub_ctx *sctx)
{
	const int time_slice = 1000;
	struct scrub_bio *sbio;
	struct btrfs_device *device;
	s64 delta;
	ktime_t now;
	u32 div;
	u64 bwlimit;

	sbio = sctx->bios[sctx->curr];
	device = sbio->dev;
	bwlimit = READ_ONCE(device->scrub_speed_max);
	if (bwlimit == 0)
		return;

	/*
	 * Slice is divided into intervals when the IO is submitted, adjust by
	 * bwlimit and maximum of 64 intervals.
	 */
	div = max_t(u32, 1, (u32)(bwlimit / (16 * 1024 * 1024)));
	div = min_t(u32, 64, div);

	/* Start new epoch, set deadline */
	now = ktime_get();
	if (sctx->throttle_deadline == 0) {
		sctx->throttle_deadline = ktime_add_ms(now, time_slice / div);
		sctx->throttle_sent = 0;
	}

	/* Still in the time to send? */
	if (ktime_before(now, sctx->throttle_deadline)) {
		/* If current bio is within the limit, send it */
		sctx->throttle_sent += sbio->bio->bi_iter.bi_size;
		if (sctx->throttle_sent <= div_u64(bwlimit, div))
			return;

		/* We're over the limit, sleep until the rest of the slice */
		delta = ktime_ms_delta(sctx->throttle_deadline, now);
	} else {
		/* New request after deadline, start new epoch */
		delta = 0;
	}

	if (delta) {
		long timeout;

		timeout = div_u64(delta * HZ, 1000);
		schedule_timeout_interruptible(timeout);
	}

	/* Next call will start the deadline period */
	sctx->throttle_deadline = 0;
}

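/*
 * Hand the current read bio to the block layer, after applying the optional
 * per-device bandwidth throttle. The context's current slot is released;
 * completion is handled in scrub_bio_end_io_worker().
 */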
static void scrub_submit(struct scrub_ctx *sctx)
{
	struct scrub_bio *sbio;

	if (sctx->curr == -1)
		return;

	scrub_throttle(sctx);

	sbio = sctx->bios[sctx->curr];
	sctx->curr = -1;
	scrub_pending_bio_inc(sctx);
	btrfsic_submit_bio(sbio->bio);
}

static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage)
{
	struct scrub_block *sblock = spage->sblock;
	struct scrub_bio *sbio;
	const u32 sectorsize = sctx->fs_info->sectorsize;
	int ret;

again:
	/*
	 * grab a fresh bio or wait for one to become available
	 */
	while (sctx->curr == -1) {
		spin_lock(&sctx->list_lock);
		sctx->curr = sctx->first_free;
		if (sctx->curr != -1) {
			sctx->first_free = sctx->bios[sctx->curr]->next_free;
			sctx->bios[sctx->curr]->next_free = -1;
			sctx->bios[sctx->curr]->page_count = 0;
			spin_unlock(&sctx->list_lock);
		} else {
			spin_unlock(&sctx->list_lock);
			wait_event(sctx->list_wait, sctx->first_free != -1);
		}
	}
	sbio = sctx->bios[sctx->curr];
	if (sbio->page_count == 0) {
		struct bio *bio;

		sbio->physical = spage->physical;
		sbio->logical = spage->logical;
		sbio->dev = spage->dev;
		bio = sbio->bio;
		if (!bio) {
			bio = btrfs_bio_alloc(sctx->pages_per_rd_bio);
			sbio->bio = bio;
		}

		bio->bi_private = sbio;
		bio->bi_end_io = scrub_bio_end_io;
		bio_set_dev(bio, sbio->dev->bdev);
		bio->bi_iter.bi_sector = sbio->physical >> 9;
		bio->bi_opf = REQ_OP_READ;
		sbio->status = 0;
	} else if (sbio->physical + sbio->page_count * sectorsize !=
		   spage->physical ||
		   sbio->logical + sbio->page_count * sectorsize !=
		   spage->logical ||
		   sbio->dev != spage->dev) {
		scrub_submit(sctx);
		goto again;
	}

	sbio->pagev[sbio->page_count] = spage;
	ret = bio_add_page(sbio->bio, spage->page, sectorsize, 0);
	if (ret != sectorsize) {
		if (sbio->page_count < 1) {
			bio_put(sbio->bio);
			sbio->bio = NULL;
			return -EIO;
		}
		scrub_submit(sctx);
		goto again;
	}

	scrub_block_get(sblock); /* one for the page added to the bio */
	atomic_inc(&sblock->outstanding_pages);
	sbio->page_count++;
	if (sbio->page_count == sctx->pages_per_rd_bio)
		scrub_submit(sctx);

	return 0;
}

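/*
 * Completion path for blocks that live on a missing device and were rebuilt
 * by the RAID56 code: the end_io callback only records the IO status and
 * defers checksum verification and accounting to the worker below.
 */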
static void scrub_missing_raid56_end_io(struct bio *bio)
{
	struct scrub_block *sblock = bio->bi_private;
	struct btrfs_fs_info *fs_info = sblock->sctx->fs_info;

	if (bio->bi_status)
		sblock->no_io_error_seen = 0;

	bio_put(bio);

	btrfs_queue_work(fs_info->scrub_workers, &sblock->work);
}

static void scrub_missing_raid56_worker(struct btrfs_work *work)
{
	struct scrub_block *sblock = container_of(work, struct scrub_block, work);
	struct scrub_ctx *sctx = sblock->sctx;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	u64 logical;
	struct btrfs_device *dev;

	logical = sblock->pagev[0]->logical;
	dev = sblock->pagev[0]->dev;

	if (sblock->no_io_error_seen)
		scrub_recheck_block_checksum(sblock);

	if (!sblock->no_io_error_seen) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.read_errors++;
		spin_unlock(&sctx->stat_lock);
		btrfs_err_rl_in_rcu(fs_info,
			"IO error rebuilding logical %llu for dev %s",
			logical, rcu_str_deref(dev->name));
	} else if (sblock->header_error || sblock->checksum_error) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		btrfs_err_rl_in_rcu(fs_info,
			"failed to rebuild valid logical %llu for dev %s",
			logical, rcu_str_deref(dev->name));
	} else {
		scrub_write_block_to_dev_replace(sblock);
	}

	if (sctx->is_dev_replace && sctx->flush_all_writes) {
		mutex_lock(&sctx->wr_lock);
		scrub_wr_submit(sctx);
		mutex_unlock(&sctx->wr_lock);
	}

	scrub_block_put(sblock);
	scrub_pending_bio_dec(sctx);
}

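/*
 * Read a block that sits on a missing device by going through the RAID56
 * rebuild machinery: map the full stripe, attach the scrub pages to a
 * "missing" rbio and let the raid56 code reconstruct the data from the
 * remaining stripes.
 */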
static void scrub_missing_raid56_pages(struct scrub_block *sblock)
{
	struct scrub_ctx *sctx = sblock->sctx;
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	u64 length = sblock->page_count * PAGE_SIZE;
	u64 logical = sblock->pagev[0]->logical;
	struct btrfs_io_context *bioc = NULL;
	struct bio *bio;
	struct btrfs_raid_bio *rbio;
	int ret;
	int i;

	btrfs_bio_counter_inc_blocked(fs_info);
	ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
			       &length, &bioc);
	if (ret || !bioc || !bioc->raid_map)
		goto bioc_out;

	if (WARN_ON(!sctx->is_dev_replace ||
		    !(bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK))) {
		/*
		 * We shouldn't be scrubbing a missing device. Even for dev
		 * replace, we should only get here for RAID 5/6. We either
		 * managed to mount something with no mirrors remaining or
		 * there's a bug in scrub_remap_extent()/btrfs_map_block().
		 */
		goto bioc_out;
	}

	bio = btrfs_bio_alloc(BIO_MAX_VECS);
	bio->bi_iter.bi_sector = logical >> 9;
	bio->bi_private = sblock;
	bio->bi_end_io = scrub_missing_raid56_end_io;

	rbio = raid56_alloc_missing_rbio(bio, bioc, length);
	if (!rbio)
		goto rbio_out;

	for (i = 0; i < sblock->page_count; i++) {
		struct scrub_page *spage = sblock->pagev[i];

		raid56_add_scrub_pages(rbio, spage->page, spage->logical);
	}

	btrfs_init_work(&sblock->work, scrub_missing_raid56_worker, NULL, NULL);
	scrub_block_get(sblock);
	scrub_pending_bio_inc(sctx);
	raid56_submit_missing_rbio(rbio);
	return;

rbio_out:
	bio_put(bio);
bioc_out:
	btrfs_bio_counter_dec(fs_info);
	btrfs_put_bioc(bioc);
	spin_lock(&sctx->stat_lock);
	sctx->stat.malloc_errors++;
	spin_unlock(&sctx->stat_lock);
}

2259static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u32 len,
2260 u64 physical, struct btrfs_device *dev, u64 flags,
2261 u64 gen, int mirror_num, u8 *csum,
2262 u64 physical_for_dev_replace)
2263{
2264 struct scrub_block *sblock;
2265 const u32 sectorsize = sctx->fs_info->sectorsize;
2266 int index;
2267
2268 sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
2269 if (!sblock) {
2270 spin_lock(&sctx->stat_lock);
2271 sctx->stat.malloc_errors++;
2272 spin_unlock(&sctx->stat_lock);
2273 return -ENOMEM;
2274 }
2275
2276
2277
2278 refcount_set(&sblock->refs, 1);
2279 sblock->sctx = sctx;
2280 sblock->no_io_error_seen = 1;
2281
2282 for (index = 0; len > 0; index++) {
2283 struct scrub_page *spage;
2284
2285
2286
2287
2288
2289 u32 l = min(sectorsize, len);
2290
2291 spage = kzalloc(sizeof(*spage), GFP_KERNEL);
2292 if (!spage) {
2293leave_nomem:
2294 spin_lock(&sctx->stat_lock);
2295 sctx->stat.malloc_errors++;
2296 spin_unlock(&sctx->stat_lock);
2297 scrub_block_put(sblock);
2298 return -ENOMEM;
2299 }
2300 BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
2301 scrub_page_get(spage);
2302 sblock->pagev[index] = spage;
2303 spage->sblock = sblock;
2304 spage->dev = dev;
2305 spage->flags = flags;
2306 spage->generation = gen;
2307 spage->logical = logical;
2308 spage->physical = physical;
2309 spage->physical_for_dev_replace = physical_for_dev_replace;
2310 spage->mirror_num = mirror_num;
2311 if (csum) {
2312 spage->have_csum = 1;
2313 memcpy(spage->csum, csum, sctx->fs_info->csum_size);
2314 } else {
2315 spage->have_csum = 0;
2316 }
2317 sblock->page_count++;
2318 spage->page = alloc_page(GFP_KERNEL);
2319 if (!spage->page)
2320 goto leave_nomem;
2321 len -= l;
2322 logical += l;
2323 physical += l;
2324 physical_for_dev_replace += l;
2325 }
2326
2327 WARN_ON(sblock->page_count == 0);
2328 if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) {
		/*
		 * This case should only be hit for RAID 5/6 device replace.
		 * See the comment in scrub_missing_raid56_pages() for details.
		 */
2333 scrub_missing_raid56_pages(sblock);
2334 } else {
2335 for (index = 0; index < sblock->page_count; index++) {
2336 struct scrub_page *spage = sblock->pagev[index];
2337 int ret;
2338
2339 ret = scrub_add_page_to_rd_bio(sctx, spage);
2340 if (ret) {
2341 scrub_block_put(sblock);
2342 return ret;
2343 }
2344 }
2345
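		/*
		 * Superblocks are scrubbed outside of the regular extent
		 * walk; submit right away instead of letting the read sit
		 * in a partially filled bio.
		 */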
2346 if (flags & BTRFS_EXTENT_FLAG_SUPER)
2347 scrub_submit(sctx);
2348 }
2349
	/* Last one frees, either here or in bio completion for last page */
2351 scrub_block_put(sblock);
2352 return 0;
2353}
2354
2355static void scrub_bio_end_io(struct bio *bio)
2356{
2357 struct scrub_bio *sbio = bio->bi_private;
2358 struct btrfs_fs_info *fs_info = sbio->dev->fs_info;
2359
2360 sbio->status = bio->bi_status;
2361 sbio->bio = bio;
2362
2363 btrfs_queue_work(fs_info->scrub_workers, &sbio->work);
2364}
2365
2366static void scrub_bio_end_io_worker(struct btrfs_work *work)
2367{
2368 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
2369 struct scrub_ctx *sctx = sbio->sctx;
2370 int i;
2371
2372 BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
2373 if (sbio->status) {
2374 for (i = 0; i < sbio->page_count; i++) {
2375 struct scrub_page *spage = sbio->pagev[i];
2376
2377 spage->io_error = 1;
2378 spage->sblock->no_io_error_seen = 0;
2379 }
2380 }
2381
	/* Now complete the scrub_block items that have all pages completed */
2383 for (i = 0; i < sbio->page_count; i++) {
2384 struct scrub_page *spage = sbio->pagev[i];
2385 struct scrub_block *sblock = spage->sblock;
2386
2387 if (atomic_dec_and_test(&sblock->outstanding_pages))
2388 scrub_block_complete(sblock);
2389 scrub_block_put(sblock);
2390 }
2391
2392 bio_put(sbio->bio);
2393 sbio->bio = NULL;
2394 spin_lock(&sctx->list_lock);
2395 sbio->next_free = sctx->first_free;
2396 sctx->first_free = sbio->index;
2397 spin_unlock(&sctx->list_lock);
2398
2399 if (sctx->is_dev_replace && sctx->flush_all_writes) {
2400 mutex_lock(&sctx->wr_lock);
2401 scrub_wr_submit(sctx);
2402 mutex_unlock(&sctx->wr_lock);
2403 }
2404
2405 scrub_pending_bio_dec(sctx);
2406}
2407
2408static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
2409 unsigned long *bitmap,
2410 u64 start, u32 len)
2411{
2412 u64 offset;
2413 u32 nsectors;
2414 u32 sectorsize_bits = sparity->sctx->fs_info->sectorsize_bits;
2415
2416 if (len >= sparity->stripe_len) {
2417 bitmap_set(bitmap, 0, sparity->nsectors);
2418 return;
2419 }
2420
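	/*
	 * Compute the sector offset of @start within its stripe.  The
	 * marked range may wrap past the end of the stripe, in which case
	 * it is set in two pieces below.
	 */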
2421 start -= sparity->logic_start;
2422 start = div64_u64_rem(start, sparity->stripe_len, &offset);
2423 offset = offset >> sectorsize_bits;
2424 nsectors = len >> sectorsize_bits;
2425
2426 if (offset + nsectors <= sparity->nsectors) {
2427 bitmap_set(bitmap, offset, nsectors);
2428 return;
2429 }
2430
2431 bitmap_set(bitmap, offset, sparity->nsectors - offset);
2432 bitmap_set(bitmap, 0, nsectors - (sparity->nsectors - offset));
2433}
2434
2435static inline void scrub_parity_mark_sectors_error(struct scrub_parity *sparity,
2436 u64 start, u32 len)
2437{
2438 __scrub_mark_bitmap(sparity, sparity->ebitmap, start, len);
2439}
2440
2441static inline void scrub_parity_mark_sectors_data(struct scrub_parity *sparity,
2442 u64 start, u32 len)
2443{
2444 __scrub_mark_bitmap(sparity, sparity->dbitmap, start, len);
2445}
2446
2447static void scrub_block_complete(struct scrub_block *sblock)
2448{
2449 int corrupted = 0;
2450
2451 if (!sblock->no_io_error_seen) {
2452 corrupted = 1;
2453 scrub_handle_errored_block(sblock);
2454 } else {
		/*
		 * If there is a checksum error, the repair machinery takes
		 * care of writing the good copy to the dev-replace target;
		 * otherwise, in the dev-replace case, write the block here.
		 */
2460 corrupted = scrub_checksum(sblock);
2461 if (!corrupted && sblock->sctx->is_dev_replace)
2462 scrub_write_block_to_dev_replace(sblock);
2463 }
2464
2465 if (sblock->sparity && corrupted && !sblock->data_corrected) {
2466 u64 start = sblock->pagev[0]->logical;
2467 u64 end = sblock->pagev[sblock->page_count - 1]->logical +
2468 sblock->sctx->fs_info->sectorsize;
2469
2470 ASSERT(end - start <= U32_MAX);
2471 scrub_parity_mark_sectors_error(sblock->sparity,
2472 start, end - start);
2473 }
2474}
2475
2476static void drop_csum_range(struct scrub_ctx *sctx, struct btrfs_ordered_sum *sum)
2477{
2478 sctx->stat.csum_discards += sum->len >> sctx->fs_info->sectorsize_bits;
2479 list_del(&sum->list);
2480 kfree(sum);
2481}
2482
/*
 * Find the desired csum for range [logical, logical + sectorsize), and store
 * the csum into @csum.
 *
 * The search source is sctx->csum_list, which is a pre-populated list
 * storing bytenr-ordered csum ranges.  We're responsible for cleaning up any
 * range that is before @logical.
 *
 * Return 0 if there is no csum for the range.
 * Return 1 if there is csum for the range and it was copied to @csum.
 */
2494static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum)
2495{
2496 bool found = false;
2497
2498 while (!list_empty(&sctx->csum_list)) {
2499 struct btrfs_ordered_sum *sum = NULL;
2500 unsigned long index;
2501 unsigned long num_sectors;
2502
2503 sum = list_first_entry(&sctx->csum_list,
2504 struct btrfs_ordered_sum, list);
		/* The current csum range is beyond our range, no csum found */
2506 if (sum->bytenr > logical)
2507 break;
2508
		/*
		 * The current sum is before our bytenr; since scrub is always
		 * done in bytenr order, the csum will never be used anymore.
		 * Clean it up so that later calls won't bother with the range,
		 * and continue the search with the next range.
		 */
2515 if (sum->bytenr + sum->len <= logical) {
2516 drop_csum_range(sctx, sum);
2517 continue;
2518 }
2519
		/* Now the csum range covers our bytenr, copy the csum */
2521 found = true;
2522 index = (logical - sum->bytenr) >> sctx->fs_info->sectorsize_bits;
2523 num_sectors = sum->len >> sctx->fs_info->sectorsize_bits;
2524
2525 memcpy(csum, sum->sums + index * sctx->fs_info->csum_size,
2526 sctx->fs_info->csum_size);
2527
		/* Clean up the range if we're at its last sector */
2529 if (index == num_sectors - 1)
2530 drop_csum_range(sctx, sum);
2531 break;
2532 }
2533 if (!found)
2534 return 0;
2535 return 1;
2536}
2537
/* Scrub extent tries to collect up to 64 kB for each bio */
2539static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
2540 u64 logical, u32 len,
2541 u64 physical, struct btrfs_device *dev, u64 flags,
2542 u64 gen, int mirror_num, u64 physical_for_dev_replace)
2543{
2544 int ret;
2545 u8 csum[BTRFS_CSUM_SIZE];
2546 u32 blocksize;
2547
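	/*
	 * For RAID56 a block must not cross a stripe boundary, so split on
	 * the stripe length; otherwise split on sector (data) or node
	 * (metadata) granularity.
	 */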
2548 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2549 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
2550 blocksize = map->stripe_len;
2551 else
2552 blocksize = sctx->fs_info->sectorsize;
2553 spin_lock(&sctx->stat_lock);
2554 sctx->stat.data_extents_scrubbed++;
2555 sctx->stat.data_bytes_scrubbed += len;
2556 spin_unlock(&sctx->stat_lock);
2557 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
2558 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
2559 blocksize = map->stripe_len;
2560 else
2561 blocksize = sctx->fs_info->nodesize;
2562 spin_lock(&sctx->stat_lock);
2563 sctx->stat.tree_extents_scrubbed++;
2564 sctx->stat.tree_bytes_scrubbed += len;
2565 spin_unlock(&sctx->stat_lock);
2566 } else {
2567 blocksize = sctx->fs_info->sectorsize;
2568 WARN_ON(1);
2569 }
2570
2571 while (len) {
2572 u32 l = min(len, blocksize);
2573 int have_csum = 0;
2574
2575 if (flags & BTRFS_EXTENT_FLAG_DATA) {
			/* Push csums to sbio */
2577 have_csum = scrub_find_csum(sctx, logical, csum);
2578 if (have_csum == 0)
2579 ++sctx->stat.no_csum;
2580 }
2581 ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen,
2582 mirror_num, have_csum ? csum : NULL,
2583 physical_for_dev_replace);
2584 if (ret)
2585 return ret;
2586 len -= l;
2587 logical += l;
2588 physical += l;
2589 physical_for_dev_replace += l;
2590 }
2591 return 0;
2592}
2593
2594static int scrub_pages_for_parity(struct scrub_parity *sparity,
2595 u64 logical, u32 len,
2596 u64 physical, struct btrfs_device *dev,
2597 u64 flags, u64 gen, int mirror_num, u8 *csum)
2598{
2599 struct scrub_ctx *sctx = sparity->sctx;
2600 struct scrub_block *sblock;
2601 const u32 sectorsize = sctx->fs_info->sectorsize;
2602 int index;
2603
2604 ASSERT(IS_ALIGNED(len, sectorsize));
2605
2606 sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
2607 if (!sblock) {
2608 spin_lock(&sctx->stat_lock);
2609 sctx->stat.malloc_errors++;
2610 spin_unlock(&sctx->stat_lock);
2611 return -ENOMEM;
2612 }
2613
	/* One ref inside this function, plus one for each page added to
	 * a bio later on */
2616 refcount_set(&sblock->refs, 1);
2617 sblock->sctx = sctx;
2618 sblock->no_io_error_seen = 1;
2619 sblock->sparity = sparity;
2620 scrub_parity_get(sparity);
2621
2622 for (index = 0; len > 0; index++) {
2623 struct scrub_page *spage;
2624
2625 spage = kzalloc(sizeof(*spage), GFP_KERNEL);
2626 if (!spage) {
2627leave_nomem:
2628 spin_lock(&sctx->stat_lock);
2629 sctx->stat.malloc_errors++;
2630 spin_unlock(&sctx->stat_lock);
2631 scrub_block_put(sblock);
2632 return -ENOMEM;
2633 }
2634 BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
2635
2636 scrub_page_get(spage);
2637 sblock->pagev[index] = spage;
2638
2639 scrub_page_get(spage);
2640 list_add_tail(&spage->list, &sparity->spages);
2641 spage->sblock = sblock;
2642 spage->dev = dev;
2643 spage->flags = flags;
2644 spage->generation = gen;
2645 spage->logical = logical;
2646 spage->physical = physical;
2647 spage->mirror_num = mirror_num;
2648 if (csum) {
2649 spage->have_csum = 1;
2650 memcpy(spage->csum, csum, sctx->fs_info->csum_size);
2651 } else {
2652 spage->have_csum = 0;
2653 }
2654 sblock->page_count++;
2655 spage->page = alloc_page(GFP_KERNEL);
2656 if (!spage->page)
2657 goto leave_nomem;
2658
2659
		/* Iterate over the stripe range in sectorsize steps */
2661 len -= sectorsize;
2662 logical += sectorsize;
2663 physical += sectorsize;
2664 }
2665
2666 WARN_ON(sblock->page_count == 0);
2667 for (index = 0; index < sblock->page_count; index++) {
2668 struct scrub_page *spage = sblock->pagev[index];
2669 int ret;
2670
2671 ret = scrub_add_page_to_rd_bio(sctx, spage);
2672 if (ret) {
2673 scrub_block_put(sblock);
2674 return ret;
2675 }
2676 }
2677
	/* Last one frees, either here or in bio completion for last page */
2679 scrub_block_put(sblock);
2680 return 0;
2681}
2682
2683static int scrub_extent_for_parity(struct scrub_parity *sparity,
2684 u64 logical, u32 len,
2685 u64 physical, struct btrfs_device *dev,
2686 u64 flags, u64 gen, int mirror_num)
2687{
2688 struct scrub_ctx *sctx = sparity->sctx;
2689 int ret;
2690 u8 csum[BTRFS_CSUM_SIZE];
2691 u32 blocksize;
2692
2693 if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) {
2694 scrub_parity_mark_sectors_error(sparity, logical, len);
2695 return 0;
2696 }
2697
2698 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2699 blocksize = sparity->stripe_len;
2700 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
2701 blocksize = sparity->stripe_len;
2702 } else {
2703 blocksize = sctx->fs_info->sectorsize;
2704 WARN_ON(1);
2705 }
2706
2707 while (len) {
2708 u32 l = min(len, blocksize);
2709 int have_csum = 0;
2710
2711 if (flags & BTRFS_EXTENT_FLAG_DATA) {
			/* Push csums to sbio */
2713 have_csum = scrub_find_csum(sctx, logical, csum);
2714 if (have_csum == 0)
2715 goto skip;
2716 }
2717 ret = scrub_pages_for_parity(sparity, logical, l, physical, dev,
2718 flags, gen, mirror_num,
2719 have_csum ? csum : NULL);
2720 if (ret)
2721 return ret;
2722skip:
2723 len -= l;
2724 logical += l;
2725 physical += l;
2726 }
2727 return 0;
2728}
2729
/*
 * Given a physical address, this will calculate its logical offset.
 * If this is a parity stripe, it will return the leftmost data stripe's
 * logical offset.
 *
 * Return 0 if it is a data stripe, 1 means parity stripe.
 */
2737static int get_raid56_logic_offset(u64 physical, int num,
2738 struct map_lookup *map, u64 *offset,
2739 u64 *stripe_start)
2740{
2741 int i;
2742 int j = 0;
2743 u64 stripe_nr;
2744 u64 last_offset;
2745 u32 stripe_index;
2746 u32 rot;
2747 const int data_stripes = nr_data_stripes(map);
2748
2749 last_offset = (physical - map->stripes[num].physical) * data_stripes;
2750 if (stripe_start)
2751 *stripe_start = last_offset;
2752
2753 *offset = last_offset;
2754 for (i = 0; i < data_stripes; i++) {
2755 *offset = last_offset + i * map->stripe_len;
2756
2757 stripe_nr = div64_u64(*offset, map->stripe_len);
2758 stripe_nr = div_u64(stripe_nr, data_stripes);
2759
		/* Work out the disk rotation on this stripe-set */
2761 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &rot);
		/* Calculate which stripe this data is located on */
2763 rot += i;
2764 stripe_index = rot % map->num_stripes;
2765 if (stripe_index == num)
2766 return 0;
2767 if (stripe_index < num)
2768 j++;
2769 }
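	/*
	 * @physical holds parity (P/Q) in this rotation; per the comment
	 * above, return the leftmost data stripe's logical offset.
	 */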
2770 *offset = last_offset + j * map->stripe_len;
2771 return 1;
2772}
2773
2774static void scrub_free_parity(struct scrub_parity *sparity)
2775{
2776 struct scrub_ctx *sctx = sparity->sctx;
2777 struct scrub_page *curr, *next;
2778 int nbits;
2779
2780 nbits = bitmap_weight(sparity->ebitmap, sparity->nsectors);
2781 if (nbits) {
2782 spin_lock(&sctx->stat_lock);
2783 sctx->stat.read_errors += nbits;
2784 sctx->stat.uncorrectable_errors += nbits;
2785 spin_unlock(&sctx->stat_lock);
2786 }
2787
2788 list_for_each_entry_safe(curr, next, &sparity->spages, list) {
2789 list_del_init(&curr->list);
2790 scrub_page_put(curr);
2791 }
2792
2793 kfree(sparity);
2794}
2795
2796static void scrub_parity_bio_endio_worker(struct btrfs_work *work)
2797{
2798 struct scrub_parity *sparity = container_of(work, struct scrub_parity,
2799 work);
2800 struct scrub_ctx *sctx = sparity->sctx;
2801
2802 scrub_free_parity(sparity);
2803 scrub_pending_bio_dec(sctx);
2804}
2805
2806static void scrub_parity_bio_endio(struct bio *bio)
2807{
2808 struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
2809 struct btrfs_fs_info *fs_info = sparity->sctx->fs_info;
2810
2811 if (bio->bi_status)
2812 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
2813 sparity->nsectors);
2814
2815 bio_put(bio);
2816
2817 btrfs_init_work(&sparity->work, scrub_parity_bio_endio_worker, NULL,
2818 NULL);
2819 btrfs_queue_work(fs_info->scrub_parity_workers, &sparity->work);
2820}
2821
2822static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
2823{
2824 struct scrub_ctx *sctx = sparity->sctx;
2825 struct btrfs_fs_info *fs_info = sctx->fs_info;
2826 struct bio *bio;
2827 struct btrfs_raid_bio *rbio;
2828 struct btrfs_io_context *bioc = NULL;
2829 u64 length;
2830 int ret;
2831
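	/*
	 * Drop the sectors that hit read errors from the data bitmap; if
	 * no data sectors are left, there is nothing for the RAID56 code
	 * to verify.
	 */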
2832 if (!bitmap_andnot(sparity->dbitmap, sparity->dbitmap, sparity->ebitmap,
2833 sparity->nsectors))
2834 goto out;
2835
2836 length = sparity->logic_end - sparity->logic_start;
2837
2838 btrfs_bio_counter_inc_blocked(fs_info);
2839 ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, sparity->logic_start,
2840 &length, &bioc);
2841 if (ret || !bioc || !bioc->raid_map)
2842 goto bioc_out;
2843
2844 bio = btrfs_bio_alloc(BIO_MAX_VECS);
2845 bio->bi_iter.bi_sector = sparity->logic_start >> 9;
2846 bio->bi_private = sparity;
2847 bio->bi_end_io = scrub_parity_bio_endio;
2848
2849 rbio = raid56_parity_alloc_scrub_rbio(bio, bioc, length,
2850 sparity->scrub_dev,
2851 sparity->dbitmap,
2852 sparity->nsectors);
2853 if (!rbio)
2854 goto rbio_out;
2855
2856 scrub_pending_bio_inc(sctx);
2857 raid56_parity_submit_scrub_rbio(rbio);
2858 return;
2859
2860rbio_out:
2861 bio_put(bio);
2862bioc_out:
2863 btrfs_bio_counter_dec(fs_info);
2864 btrfs_put_bioc(bioc);
2865 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
2866 sparity->nsectors);
2867 spin_lock(&sctx->stat_lock);
2868 sctx->stat.malloc_errors++;
2869 spin_unlock(&sctx->stat_lock);
2870out:
2871 scrub_free_parity(sparity);
2872}
2873
2874static inline int scrub_calc_parity_bitmap_len(int nsectors)
2875{
2876 return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * sizeof(long);
2877}
2878
2879static void scrub_parity_get(struct scrub_parity *sparity)
2880{
2881 refcount_inc(&sparity->refs);
2882}
2883
2884static void scrub_parity_put(struct scrub_parity *sparity)
2885{
2886 if (!refcount_dec_and_test(&sparity->refs))
2887 return;
2888
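	/* Last reference dropped, kick off the parity check and repair. */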
2889 scrub_parity_check_and_repair(sparity);
2890}
2891
2892static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
2893 struct map_lookup *map,
2894 struct btrfs_device *sdev,
2895 struct btrfs_path *path,
2896 u64 logic_start,
2897 u64 logic_end)
2898{
2899 struct btrfs_fs_info *fs_info = sctx->fs_info;
2900 struct btrfs_root *root = fs_info->extent_root;
2901 struct btrfs_root *csum_root = fs_info->csum_root;
2902 struct btrfs_extent_item *extent;
2903 struct btrfs_io_context *bioc = NULL;
2904 u64 flags;
2905 int ret;
2906 int slot;
2907 struct extent_buffer *l;
2908 struct btrfs_key key;
2909 u64 generation;
2910 u64 extent_logical;
2911 u64 extent_physical;
	/* Check the comment in scrub_stripe() for why u32 is enough here */
2913 u32 extent_len;
2914 u64 mapped_length;
2915 struct btrfs_device *extent_dev;
2916 struct scrub_parity *sparity;
2917 int nsectors;
2918 int bitmap_len;
2919 int extent_mirror_num;
2920 int stop_loop = 0;
2921
2922 ASSERT(map->stripe_len <= U32_MAX);
2923 nsectors = map->stripe_len >> fs_info->sectorsize_bits;
2924 bitmap_len = scrub_calc_parity_bitmap_len(nsectors);
2925 sparity = kzalloc(sizeof(struct scrub_parity) + 2 * bitmap_len,
2926 GFP_NOFS);
2927 if (!sparity) {
2928 spin_lock(&sctx->stat_lock);
2929 sctx->stat.malloc_errors++;
2930 spin_unlock(&sctx->stat_lock);
2931 return -ENOMEM;
2932 }
2933
2934 ASSERT(map->stripe_len <= U32_MAX);
2935 sparity->stripe_len = map->stripe_len;
2936 sparity->nsectors = nsectors;
2937 sparity->sctx = sctx;
2938 sparity->scrub_dev = sdev;
2939 sparity->logic_start = logic_start;
2940 sparity->logic_end = logic_end;
2941 refcount_set(&sparity->refs, 1);
2942 INIT_LIST_HEAD(&sparity->spages);
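	/* Both bitmaps are carved out of the single 2 * bitmap_len allocation */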
2943 sparity->dbitmap = sparity->bitmap;
2944 sparity->ebitmap = (void *)sparity->bitmap + bitmap_len;
2945
2946 ret = 0;
2947 while (logic_start < logic_end) {
2948 if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
2949 key.type = BTRFS_METADATA_ITEM_KEY;
2950 else
2951 key.type = BTRFS_EXTENT_ITEM_KEY;
2952 key.objectid = logic_start;
2953 key.offset = (u64)-1;
2954
2955 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2956 if (ret < 0)
2957 goto out;
2958
2959 if (ret > 0) {
2960 ret = btrfs_previous_extent_item(root, path, 0);
2961 if (ret < 0)
2962 goto out;
2963 if (ret > 0) {
2964 btrfs_release_path(path);
2965 ret = btrfs_search_slot(NULL, root, &key,
2966 path, 0, 0);
2967 if (ret < 0)
2968 goto out;
2969 }
2970 }
2971
2972 stop_loop = 0;
2973 while (1) {
2974 u64 bytes;
2975
2976 l = path->nodes[0];
2977 slot = path->slots[0];
2978 if (slot >= btrfs_header_nritems(l)) {
2979 ret = btrfs_next_leaf(root, path);
2980 if (ret == 0)
2981 continue;
2982 if (ret < 0)
2983 goto out;
2984
2985 stop_loop = 1;
2986 break;
2987 }
2988 btrfs_item_key_to_cpu(l, &key, slot);
2989
2990 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
2991 key.type != BTRFS_METADATA_ITEM_KEY)
2992 goto next;
2993
2994 if (key.type == BTRFS_METADATA_ITEM_KEY)
2995 bytes = fs_info->nodesize;
2996 else
2997 bytes = key.offset;
2998
2999 if (key.objectid + bytes <= logic_start)
3000 goto next;
3001
3002 if (key.objectid >= logic_end) {
3003 stop_loop = 1;
3004 break;
3005 }
3006
3007 while (key.objectid >= logic_start + map->stripe_len)
3008 logic_start += map->stripe_len;
3009
3010 extent = btrfs_item_ptr(l, slot,
3011 struct btrfs_extent_item);
3012 flags = btrfs_extent_flags(l, extent);
3013 generation = btrfs_extent_generation(l, extent);
3014
3015 if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
3016 (key.objectid < logic_start ||
3017 key.objectid + bytes >
3018 logic_start + map->stripe_len)) {
3019 btrfs_err(fs_info,
3020 "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
3021 key.objectid, logic_start);
3022 spin_lock(&sctx->stat_lock);
3023 sctx->stat.uncorrectable_errors++;
3024 spin_unlock(&sctx->stat_lock);
3025 goto next;
3026 }
3027again:
3028 extent_logical = key.objectid;
3029 ASSERT(bytes <= U32_MAX);
3030 extent_len = bytes;
3031
3032 if (extent_logical < logic_start) {
3033 extent_len -= logic_start - extent_logical;
3034 extent_logical = logic_start;
3035 }
3036
3037 if (extent_logical + extent_len >
3038 logic_start + map->stripe_len)
3039 extent_len = logic_start + map->stripe_len -
3040 extent_logical;
3041
3042 scrub_parity_mark_sectors_data(sparity, extent_logical,
3043 extent_len);
3044
3045 mapped_length = extent_len;
3046 bioc = NULL;
3047 ret = btrfs_map_block(fs_info, BTRFS_MAP_READ,
3048 extent_logical, &mapped_length, &bioc,
3049 0);
3050 if (!ret) {
3051 if (!bioc || mapped_length < extent_len)
3052 ret = -EIO;
3053 }
3054 if (ret) {
3055 btrfs_put_bioc(bioc);
3056 goto out;
3057 }
3058 extent_physical = bioc->stripes[0].physical;
3059 extent_mirror_num = bioc->mirror_num;
3060 extent_dev = bioc->stripes[0].dev;
3061 btrfs_put_bioc(bioc);
3062
3063 ret = btrfs_lookup_csums_range(csum_root,
3064 extent_logical,
3065 extent_logical + extent_len - 1,
3066 &sctx->csum_list, 1);
3067 if (ret)
3068 goto out;
3069
3070 ret = scrub_extent_for_parity(sparity, extent_logical,
3071 extent_len,
3072 extent_physical,
3073 extent_dev, flags,
3074 generation,
3075 extent_mirror_num);
3076
3077 scrub_free_csums(sctx);
3078
3079 if (ret)
3080 goto out;
3081
3082 if (extent_logical + extent_len <
3083 key.objectid + bytes) {
3084 logic_start += map->stripe_len;
3085
3086 if (logic_start >= logic_end) {
3087 stop_loop = 1;
3088 break;
3089 }
3090
3091 if (logic_start < key.objectid + bytes) {
3092 cond_resched();
3093 goto again;
3094 }
3095 }
3096next:
3097 path->slots[0]++;
3098 }
3099
3100 btrfs_release_path(path);
3101
3102 if (stop_loop)
3103 break;
3104
3105 logic_start += map->stripe_len;
3106 }
3107out:
3108 if (ret < 0) {
3109 ASSERT(logic_end - logic_start <= U32_MAX);
3110 scrub_parity_mark_sectors_error(sparity, logic_start,
3111 logic_end - logic_start);
3112 }
3113 scrub_parity_put(sparity);
3114 scrub_submit(sctx);
3115 mutex_lock(&sctx->wr_lock);
3116 scrub_wr_submit(sctx);
3117 mutex_unlock(&sctx->wr_lock);
3118
3119 btrfs_release_path(path);
3120 return ret < 0 ? ret : 0;
3121}
3122
3123static void sync_replace_for_zoned(struct scrub_ctx *sctx)
3124{
3125 if (!btrfs_is_zoned(sctx->fs_info))
3126 return;
3127
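	/*
	 * Writes to a zoned target must stay sequential; push out all
	 * queued reads and writes and wait for them before continuing.
	 */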
3128 sctx->flush_all_writes = true;
3129 scrub_submit(sctx);
3130 mutex_lock(&sctx->wr_lock);
3131 scrub_wr_submit(sctx);
3132 mutex_unlock(&sctx->wr_lock);
3133
3134 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
3135}
3136
3137static int sync_write_pointer_for_zoned(struct scrub_ctx *sctx, u64 logical,
3138 u64 physical, u64 physical_end)
3139{
3140 struct btrfs_fs_info *fs_info = sctx->fs_info;
3141 int ret = 0;
3142
3143 if (!btrfs_is_zoned(fs_info))
3144 return 0;
3145
3146 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
3147
3148 mutex_lock(&sctx->wr_lock);
3149 if (sctx->write_pointer < physical_end) {
3150 ret = btrfs_sync_zone_write_pointer(sctx->wr_tgtdev, logical,
3151 physical,
3152 sctx->write_pointer);
3153 if (ret)
3154 btrfs_err(fs_info,
3155 "zoned: failed to recover write pointer");
3156 }
3157 mutex_unlock(&sctx->wr_lock);
3158 btrfs_dev_clear_zone_empty(sctx->wr_tgtdev, physical);
3159
3160 return ret;
3161}
3162
3163static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
3164 struct map_lookup *map,
3165 struct btrfs_device *scrub_dev,
3166 int num, u64 base, u64 length,
3167 struct btrfs_block_group *cache)
3168{
3169 struct btrfs_path *path, *ppath;
3170 struct btrfs_fs_info *fs_info = sctx->fs_info;
3171 struct btrfs_root *root = fs_info->extent_root;
3172 struct btrfs_root *csum_root = fs_info->csum_root;
3173 struct btrfs_extent_item *extent;
3174 struct blk_plug plug;
3175 u64 flags;
3176 int ret;
3177 int slot;
3178 u64 nstripes;
3179 struct extent_buffer *l;
3180 u64 physical;
3181 u64 logical;
3182 u64 logic_end;
3183 u64 physical_end;
3184 u64 generation;
3185 int mirror_num;
3186 struct reada_control *reada1;
3187 struct reada_control *reada2;
3188 struct btrfs_key key;
3189 struct btrfs_key key_end;
3190 u64 increment = map->stripe_len;
3191 u64 offset;
3192 u64 extent_logical;
3193 u64 extent_physical;
	/*
	 * Unlike chunk length, extent length should never go beyond
	 * BTRFS_STRIPE_LEN, thus u32 is enough here.
	 */
3198 u32 extent_len;
3199 u64 stripe_logical;
3200 u64 stripe_end;
3201 struct btrfs_device *extent_dev;
3202 int extent_mirror_num;
3203 int stop_loop = 0;
3204
3205 physical = map->stripes[num].physical;
3206 offset = 0;
3207 nstripes = div64_u64(length, map->stripe_len);
3208 mirror_num = 1;
3209 increment = map->stripe_len;
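	/*
	 * Work out, per RAID profile, where this device's stripes start
	 * inside the chunk (offset), how far apart consecutive stripes of
	 * this device are in logical space (increment), and which mirror
	 * this device represents (mirror_num).
	 */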
3210 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
3211 offset = map->stripe_len * num;
3212 increment = map->stripe_len * map->num_stripes;
3213 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
3214 int factor = map->num_stripes / map->sub_stripes;
3215 offset = map->stripe_len * (num / map->sub_stripes);
3216 increment = map->stripe_len * factor;
3217 mirror_num = num % map->sub_stripes + 1;
3218 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
3219 mirror_num = num % map->num_stripes + 1;
3220 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
3221 mirror_num = num % map->num_stripes + 1;
3222 } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3223 get_raid56_logic_offset(physical, num, map, &offset, NULL);
3224 increment = map->stripe_len * nr_data_stripes(map);
3225 }
3226
3227 path = btrfs_alloc_path();
3228 if (!path)
3229 return -ENOMEM;
3230
3231 ppath = btrfs_alloc_path();
3232 if (!ppath) {
3233 btrfs_free_path(path);
3234 return -ENOMEM;
3235 }
3236
	/*
	 * Work on commit root. The related disk blocks are static as
	 * long as COW is applied. This means it is safe to rewrite
	 * state information in memory and on disk while scrubbing.
	 */
3242 path->search_commit_root = 1;
3243 path->skip_locking = 1;
3244
3245 ppath->search_commit_root = 1;
3246 ppath->skip_locking = 1;
	/*
	 * Trigger the readahead for extent tree and csum tree and wait for
	 * completion. During readahead, the scrub is officially paused
	 * to not hold off transaction commits.
	 */
3252 logical = base + offset;
3253 physical_end = physical + nstripes * map->stripe_len;
3254 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3255 get_raid56_logic_offset(physical_end, num,
3256 map, &logic_end, NULL);
3257 logic_end += base;
3258 } else {
3259 logic_end = logical + increment * nstripes;
3260 }
3261 wait_event(sctx->list_wait,
3262 atomic_read(&sctx->bios_in_flight) == 0);
3263 scrub_blocked_if_needed(fs_info);
3264
	/* FIXME it might be better to start readahead at commit root */
3266 key.objectid = logical;
3267 key.type = BTRFS_EXTENT_ITEM_KEY;
3268 key.offset = (u64)0;
3269 key_end.objectid = logic_end;
3270 key_end.type = BTRFS_METADATA_ITEM_KEY;
3271 key_end.offset = (u64)-1;
3272 reada1 = btrfs_reada_add(root, &key, &key_end);
3273
3274 if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
3275 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
3276 key.type = BTRFS_EXTENT_CSUM_KEY;
3277 key.offset = logical;
3278 key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
3279 key_end.type = BTRFS_EXTENT_CSUM_KEY;
3280 key_end.offset = logic_end;
3281 reada2 = btrfs_reada_add(csum_root, &key, &key_end);
3282 } else {
3283 reada2 = NULL;
3284 }
3285
3286 if (!IS_ERR(reada1))
3287 btrfs_reada_wait(reada1);
3288 if (!IS_ERR_OR_NULL(reada2))
3289 btrfs_reada_wait(reada2);
3290
3291
	/*
	 * Collect all data csums for the stripe to avoid seeking during
	 * the scrub. This might currently (crc32) end up being about 1MB.
	 */
3296 blk_start_plug(&plug);
3297
3298 if (sctx->is_dev_replace &&
3299 btrfs_dev_is_sequential(sctx->wr_tgtdev, physical)) {
3300 mutex_lock(&sctx->wr_lock);
3301 sctx->write_pointer = physical;
3302 mutex_unlock(&sctx->wr_lock);
3303 sctx->flush_all_writes = true;
3304 }
3305
	/*
	 * Now find all extents for each stripe and scrub them.
	 */
3309 ret = 0;
3310 while (physical < physical_end) {
		/*
		 * Canceled?
		 */
3314 if (atomic_read(&fs_info->scrub_cancel_req) ||
3315 atomic_read(&sctx->cancel_req)) {
3316 ret = -ECANCELED;
3317 goto out;
3318 }
3319
3320
		/* Check to see if we have to pause */
3322 if (atomic_read(&fs_info->scrub_pause_req)) {
			/* Push queued extents */
3324 sctx->flush_all_writes = true;
3325 scrub_submit(sctx);
3326 mutex_lock(&sctx->wr_lock);
3327 scrub_wr_submit(sctx);
3328 mutex_unlock(&sctx->wr_lock);
3329 wait_event(sctx->list_wait,
3330 atomic_read(&sctx->bios_in_flight) == 0);
3331 sctx->flush_all_writes = false;
3332 scrub_blocked_if_needed(fs_info);
3333 }
3334
3335 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3336 ret = get_raid56_logic_offset(physical, num, map,
3337 &logical,
3338 &stripe_logical);
3339 logical += base;
3340 if (ret) {
				/* It is a parity stripe */
3342 stripe_logical += base;
3343 stripe_end = stripe_logical + increment;
3344 ret = scrub_raid56_parity(sctx, map, scrub_dev,
3345 ppath, stripe_logical,
3346 stripe_end);
3347 if (ret)
3348 goto out;
3349 goto skip;
3350 }
3351 }
3352
3353 if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
3354 key.type = BTRFS_METADATA_ITEM_KEY;
3355 else
3356 key.type = BTRFS_EXTENT_ITEM_KEY;
3357 key.objectid = logical;
3358 key.offset = (u64)-1;
3359
3360 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3361 if (ret < 0)
3362 goto out;
3363
3364 if (ret > 0) {
3365 ret = btrfs_previous_extent_item(root, path, 0);
3366 if (ret < 0)
3367 goto out;
3368 if (ret > 0) {
				/* There's no smaller item, so stick with the
				 * larger one */
3371 btrfs_release_path(path);
3372 ret = btrfs_search_slot(NULL, root, &key,
3373 path, 0, 0);
3374 if (ret < 0)
3375 goto out;
3376 }
3377 }
3378
3379 stop_loop = 0;
3380 while (1) {
3381 u64 bytes;
3382
3383 l = path->nodes[0];
3384 slot = path->slots[0];
3385 if (slot >= btrfs_header_nritems(l)) {
3386 ret = btrfs_next_leaf(root, path);
3387 if (ret == 0)
3388 continue;
3389 if (ret < 0)
3390 goto out;
3391
3392 stop_loop = 1;
3393 break;
3394 }
3395 btrfs_item_key_to_cpu(l, &key, slot);
3396
3397 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
3398 key.type != BTRFS_METADATA_ITEM_KEY)
3399 goto next;
3400
3401 if (key.type == BTRFS_METADATA_ITEM_KEY)
3402 bytes = fs_info->nodesize;
3403 else
3404 bytes = key.offset;
3405
3406 if (key.objectid + bytes <= logical)
3407 goto next;
3408
3409 if (key.objectid >= logical + map->stripe_len) {
				/* Out of this device extent */
3411 if (key.objectid >= logic_end)
3412 stop_loop = 1;
3413 break;
3414 }
3415
			/*
			 * If our block group was removed in the meanwhile, just
			 * stop scrubbing since there is no point in continuing.
			 * Continuing would prevent reusing its device extents
			 * for new block groups for a long time.
			 */
3422 spin_lock(&cache->lock);
3423 if (cache->removed) {
3424 spin_unlock(&cache->lock);
3425 ret = 0;
3426 goto out;
3427 }
3428 spin_unlock(&cache->lock);
3429
3430 extent = btrfs_item_ptr(l, slot,
3431 struct btrfs_extent_item);
3432 flags = btrfs_extent_flags(l, extent);
3433 generation = btrfs_extent_generation(l, extent);
3434
3435 if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
3436 (key.objectid < logical ||
3437 key.objectid + bytes >
3438 logical + map->stripe_len)) {
3439 btrfs_err(fs_info,
3440 "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
3441 key.objectid, logical);
3442 spin_lock(&sctx->stat_lock);
3443 sctx->stat.uncorrectable_errors++;
3444 spin_unlock(&sctx->stat_lock);
3445 goto next;
3446 }
3447
3448again:
3449 extent_logical = key.objectid;
3450 ASSERT(bytes <= U32_MAX);
3451 extent_len = bytes;
3452
			/*
			 * Trim extent to this stripe.
			 */
3456 if (extent_logical < logical) {
3457 extent_len -= logical - extent_logical;
3458 extent_logical = logical;
3459 }
3460 if (extent_logical + extent_len >
3461 logical + map->stripe_len) {
3462 extent_len = logical + map->stripe_len -
3463 extent_logical;
3464 }
3465
3466 extent_physical = extent_logical - logical + physical;
3467 extent_dev = scrub_dev;
3468 extent_mirror_num = mirror_num;
3469 if (sctx->is_dev_replace)
3470 scrub_remap_extent(fs_info, extent_logical,
3471 extent_len, &extent_physical,
3472 &extent_dev,
3473 &extent_mirror_num);
3474
3475 if (flags & BTRFS_EXTENT_FLAG_DATA) {
3476 ret = btrfs_lookup_csums_range(csum_root,
3477 extent_logical,
3478 extent_logical + extent_len - 1,
3479 &sctx->csum_list, 1);
3480 if (ret)
3481 goto out;
3482 }
3483
3484 ret = scrub_extent(sctx, map, extent_logical, extent_len,
3485 extent_physical, extent_dev, flags,
3486 generation, extent_mirror_num,
3487 extent_logical - logical + physical);
3488
3489 scrub_free_csums(sctx);
3490
3491 if (ret)
3492 goto out;
3493
3494 if (sctx->is_dev_replace)
3495 sync_replace_for_zoned(sctx);
3496
3497 if (extent_logical + extent_len <
3498 key.objectid + bytes) {
3499 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
					/*
					 * Loop until we find the next data
					 * stripe or we have finished all stripes.
					 */
3504loop:
3505 physical += map->stripe_len;
3506 ret = get_raid56_logic_offset(physical,
3507 num, map, &logical,
3508 &stripe_logical);
3509 logical += base;
3510
3511 if (ret && physical < physical_end) {
3512 stripe_logical += base;
3513 stripe_end = stripe_logical +
3514 increment;
3515 ret = scrub_raid56_parity(sctx,
3516 map, scrub_dev, ppath,
3517 stripe_logical,
3518 stripe_end);
3519 if (ret)
3520 goto out;
3521 goto loop;
3522 }
3523 } else {
3524 physical += map->stripe_len;
3525 logical += increment;
3526 }
3527 if (logical < key.objectid + bytes) {
3528 cond_resched();
3529 goto again;
3530 }
3531
3532 if (physical >= physical_end) {
3533 stop_loop = 1;
3534 break;
3535 }
3536 }
3537next:
3538 path->slots[0]++;
3539 }
3540 btrfs_release_path(path);
3541skip:
3542 logical += increment;
3543 physical += map->stripe_len;
3544 spin_lock(&sctx->stat_lock);
3545 if (stop_loop)
3546 sctx->stat.last_physical = map->stripes[num].physical +
3547 length;
3548 else
3549 sctx->stat.last_physical = physical;
3550 spin_unlock(&sctx->stat_lock);
3551 if (stop_loop)
3552 break;
3553 }
3554out:
	/* Push queued extents */
3556 scrub_submit(sctx);
3557 mutex_lock(&sctx->wr_lock);
3558 scrub_wr_submit(sctx);
3559 mutex_unlock(&sctx->wr_lock);
3560
3561 blk_finish_plug(&plug);
3562 btrfs_free_path(path);
3563 btrfs_free_path(ppath);
3564
3565 if (sctx->is_dev_replace && ret >= 0) {
3566 int ret2;
3567
3568 ret2 = sync_write_pointer_for_zoned(sctx, base + offset,
3569 map->stripes[num].physical,
3570 physical_end);
3571 if (ret2)
3572 ret = ret2;
3573 }
3574
3575 return ret < 0 ? ret : 0;
3576}
3577
3578static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
3579 struct btrfs_device *scrub_dev,
3580 u64 chunk_offset, u64 length,
3581 u64 dev_offset,
3582 struct btrfs_block_group *cache)
3583{
3584 struct btrfs_fs_info *fs_info = sctx->fs_info;
3585 struct extent_map_tree *map_tree = &fs_info->mapping_tree;
3586 struct map_lookup *map;
3587 struct extent_map *em;
3588 int i;
3589 int ret = 0;
3590
3591 read_lock(&map_tree->lock);
3592 em = lookup_extent_mapping(map_tree, chunk_offset, 1);
3593 read_unlock(&map_tree->lock);
3594
3595 if (!em) {
		/*
		 * Might have been an unused block group deleted by the cleaner
		 * kthread or relocation.
		 */
3600 spin_lock(&cache->lock);
3601 if (!cache->removed)
3602 ret = -EINVAL;
3603 spin_unlock(&cache->lock);
3604
3605 return ret;
3606 }
3607
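	/*
	 * The extent map must match the device extent we were asked to
	 * scrub; a mismatched or shorter mapping is silently skipped.
	 */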
3608 map = em->map_lookup;
3609 if (em->start != chunk_offset)
3610 goto out;
3611
3612 if (em->len < length)
3613 goto out;
3614
3615 for (i = 0; i < map->num_stripes; ++i) {
3616 if (map->stripes[i].dev->bdev == scrub_dev->bdev &&
3617 map->stripes[i].physical == dev_offset) {
3618 ret = scrub_stripe(sctx, map, scrub_dev, i,
3619 chunk_offset, length, cache);
3620 if (ret)
3621 goto out;
3622 }
3623 }
3624out:
3625 free_extent_map(em);
3626
3627 return ret;
3628}
3629
3630static int finish_extent_writes_for_zoned(struct btrfs_root *root,
3631 struct btrfs_block_group *cache)
3632{
3633 struct btrfs_fs_info *fs_info = cache->fs_info;
3634 struct btrfs_trans_handle *trans;
3635
3636 if (!btrfs_is_zoned(fs_info))
3637 return 0;
3638
3639 btrfs_wait_block_group_reservations(cache);
3640 btrfs_wait_nocow_writers(cache);
3641 btrfs_wait_ordered_roots(fs_info, U64_MAX, cache->start, cache->length);
3642
3643 trans = btrfs_join_transaction(root);
3644 if (IS_ERR(trans))
3645 return PTR_ERR(trans);
3646 return btrfs_commit_transaction(trans);
3647}
3648
3649static noinline_for_stack
3650int scrub_enumerate_chunks(struct scrub_ctx *sctx,
3651 struct btrfs_device *scrub_dev, u64 start, u64 end)
3652{
3653 struct btrfs_dev_extent *dev_extent = NULL;
3654 struct btrfs_path *path;
3655 struct btrfs_fs_info *fs_info = sctx->fs_info;
3656 struct btrfs_root *root = fs_info->dev_root;
3657 u64 length;
3658 u64 chunk_offset;
3659 int ret = 0;
3660 int ro_set;
3661 int slot;
3662 struct extent_buffer *l;
3663 struct btrfs_key key;
3664 struct btrfs_key found_key;
3665 struct btrfs_block_group *cache;
3666 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
3667
3668 path = btrfs_alloc_path();
3669 if (!path)
3670 return -ENOMEM;
3671
3672 path->reada = READA_FORWARD;
3673 path->search_commit_root = 1;
3674 path->skip_locking = 1;
3675
3676 key.objectid = scrub_dev->devid;
3677 key.offset = 0ull;
3678 key.type = BTRFS_DEV_EXTENT_KEY;
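	/* Walk all DEV_EXTENT items of this device, in offset order. */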
3679
3680 while (1) {
3681 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3682 if (ret < 0)
3683 break;
3684 if (ret > 0) {
3685 if (path->slots[0] >=
3686 btrfs_header_nritems(path->nodes[0])) {
3687 ret = btrfs_next_leaf(root, path);
3688 if (ret < 0)
3689 break;
3690 if (ret > 0) {
3691 ret = 0;
3692 break;
3693 }
3694 } else {
3695 ret = 0;
3696 }
3697 }
3698
3699 l = path->nodes[0];
3700 slot = path->slots[0];
3701
3702 btrfs_item_key_to_cpu(l, &found_key, slot);
3703
3704 if (found_key.objectid != scrub_dev->devid)
3705 break;
3706
3707 if (found_key.type != BTRFS_DEV_EXTENT_KEY)
3708 break;
3709
3710 if (found_key.offset >= end)
3711 break;
3712
3713 if (found_key.offset < key.offset)
3714 break;
3715
3716 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
3717 length = btrfs_dev_extent_length(l, dev_extent);
3718
3719 if (found_key.offset + length <= start)
3720 goto skip;
3721
3722 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
3723
		/*
		 * Get a reference on the corresponding block group to prevent
		 * the chunk from going away while we scrub it.
		 */
3728 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
3729
		/* Some chunks are removed but not committed to disk yet,
		 * continue scrubbing */
3732 if (!cache)
3733 goto skip;
3734
3735 if (sctx->is_dev_replace && btrfs_is_zoned(fs_info)) {
3736 spin_lock(&cache->lock);
3737 if (!cache->to_copy) {
3738 spin_unlock(&cache->lock);
3739 btrfs_put_block_group(cache);
3740 goto skip;
3741 }
3742 spin_unlock(&cache->lock);
3743 }
3744
		/*
		 * Make sure that while we are scrubbing the corresponding block
		 * group doesn't get its logical address and its device extents
		 * reused for another block group, which can possibly be of a
		 * different type and different profile. We do this to prevent
		 * false error detections and crashes due to bogus attributes.
		 */
3753 spin_lock(&cache->lock);
3754 if (cache->removed) {
3755 spin_unlock(&cache->lock);
3756 btrfs_put_block_group(cache);
3757 goto skip;
3758 }
3759 btrfs_freeze_block_group(cache);
3760 spin_unlock(&cache->lock);
3761
		/*
		 * We need to call btrfs_inc_block_group_ro() with scrubs_paused,
		 * to avoid a deadlock caused by:
		 * btrfs_inc_block_group_ro()
		 * -> btrfs_wait_for_commit()
		 * -> btrfs_commit_transaction()
		 * -> btrfs_scrub_pause()
		 */
3770 scrub_pause_on(fs_info);
		/*
		 * Don't do chunk preallocation for scrub.
		 *
		 * This is especially important for SYSTEM block groups, or we
		 * can hit -EFBIG from btrfs_finish_chunk_alloc(): if the only
		 * SYSTEM block group gets marked RO, a new one is allocated,
		 * and since the cleaner thread may not be triggered fast
		 * enough, repeated scrubs can use up all the slots in
		 * btrfs_super_block::sys_chunk_array.
		 *
		 * For dev-replace, on the other hand, we need to try our best
		 * to mark the block group read-only, to prevent a race between:
		 * - Write duplication: contains the latest data.
		 * - Scrub copy: contains data from the commit root.
		 * If the target block group is not marked read-only, nocow
		 * writes can be overwritten by the scrub copy, causing data
		 * corruption.  So for dev-replace, it's not allowed to
		 * continue if a block group is not read-only.
		 */
3802 ret = btrfs_inc_block_group_ro(cache, sctx->is_dev_replace);
3803 if (!ret && sctx->is_dev_replace) {
3804 ret = finish_extent_writes_for_zoned(root, cache);
3805 if (ret) {
3806 btrfs_dec_block_group_ro(cache);
3807 scrub_pause_off(fs_info);
3808 btrfs_put_block_group(cache);
3809 break;
3810 }
3811 }
3812
3813 if (ret == 0) {
3814 ro_set = 1;
3815 } else if (ret == -ENOSPC && !sctx->is_dev_replace) {
			/*
			 * btrfs_inc_block_group_ro() returns -ENOSPC when it
			 * fails to create a new chunk for metadata.
			 * This is not a problem for scrub, because metadata
			 * is always COWed, and our scrub pauses transaction
			 * commits.
			 */
3823 ro_set = 0;
3824 } else if (ret == -ETXTBSY) {
3825 btrfs_warn(fs_info,
3826 "skipping scrub of block group %llu due to active swapfile",
3827 cache->start);
3828 scrub_pause_off(fs_info);
3829 ret = 0;
3830 goto skip_unfreeze;
3831 } else {
3832 btrfs_warn(fs_info,
3833 "failed setting block group ro: %d", ret);
3834 btrfs_unfreeze_block_group(cache);
3835 btrfs_put_block_group(cache);
3836 scrub_pause_off(fs_info);
3837 break;
3838 }
3839
		/*
		 * Now the target block is marked RO, wait for nocow writes to
		 * finish before dev-replace.
		 * COW is fine, as COW never overwrites extents in commit tree.
		 */
3845 if (sctx->is_dev_replace) {
3846 btrfs_wait_nocow_writers(cache);
3847 btrfs_wait_ordered_roots(fs_info, U64_MAX, cache->start,
3848 cache->length);
3849 }
3850
3851 scrub_pause_off(fs_info);
3852 down_write(&dev_replace->rwsem);
3853 dev_replace->cursor_right = found_key.offset + length;
3854 dev_replace->cursor_left = found_key.offset;
3855 dev_replace->item_needs_writeback = 1;
3856 up_write(&dev_replace->rwsem);
3857
3858 ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
3859 found_key.offset, cache);
3860
		/*
		 * Flush and submit all pending read and write bios, afterwards
		 * wait for them.
		 * Note that in the dev replace case, a read request causes
		 * write requests that are submitted in the read completion
		 * worker. Therefore in the current situation, it is required
		 * that all write requests are flushed, so that all read and
		 * write requests are really completed when bios_in_flight
		 * changes to 0.
		 */
3871 sctx->flush_all_writes = true;
3872 scrub_submit(sctx);
3873 mutex_lock(&sctx->wr_lock);
3874 scrub_wr_submit(sctx);
3875 mutex_unlock(&sctx->wr_lock);
3876
3877 wait_event(sctx->list_wait,
3878 atomic_read(&sctx->bios_in_flight) == 0);
3879
3880 scrub_pause_on(fs_info);
3881
		/*
		 * Must be called before we decrease @scrub_paused.
		 * Make sure we don't block transaction commit while
		 * we are waiting for pending workers to finish.
		 */
3887 wait_event(sctx->list_wait,
3888 atomic_read(&sctx->workers_pending) == 0);
3889 sctx->flush_all_writes = false;
3890
3891 scrub_pause_off(fs_info);
3892
3893 if (sctx->is_dev_replace &&
3894 !btrfs_finish_block_group_to_copy(dev_replace->srcdev,
3895 cache, found_key.offset))
3896 ro_set = 0;
3897
3898 down_write(&dev_replace->rwsem);
3899 dev_replace->cursor_left = dev_replace->cursor_right;
3900 dev_replace->item_needs_writeback = 1;
3901 up_write(&dev_replace->rwsem);
3902
3903 if (ro_set)
3904 btrfs_dec_block_group_ro(cache);
3905
		/*
		 * We might have prevented the cleaner kthread from deleting
		 * this block group if it was already unused because we raced
		 * and set it to RO mode first. So add it back to the unused
		 * list, otherwise it might not ever be deleted unless a manual
		 * balance is triggered or it becomes used and unused again.
		 */
3913 spin_lock(&cache->lock);
3914 if (!cache->removed && !cache->ro && cache->reserved == 0 &&
3915 cache->used == 0) {
3916 spin_unlock(&cache->lock);
3917 if (btrfs_test_opt(fs_info, DISCARD_ASYNC))
3918 btrfs_discard_queue_work(&fs_info->discard_ctl,
3919 cache);
3920 else
3921 btrfs_mark_bg_unused(cache);
3922 } else {
3923 spin_unlock(&cache->lock);
3924 }
3925skip_unfreeze:
3926 btrfs_unfreeze_block_group(cache);
3927 btrfs_put_block_group(cache);
3928 if (ret)
3929 break;
3930 if (sctx->is_dev_replace &&
3931 atomic64_read(&dev_replace->num_write_errors) > 0) {
3932 ret = -EIO;
3933 break;
3934 }
3935 if (sctx->stat.malloc_errors > 0) {
3936 ret = -ENOMEM;
3937 break;
3938 }
3939skip:
3940 key.offset = found_key.offset + length;
3941 btrfs_release_path(path);
3942 }
3943
3944 btrfs_free_path(path);
3945
3946 return ret;
3947}
3948
3949static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
3950 struct btrfs_device *scrub_dev)
3951{
3952 int i;
3953 u64 bytenr;
3954 u64 gen;
3955 int ret;
3956 struct btrfs_fs_info *fs_info = sctx->fs_info;
3957
3958 if (BTRFS_FS_ERROR(fs_info))
3959 return -EROFS;
3960
	/* Seed devices of a new filesystem have their own generation. */
3962 if (scrub_dev->fs_devices != fs_info->fs_devices)
3963 gen = scrub_dev->generation;
3964 else
3965 gen = fs_info->last_trans_committed;
3966
3967 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
3968 bytenr = btrfs_sb_offset(i);
3969 if (bytenr + BTRFS_SUPER_INFO_SIZE >
3970 scrub_dev->commit_total_bytes)
3971 break;
3972 if (!btrfs_check_super_location(scrub_dev, bytenr))
3973 continue;
3974
3975 ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
3976 scrub_dev, BTRFS_EXTENT_FLAG_SUPER, gen, i,
3977 NULL, bytenr);
3978 if (ret)
3979 return ret;
3980 }
3981 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
3982
3983 return 0;
3984}
3985
3986static void scrub_workers_put(struct btrfs_fs_info *fs_info)
3987{
3988 if (refcount_dec_and_mutex_lock(&fs_info->scrub_workers_refcnt,
3989 &fs_info->scrub_lock)) {
3990 struct btrfs_workqueue *scrub_workers = NULL;
3991 struct btrfs_workqueue *scrub_wr_comp = NULL;
3992 struct btrfs_workqueue *scrub_parity = NULL;
3993
3994 scrub_workers = fs_info->scrub_workers;
3995 scrub_wr_comp = fs_info->scrub_wr_completion_workers;
3996 scrub_parity = fs_info->scrub_parity_workers;
3997
3998 fs_info->scrub_workers = NULL;
3999 fs_info->scrub_wr_completion_workers = NULL;
4000 fs_info->scrub_parity_workers = NULL;
4001 mutex_unlock(&fs_info->scrub_lock);
4002
4003 btrfs_destroy_workqueue(scrub_workers);
4004 btrfs_destroy_workqueue(scrub_wr_comp);
4005 btrfs_destroy_workqueue(scrub_parity);
4006 }
4007}
4008
/*
 * Get a reference count on fs_info->scrub_workers. Start workers if necessary.
 */
4012static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
4013 int is_dev_replace)
4014{
4015 struct btrfs_workqueue *scrub_workers = NULL;
4016 struct btrfs_workqueue *scrub_wr_comp = NULL;
4017 struct btrfs_workqueue *scrub_parity = NULL;
4018 unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
4019 int max_active = fs_info->thread_pool_size;
4020 int ret = -ENOMEM;
4021
4022 if (refcount_inc_not_zero(&fs_info->scrub_workers_refcnt))
4023 return 0;
4024
4025 scrub_workers = btrfs_alloc_workqueue(fs_info, "scrub", flags,
4026 is_dev_replace ? 1 : max_active, 4);
4027 if (!scrub_workers)
4028 goto fail_scrub_workers;
4029
4030 scrub_wr_comp = btrfs_alloc_workqueue(fs_info, "scrubwrc", flags,
4031 max_active, 2);
4032 if (!scrub_wr_comp)
4033 goto fail_scrub_wr_completion_workers;
4034
4035 scrub_parity = btrfs_alloc_workqueue(fs_info, "scrubparity", flags,
4036 max_active, 2);
4037 if (!scrub_parity)
4038 goto fail_scrub_parity_workers;
4039
4040 mutex_lock(&fs_info->scrub_lock);
4041 if (refcount_read(&fs_info->scrub_workers_refcnt) == 0) {
4042 ASSERT(fs_info->scrub_workers == NULL &&
4043 fs_info->scrub_wr_completion_workers == NULL &&
4044 fs_info->scrub_parity_workers == NULL);
4045 fs_info->scrub_workers = scrub_workers;
4046 fs_info->scrub_wr_completion_workers = scrub_wr_comp;
4047 fs_info->scrub_parity_workers = scrub_parity;
4048 refcount_set(&fs_info->scrub_workers_refcnt, 1);
4049 mutex_unlock(&fs_info->scrub_lock);
4050 return 0;
4051 }
4052
4053 refcount_inc(&fs_info->scrub_workers_refcnt);
4054 mutex_unlock(&fs_info->scrub_lock);
4055
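	/*
	 * Another task created the workqueues first; release ours by
	 * falling through the error labels with ret == 0.
	 */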
4056 ret = 0;
4057 btrfs_destroy_workqueue(scrub_parity);
4058fail_scrub_parity_workers:
4059 btrfs_destroy_workqueue(scrub_wr_comp);
4060fail_scrub_wr_completion_workers:
4061 btrfs_destroy_workqueue(scrub_workers);
4062fail_scrub_workers:
4063 return ret;
4064}
4065
4066int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
4067 u64 end, struct btrfs_scrub_progress *progress,
4068 int readonly, int is_dev_replace)
4069{
4070 struct btrfs_dev_lookup_args args = { .devid = devid };
4071 struct scrub_ctx *sctx;
4072 int ret;
4073 struct btrfs_device *dev;
4074 unsigned int nofs_flag;
4075
4076 if (btrfs_fs_closing(fs_info))
4077 return -EAGAIN;
4078
4079 if (fs_info->nodesize > BTRFS_STRIPE_LEN) {
		/*
		 * In this case scrub is unable to calculate the checksum
		 * the way scrub is implemented. Do not handle this
		 * situation at all because it won't ever happen.
		 */
4085 btrfs_err(fs_info,
4086 "scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails",
4087 fs_info->nodesize,
4088 BTRFS_STRIPE_LEN);
4089 return -EINVAL;
4090 }
4091
4092 if (fs_info->nodesize >
4093 PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK ||
4094 fs_info->sectorsize > PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK) {
		/*
		 * Would exhaust the array bounds of the pagev member in
		 * struct scrub_block.
		 */
4099 btrfs_err(fs_info,
4100 "scrub: size assumption nodesize and sectorsize <= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails",
4101 fs_info->nodesize,
4102 SCRUB_MAX_PAGES_PER_BLOCK,
4103 fs_info->sectorsize,
4104 SCRUB_MAX_PAGES_PER_BLOCK);
4105 return -EINVAL;
4106 }
4107
	/* Allocate outside of device_list_mutex */
4109 sctx = scrub_setup_ctx(fs_info, is_dev_replace);
4110 if (IS_ERR(sctx))
4111 return PTR_ERR(sctx);
4112
4113 ret = scrub_workers_get(fs_info, is_dev_replace);
4114 if (ret)
4115 goto out_free_ctx;
4116
4117 mutex_lock(&fs_info->fs_devices->device_list_mutex);
4118 dev = btrfs_find_device(fs_info->fs_devices, &args);
4119 if (!dev || (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) &&
4120 !is_dev_replace)) {
4121 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
4122 ret = -ENODEV;
4123 goto out;
4124 }
4125
4126 if (!is_dev_replace && !readonly &&
4127 !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
4128 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
4129 btrfs_err_in_rcu(fs_info,
4130 "scrub on devid %llu: filesystem on %s is not writable",
4131 devid, rcu_str_deref(dev->name));
4132 ret = -EROFS;
4133 goto out;
4134 }
4135
4136 mutex_lock(&fs_info->scrub_lock);
4137 if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
4138 test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &dev->dev_state)) {
4139 mutex_unlock(&fs_info->scrub_lock);
4140 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
4141 ret = -EIO;
4142 goto out;
4143 }
4144
4145 down_read(&fs_info->dev_replace.rwsem);
4146 if (dev->scrub_ctx ||
4147 (!is_dev_replace &&
4148 btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
4149 up_read(&fs_info->dev_replace.rwsem);
4150 mutex_unlock(&fs_info->scrub_lock);
4151 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
4152 ret = -EINPROGRESS;
4153 goto out;
4154 }
4155 up_read(&fs_info->dev_replace.rwsem);
4156
4157 sctx->readonly = readonly;
4158 dev->scrub_ctx = sctx;
4159 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
4160
	/*
	 * By checking @scrub_pause_req here, we can avoid a race
	 * between committing a transaction and scrubbing.
	 */
4165 __scrub_blocked_if_needed(fs_info);
4166 atomic_inc(&fs_info->scrubs_running);
4167 mutex_unlock(&fs_info->scrub_lock);
4168
	/*
	 * In order to avoid deadlock with reclaim when there is a transaction
	 * trying to pause scrub, make sure we use GFP_NOFS for all the
	 * allocations done at scrub_pages() and scrub_pages_for_parity()
	 * invoked by our callees. The pausing request is done when the
	 * transaction commit starts, and it blocks the transaction until scrub
	 * is paused (done at specific points at scrub_stripe() or right above
	 * before incrementing fs_info->scrubs_running).
	 */
4178 nofs_flag = memalloc_nofs_save();
4179 if (!is_dev_replace) {
4180 btrfs_info(fs_info, "scrub: started on devid %llu", devid);
		/*
		 * By holding the device list mutex, we can
		 * kick off writing super in log tree sync.
		 */
4185 mutex_lock(&fs_info->fs_devices->device_list_mutex);
4186 ret = scrub_supers(sctx, dev);
4187 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
4188 }
4189
4190 if (!ret)
4191 ret = scrub_enumerate_chunks(sctx, dev, start, end);
4192 memalloc_nofs_restore(nofs_flag);
4193
4194 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
4195 atomic_dec(&fs_info->scrubs_running);
4196 wake_up(&fs_info->scrub_pause_wait);
4197
4198 wait_event(sctx->list_wait, atomic_read(&sctx->workers_pending) == 0);
4199
4200 if (progress)
4201 memcpy(progress, &sctx->stat, sizeof(*progress));
4202
4203 if (!is_dev_replace)
4204 btrfs_info(fs_info, "scrub: %s on devid %llu with status: %d",
4205 ret ? "not finished" : "finished", devid, ret);
4206
4207 mutex_lock(&fs_info->scrub_lock);
4208 dev->scrub_ctx = NULL;
4209 mutex_unlock(&fs_info->scrub_lock);
4210
4211 scrub_workers_put(fs_info);
4212 scrub_put_ctx(sctx);
4213
4214 return ret;
4215out:
4216 scrub_workers_put(fs_info);
4217out_free_ctx:
4218 scrub_free_ctx(sctx);
4219
4220 return ret;
4221}
4222
4223void btrfs_scrub_pause(struct btrfs_fs_info *fs_info)
4224{
4225 mutex_lock(&fs_info->scrub_lock);
4226 atomic_inc(&fs_info->scrub_pause_req);
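	/* Wait until all running scrubs have parked at a pause point. */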
4227 while (atomic_read(&fs_info->scrubs_paused) !=
4228 atomic_read(&fs_info->scrubs_running)) {
4229 mutex_unlock(&fs_info->scrub_lock);
4230 wait_event(fs_info->scrub_pause_wait,
4231 atomic_read(&fs_info->scrubs_paused) ==
4232 atomic_read(&fs_info->scrubs_running));
4233 mutex_lock(&fs_info->scrub_lock);
4234 }
4235 mutex_unlock(&fs_info->scrub_lock);
4236}
4237
4238void btrfs_scrub_continue(struct btrfs_fs_info *fs_info)
4239{
4240 atomic_dec(&fs_info->scrub_pause_req);
4241 wake_up(&fs_info->scrub_pause_wait);
4242}
4243
4244int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
4245{
4246 mutex_lock(&fs_info->scrub_lock);
4247 if (!atomic_read(&fs_info->scrubs_running)) {
4248 mutex_unlock(&fs_info->scrub_lock);
4249 return -ENOTCONN;
4250 }
4251
4252 atomic_inc(&fs_info->scrub_cancel_req);
4253 while (atomic_read(&fs_info->scrubs_running)) {
4254 mutex_unlock(&fs_info->scrub_lock);
4255 wait_event(fs_info->scrub_pause_wait,
4256 atomic_read(&fs_info->scrubs_running) == 0);
4257 mutex_lock(&fs_info->scrub_lock);
4258 }
4259 atomic_dec(&fs_info->scrub_cancel_req);
4260 mutex_unlock(&fs_info->scrub_lock);
4261
4262 return 0;
4263}
4264
4265int btrfs_scrub_cancel_dev(struct btrfs_device *dev)
4266{
4267 struct btrfs_fs_info *fs_info = dev->fs_info;
4268 struct scrub_ctx *sctx;
4269
4270 mutex_lock(&fs_info->scrub_lock);
4271 sctx = dev->scrub_ctx;
4272 if (!sctx) {
4273 mutex_unlock(&fs_info->scrub_lock);
4274 return -ENOTCONN;
4275 }
4276 atomic_inc(&sctx->cancel_req);
4277 while (dev->scrub_ctx) {
4278 mutex_unlock(&fs_info->scrub_lock);
4279 wait_event(fs_info->scrub_pause_wait,
4280 dev->scrub_ctx == NULL);
4281 mutex_lock(&fs_info->scrub_lock);
4282 }
4283 mutex_unlock(&fs_info->scrub_lock);
4284
4285 return 0;
4286}
4287
4288int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
4289 struct btrfs_scrub_progress *progress)
4290{
4291 struct btrfs_dev_lookup_args args = { .devid = devid };
4292 struct btrfs_device *dev;
4293 struct scrub_ctx *sctx = NULL;
4294
4295 mutex_lock(&fs_info->fs_devices->device_list_mutex);
4296 dev = btrfs_find_device(fs_info->fs_devices, &args);
4297 if (dev)
4298 sctx = dev->scrub_ctx;
4299 if (sctx)
4300 memcpy(progress, &sctx->stat, sizeof(*progress));
4301 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
4302
4303 return dev ? (sctx ? 0 : -ENOTCONN) : -ENODEV;
4304}
4305
4306static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
4307 u64 extent_logical, u32 extent_len,
4308 u64 *extent_physical,
4309 struct btrfs_device **extent_dev,
4310 int *extent_mirror_num)
4311{
4312 u64 mapped_length;
4313 struct btrfs_io_context *bioc = NULL;
4314 int ret;
4315
4316 mapped_length = extent_len;
4317 ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, extent_logical,
4318 &mapped_length, &bioc, 0);
4319 if (ret || !bioc || mapped_length < extent_len ||
4320 !bioc->stripes[0].dev->bdev) {
4321 btrfs_put_bioc(bioc);
4322 return;
4323 }
4324
4325 *extent_physical = bioc->stripes[0].physical;
4326 *extent_mirror_num = bioc->mirror_num;
4327 *extent_dev = bioc->stripes[0].dev;
4328 btrfs_put_bioc(bioc);
4329}
4330