#include <linux/blkdev.h>
#include <linux/ratelimit.h>
#include "ctree.h"
#include "volumes.h"
#include "disk-io.h"
#include "ordered-data.h"
#include "transaction.h"
#include "backref.h"
#include "extent_io.h"
#include "dev-replace.h"
#include "check-integrity.h"
#include "rcu-string.h"
#include "raid56.h"
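/*
 * Scrub reads all extents and super blocks on a device and verifies their
 * checksums.  When a bad copy is found, or a copy cannot be read at all,
 * the block is re-read from the other mirrors and, if a good copy exists,
 * the bad one is rewritten.  Device replace reuses the same machinery and
 * additionally copies every verified block to the replacement target.
 */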
struct scrub_block;
struct scrub_ctx;

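/*
 * These three values only affect performance: the first two bound how many
 * pages may be packed into a single read or write bio, the last one sizes
 * the pool of scrub_bios that one scrub context keeps for outstanding I/O.
 */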
#define SCRUB_PAGES_PER_RD_BIO   32
#define SCRUB_PAGES_PER_WR_BIO   32
#define SCRUB_BIOS_PER_SCTX      64

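/*
 * SCRUB_MAX_PAGES_PER_BLOCK * PAGE_SIZE must be large enough to hold the
 * biggest node/leaf/sector size that has to be supported (64k with 4k
 * pages).
 */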
#define SCRUB_MAX_PAGES_PER_BLOCK 16

struct scrub_recover {
        atomic_t                refs;
        struct btrfs_bio        *bbio;
        u64                     map_length;
};

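/*
 * One page worth of data being scrubbed: where it lives logically and
 * physically, which device and mirror it came from, and the checksum it
 * is expected to match.
 */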
struct scrub_page {
        struct scrub_block      *sblock;
        struct page             *page;
        struct btrfs_device     *dev;
        struct list_head        list;
        u64                     flags;
        u64                     generation;
        u64                     logical;
        u64                     physical;
        u64                     physical_for_dev_replace;
        atomic_t                refs;
        struct {
                unsigned int    mirror_num:8;
                unsigned int    have_csum:1;
                unsigned int    io_error:1;
        };
        u8                      csum[BTRFS_CSUM_SIZE];

        struct scrub_recover    *recover;
};

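/*
 * A scrub_bio collects physically and logically contiguous pages on one
 * device so that they can be submitted as a single read or write bio.
 */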
struct scrub_bio {
        int                     index;
        struct scrub_ctx        *sctx;
        struct btrfs_device     *dev;
        struct bio              *bio;
        int                     err;
        u64                     logical;
        u64                     physical;
#if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
        struct scrub_page       *pagev[SCRUB_PAGES_PER_WR_BIO];
#else
        struct scrub_page       *pagev[SCRUB_PAGES_PER_RD_BIO];
#endif
        int                     page_count;
        int                     next_free;
        struct btrfs_work       work;
};

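/*
 * A scrub_block groups the pages of one verification unit (a data sector
 * or a metadata node); checksum checking and repair operate on this unit.
 */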
struct scrub_block {
        struct scrub_page       *pagev[SCRUB_MAX_PAGES_PER_BLOCK];
        int                     page_count;
        atomic_t                outstanding_pages;
        atomic_t                refs;
        struct scrub_ctx        *sctx;
        struct scrub_parity     *sparity;
        struct {
                unsigned int    header_error:1;
                unsigned int    checksum_error:1;
                unsigned int    no_io_error_seen:1;
                unsigned int    generation_error:1;

                unsigned int    data_corrected:1;
        };
        struct btrfs_work       work;
};

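/*
 * State of one RAID5/6 parity stripe while the data stripes covering it
 * are scrubbed; once all of them are done the parity itself is checked
 * and repaired if necessary.
 */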
struct scrub_parity {
        struct scrub_ctx        *sctx;

        struct btrfs_device     *scrub_dev;

        u64                     logic_start;

        u64                     logic_end;

        int                     nsectors;

        int                     stripe_len;

        atomic_t                refs;

        struct list_head        spages;

        /* work item for the parity check and repair */
        struct btrfs_work       work;

        /* marks which sectors of the stripe contain data */
        unsigned long           *dbitmap;

        /* marks data sectors that saw an I/O or checksum error */
        unsigned long           *ebitmap;

        unsigned long           bitmap[0];
};

struct scrub_wr_ctx {
        struct scrub_bio *wr_curr_bio;
        struct btrfs_device *tgtdev;
        int pages_per_wr_bio;
        atomic_t flush_all_writes;
        struct mutex wr_lock;
};

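/*
 * Per-scrub state: the pool of scrub_bios, the accumulated statistics
 * and, for device replace, the write context for the target device.
 */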
struct scrub_ctx {
        struct scrub_bio        *bios[SCRUB_BIOS_PER_SCTX];
        struct btrfs_root       *dev_root;
        int                     first_free;
        int                     curr;
        atomic_t                bios_in_flight;
        atomic_t                workers_pending;
        spinlock_t              list_lock;
        wait_queue_head_t       list_wait;
        u16                     csum_size;
        struct list_head        csum_list;
        atomic_t                cancel_req;
        int                     readonly;
        int                     pages_per_rd_bio;
        u32                     sectorsize;
        u32                     nodesize;

        int                     is_dev_replace;
        struct scrub_wr_ctx     wr_ctx;

        /*
         * statistics
         */
        struct btrfs_scrub_progress stat;
        spinlock_t              stat_lock;

        /*
         * Use a reference counter to avoid use-after-free issues: workers
         * still hold a reference while they decrement bios_in_flight or
         * workers_pending and wake up list_wait, so the context is only
         * freed once the last reference is dropped (see scrub_put_ctx()).
         */
        atomic_t                refs;
};

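/*
 * Deferred work used to repair a data block that has no checksum
 * (nodatasum): the data is re-read and rewritten through the page cache
 * of the inode(s) that reference it.
 */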
struct scrub_fixup_nodatasum {
        struct scrub_ctx        *sctx;
        struct btrfs_device     *dev;
        u64                     logical;
        struct btrfs_root       *root;
        struct btrfs_work       work;
        int                     mirror_num;
};

struct scrub_nocow_inode {
        u64                     inum;
        u64                     offset;
        u64                     root;
        struct list_head        list;
};

struct scrub_copy_nocow_ctx {
        struct scrub_ctx        *sctx;
        u64                     logical;
        u64                     len;
        int                     mirror_num;
        u64                     physical_for_dev_replace;
        struct list_head        inodes;
        struct btrfs_work       work;
};

struct scrub_warning {
        struct btrfs_path       *path;
        u64                     extent_item_size;
        const char              *errstr;
        sector_t                sector;
        u64                     logical;
        struct btrfs_device     *dev;
};

static void scrub_pending_bio_inc(struct scrub_ctx *sctx);
static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx);
static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx);
static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
                                     struct scrub_block *sblocks_for_recheck);
static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
                                struct scrub_block *sblock,
                                int retry_failed_mirror);
static void scrub_recheck_block_checksum(struct scrub_block *sblock);
static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
                                             struct scrub_block *sblock_good);
static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
                                            struct scrub_block *sblock_good,
                                            int page_num, int force_write);
static void scrub_write_block_to_dev_replace(struct scrub_block *sblock);
static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
                                           int page_num);
static int scrub_checksum_data(struct scrub_block *sblock);
static int scrub_checksum_tree_block(struct scrub_block *sblock);
static int scrub_checksum_super(struct scrub_block *sblock);
static void scrub_block_get(struct scrub_block *sblock);
static void scrub_block_put(struct scrub_block *sblock);
static void scrub_page_get(struct scrub_page *spage);
static void scrub_page_put(struct scrub_page *spage);
static void scrub_parity_get(struct scrub_parity *sparity);
static void scrub_parity_put(struct scrub_parity *sparity);
static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
                                    struct scrub_page *spage);
static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
                       u64 physical, struct btrfs_device *dev, u64 flags,
                       u64 gen, int mirror_num, u8 *csum, int force,
                       u64 physical_for_dev_replace);
static void scrub_bio_end_io(struct bio *bio);
static void scrub_bio_end_io_worker(struct btrfs_work *work);
static void scrub_block_complete(struct scrub_block *sblock);
static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
                               u64 extent_logical, u64 extent_len,
                               u64 *extent_physical,
                               struct btrfs_device **extent_dev,
                               int *extent_mirror_num);
static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
                              struct scrub_wr_ctx *wr_ctx,
                              struct btrfs_fs_info *fs_info,
                              struct btrfs_device *dev,
                              int is_dev_replace);
static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx);
static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
                                    struct scrub_page *spage);
static void scrub_wr_submit(struct scrub_ctx *sctx);
static void scrub_wr_bio_end_io(struct bio *bio);
static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
static int write_page_nocow(struct scrub_ctx *sctx,
                            u64 physical_for_dev_replace, struct page *page);
static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
                                      struct scrub_copy_nocow_ctx *ctx);
static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
                            int mirror_num, u64 physical_for_dev_replace);
static void copy_nocow_pages_worker(struct btrfs_work *work);
static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_put_ctx(struct scrub_ctx *sctx);

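/*
 * bios_in_flight counts scrub bios that have been submitted but whose
 * completion work has not finished yet.  Each in-flight bio also holds a
 * reference on the scrub context, so the context cannot be freed while
 * completions are still pending.
 */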
static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
{
        atomic_inc(&sctx->refs);
        atomic_inc(&sctx->bios_in_flight);
}

static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
{
        atomic_dec(&sctx->bios_in_flight);
        wake_up(&sctx->list_wait);
        scrub_put_ctx(sctx);
}

static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
{
        while (atomic_read(&fs_info->scrub_pause_req)) {
                mutex_unlock(&fs_info->scrub_lock);
                wait_event(fs_info->scrub_pause_wait,
                           atomic_read(&fs_info->scrub_pause_req) == 0);
                mutex_lock(&fs_info->scrub_lock);
        }
}

static void scrub_pause_on(struct btrfs_fs_info *fs_info)
{
        atomic_inc(&fs_info->scrubs_paused);
        wake_up(&fs_info->scrub_pause_wait);
}

static void scrub_pause_off(struct btrfs_fs_info *fs_info)
{
        mutex_lock(&fs_info->scrub_lock);
        __scrub_blocked_if_needed(fs_info);
        atomic_dec(&fs_info->scrubs_paused);
        mutex_unlock(&fs_info->scrub_lock);

        wake_up(&fs_info->scrub_pause_wait);
}

static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
{
        scrub_pause_on(fs_info);
        scrub_pause_off(fs_info);
}

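/*
 * Used by workers that need to join a transaction (the nodatasum fixup and
 * NOCOW copy workers).  scrubs_running is raised so a cancel request does
 * not complete while such a worker is pending, and scrubs_paused is raised
 * as well so that pause requests issued around transaction commits cannot
 * deadlock against the worker.
 */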
static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx)
{
        struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;

        atomic_inc(&sctx->refs);

        mutex_lock(&fs_info->scrub_lock);
        atomic_inc(&fs_info->scrubs_running);
        atomic_inc(&fs_info->scrubs_paused);
        mutex_unlock(&fs_info->scrub_lock);

        wake_up(&fs_info->scrub_pause_wait);

        atomic_inc(&sctx->workers_pending);
}

static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx)
{
        struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;

        mutex_lock(&fs_info->scrub_lock);
        atomic_dec(&fs_info->scrubs_running);
        atomic_dec(&fs_info->scrubs_paused);
        mutex_unlock(&fs_info->scrub_lock);
        atomic_dec(&sctx->workers_pending);
        wake_up(&fs_info->scrub_pause_wait);
        wake_up(&sctx->list_wait);
        scrub_put_ctx(sctx);
}

static void scrub_free_csums(struct scrub_ctx *sctx)
{
        while (!list_empty(&sctx->csum_list)) {
                struct btrfs_ordered_sum *sum;
                sum = list_first_entry(&sctx->csum_list,
                                       struct btrfs_ordered_sum, list);
                list_del(&sum->list);
                kfree(sum);
        }
}

static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
{
        int i;

        if (!sctx)
                return;

        scrub_free_wr_ctx(&sctx->wr_ctx);

        /* a partially filled read bio is left over when scrub is aborted */
        if (sctx->curr != -1) {
                struct scrub_bio *sbio = sctx->bios[sctx->curr];

                for (i = 0; i < sbio->page_count; i++) {
                        WARN_ON(!sbio->pagev[i]->page);
                        scrub_block_put(sbio->pagev[i]->sblock);
                }
                bio_put(sbio->bio);
        }

        for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
                struct scrub_bio *sbio = sctx->bios[i];

                if (!sbio)
                        break;
                kfree(sbio);
        }

        scrub_free_csums(sctx);
        kfree(sctx);
}

static void scrub_put_ctx(struct scrub_ctx *sctx)
{
        if (atomic_dec_and_test(&sctx->refs))
                scrub_free_ctx(sctx);
}

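/*
 * Allocate and initialize a scrub context for @dev: the fixed pool of
 * SCRUB_BIOS_PER_SCTX scrub_bios plus, in the device replace case, the
 * write context for the replace target device.
 */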
static noinline_for_stack
struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
{
        struct scrub_ctx *sctx;
        int i;
        struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
        int ret;

        sctx = kzalloc(sizeof(*sctx), GFP_KERNEL);
        if (!sctx)
                goto nomem;
        atomic_set(&sctx->refs, 1);
        sctx->is_dev_replace = is_dev_replace;
        sctx->pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
        sctx->curr = -1;
        sctx->dev_root = dev->dev_root;
        for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
                struct scrub_bio *sbio;

                sbio = kzalloc(sizeof(*sbio), GFP_KERNEL);
                if (!sbio)
                        goto nomem;
                sctx->bios[i] = sbio;

                sbio->index = i;
                sbio->sctx = sctx;
                sbio->page_count = 0;
                btrfs_init_work(&sbio->work, btrfs_scrub_helper,
                                scrub_bio_end_io_worker, NULL, NULL);

                if (i != SCRUB_BIOS_PER_SCTX - 1)
                        sctx->bios[i]->next_free = i + 1;
                else
                        sctx->bios[i]->next_free = -1;
        }
        sctx->first_free = 0;
        sctx->nodesize = dev->dev_root->nodesize;
        sctx->sectorsize = dev->dev_root->sectorsize;
        atomic_set(&sctx->bios_in_flight, 0);
        atomic_set(&sctx->workers_pending, 0);
        atomic_set(&sctx->cancel_req, 0);
        sctx->csum_size = btrfs_super_csum_size(fs_info->super_copy);
        INIT_LIST_HEAD(&sctx->csum_list);

        spin_lock_init(&sctx->list_lock);
        spin_lock_init(&sctx->stat_lock);
        init_waitqueue_head(&sctx->list_wait);

        ret = scrub_setup_wr_ctx(sctx, &sctx->wr_ctx, fs_info,
                                 fs_info->dev_replace.tgtdev, is_dev_replace);
        if (ret) {
                scrub_free_ctx(sctx);
                return ERR_PTR(ret);
        }
        return sctx;

nomem:
        scrub_free_ctx(sctx);
        return ERR_PTR(-ENOMEM);
}
516
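/*
 * Backref walker callback: for each (root, inode, offset) that references
 * the corrupted extent, resolve the file path(s) and print a warning that
 * names the affected file.
 */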
517static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
518 void *warn_ctx)
519{
520 u64 isize;
521 u32 nlink;
522 int ret;
523 int i;
524 struct extent_buffer *eb;
525 struct btrfs_inode_item *inode_item;
526 struct scrub_warning *swarn = warn_ctx;
527 struct btrfs_fs_info *fs_info = swarn->dev->dev_root->fs_info;
528 struct inode_fs_paths *ipath = NULL;
529 struct btrfs_root *local_root;
530 struct btrfs_key root_key;
531 struct btrfs_key key;
532
533 root_key.objectid = root;
534 root_key.type = BTRFS_ROOT_ITEM_KEY;
535 root_key.offset = (u64)-1;
536 local_root = btrfs_read_fs_root_no_name(fs_info, &root_key);
537 if (IS_ERR(local_root)) {
538 ret = PTR_ERR(local_root);
539 goto err;
540 }
541
542
543
544
545 key.objectid = inum;
546 key.type = BTRFS_INODE_ITEM_KEY;
547 key.offset = 0;
548
549 ret = btrfs_search_slot(NULL, local_root, &key, swarn->path, 0, 0);
550 if (ret) {
551 btrfs_release_path(swarn->path);
552 goto err;
553 }
554
555 eb = swarn->path->nodes[0];
556 inode_item = btrfs_item_ptr(eb, swarn->path->slots[0],
557 struct btrfs_inode_item);
558 isize = btrfs_inode_size(eb, inode_item);
559 nlink = btrfs_inode_nlink(eb, inode_item);
560 btrfs_release_path(swarn->path);
561
562 ipath = init_ipath(4096, local_root, swarn->path);
563 if (IS_ERR(ipath)) {
564 ret = PTR_ERR(ipath);
565 ipath = NULL;
566 goto err;
567 }
568 ret = paths_from_inode(inum, ipath);
569
570 if (ret < 0)
571 goto err;
572
573
574
575
576
577 for (i = 0; i < ipath->fspath->elem_cnt; ++i)
578 btrfs_warn_in_rcu(fs_info,
579 "%s at logical %llu on dev %s, sector %llu, root %llu, inode %llu, offset %llu, length %llu, links %u (path: %s)",
580 swarn->errstr, swarn->logical,
581 rcu_str_deref(swarn->dev->name),
582 (unsigned long long)swarn->sector,
583 root, inum, offset,
584 min(isize - offset, (u64)PAGE_SIZE), nlink,
585 (char *)(unsigned long)ipath->fspath->val[i]);
586
587 free_ipath(ipath);
588 return 0;
589
590err:
591 btrfs_warn_in_rcu(fs_info,
592 "%s at logical %llu on dev %s, sector %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d",
593 swarn->errstr, swarn->logical,
594 rcu_str_deref(swarn->dev->name),
595 (unsigned long long)swarn->sector,
596 root, inum, offset, ret);
597
598 free_ipath(ipath);
599 return 0;
600}
601
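/*
 * Report a scrub error in a readable form: look up the extent that covers
 * the bad logical address and print either the owning tree (for metadata)
 * or the affected file paths (for data).
 */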
602static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
603{
604 struct btrfs_device *dev;
605 struct btrfs_fs_info *fs_info;
606 struct btrfs_path *path;
607 struct btrfs_key found_key;
608 struct extent_buffer *eb;
609 struct btrfs_extent_item *ei;
610 struct scrub_warning swarn;
611 unsigned long ptr = 0;
612 u64 extent_item_pos;
613 u64 flags = 0;
614 u64 ref_root;
615 u32 item_size;
616 u8 ref_level = 0;
617 int ret;
618
619 WARN_ON(sblock->page_count < 1);
620 dev = sblock->pagev[0]->dev;
621 fs_info = sblock->sctx->dev_root->fs_info;
622
623 path = btrfs_alloc_path();
624 if (!path)
625 return;
626
627 swarn.sector = (sblock->pagev[0]->physical) >> 9;
628 swarn.logical = sblock->pagev[0]->logical;
629 swarn.errstr = errstr;
630 swarn.dev = NULL;
631
632 ret = extent_from_logical(fs_info, swarn.logical, path, &found_key,
633 &flags);
634 if (ret < 0)
635 goto out;
636
637 extent_item_pos = swarn.logical - found_key.objectid;
638 swarn.extent_item_size = found_key.offset;
639
640 eb = path->nodes[0];
641 ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
642 item_size = btrfs_item_size_nr(eb, path->slots[0]);
643
644 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
645 do {
646 ret = tree_backref_for_extent(&ptr, eb, &found_key, ei,
647 item_size, &ref_root,
648 &ref_level);
649 btrfs_warn_in_rcu(fs_info,
650 "%s at logical %llu on dev %s, sector %llu: metadata %s (level %d) in tree %llu",
651 errstr, swarn.logical,
652 rcu_str_deref(dev->name),
653 (unsigned long long)swarn.sector,
654 ref_level ? "node" : "leaf",
655 ret < 0 ? -1 : ref_level,
656 ret < 0 ? -1 : ref_root);
657 } while (ret != 1);
658 btrfs_release_path(path);
659 } else {
660 btrfs_release_path(path);
661 swarn.path = path;
662 swarn.dev = dev;
663 iterate_extent_inodes(fs_info, found_key.objectid,
664 extent_item_pos, 1,
665 scrub_print_warning_inode, &swarn);
666 }
667
668out:
669 btrfs_free_path(path);
670}
671
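/*
 * Repair one page of a nodatasum extent through the owning inode.  If the
 * page cache already holds a clean, up-to-date copy it is written over the
 * bad mirror directly; otherwise the range is marked EXTENT_DAMAGED and
 * re-read so the normal read-repair path can fix it, and the bit is tested
 * afterwards to see whether that worked.
 */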
672static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
673{
674 struct page *page = NULL;
675 unsigned long index;
676 struct scrub_fixup_nodatasum *fixup = fixup_ctx;
677 int ret;
678 int corrected = 0;
679 struct btrfs_key key;
680 struct inode *inode = NULL;
681 struct btrfs_fs_info *fs_info;
682 u64 end = offset + PAGE_SIZE - 1;
683 struct btrfs_root *local_root;
684 int srcu_index;
685
686 key.objectid = root;
687 key.type = BTRFS_ROOT_ITEM_KEY;
688 key.offset = (u64)-1;
689
690 fs_info = fixup->root->fs_info;
691 srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
692
693 local_root = btrfs_read_fs_root_no_name(fs_info, &key);
694 if (IS_ERR(local_root)) {
695 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
696 return PTR_ERR(local_root);
697 }
698
699 key.type = BTRFS_INODE_ITEM_KEY;
700 key.objectid = inum;
701 key.offset = 0;
702 inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
703 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
704 if (IS_ERR(inode))
705 return PTR_ERR(inode);
706
707 index = offset >> PAGE_SHIFT;
708
709 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
710 if (!page) {
711 ret = -ENOMEM;
712 goto out;
713 }
714
715 if (PageUptodate(page)) {
716 if (PageDirty(page)) {
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733 ret = -EIO;
734 goto out;
735 }
736 ret = repair_io_failure(inode, offset, PAGE_SIZE,
737 fixup->logical, page,
738 offset - page_offset(page),
739 fixup->mirror_num);
740 unlock_page(page);
741 corrected = !ret;
742 } else {
743
744
745
746
747
748 ret = set_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
749 EXTENT_DAMAGED);
750 if (ret) {
751
752 WARN_ON(ret > 0);
753 if (ret > 0)
754 ret = -EFAULT;
755 goto out;
756 }
757
758 ret = extent_read_full_page(&BTRFS_I(inode)->io_tree, page,
759 btrfs_get_extent,
760 fixup->mirror_num);
761 wait_on_page_locked(page);
762
763 corrected = !test_range_bit(&BTRFS_I(inode)->io_tree, offset,
764 end, EXTENT_DAMAGED, 0, NULL);
765 if (!corrected)
766 clear_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
767 EXTENT_DAMAGED);
768 }
769
770out:
771 if (page)
772 put_page(page);
773
774 iput(inode);
775
776 if (ret < 0)
777 return ret;
778
779 if (ret == 0 && corrected) {
780
781
782
783
784 return 1;
785 }
786
787 return -EIO;
788}
789
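/*
 * Worker that drives the nodatasum repair: join a transaction, iterate all
 * inodes that reference the bad logical address, let scrub_fixup_readpage()
 * repair the data through their page cache and update the corrected or
 * uncorrectable counters accordingly.
 */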
790static void scrub_fixup_nodatasum(struct btrfs_work *work)
791{
792 int ret;
793 struct scrub_fixup_nodatasum *fixup;
794 struct scrub_ctx *sctx;
795 struct btrfs_trans_handle *trans = NULL;
796 struct btrfs_path *path;
797 int uncorrectable = 0;
798
799 fixup = container_of(work, struct scrub_fixup_nodatasum, work);
800 sctx = fixup->sctx;
801
802 path = btrfs_alloc_path();
803 if (!path) {
804 spin_lock(&sctx->stat_lock);
805 ++sctx->stat.malloc_errors;
806 spin_unlock(&sctx->stat_lock);
807 uncorrectable = 1;
808 goto out;
809 }
810
811 trans = btrfs_join_transaction(fixup->root);
812 if (IS_ERR(trans)) {
813 uncorrectable = 1;
814 goto out;
815 }
816
817
818
819
820
821
822
823
824
825
826 ret = iterate_inodes_from_logical(fixup->logical, fixup->root->fs_info,
827 path, scrub_fixup_readpage,
828 fixup);
829 if (ret < 0) {
830 uncorrectable = 1;
831 goto out;
832 }
833 WARN_ON(ret != 1);
834
835 spin_lock(&sctx->stat_lock);
836 ++sctx->stat.corrected_errors;
837 spin_unlock(&sctx->stat_lock);
838
839out:
840 if (trans && !IS_ERR(trans))
841 btrfs_end_transaction(trans, fixup->root);
842 if (uncorrectable) {
843 spin_lock(&sctx->stat_lock);
844 ++sctx->stat.uncorrectable_errors;
845 spin_unlock(&sctx->stat_lock);
846 btrfs_dev_replace_stats_inc(
847 &sctx->dev_root->fs_info->dev_replace.
848 num_uncorrectable_read_errors);
849 btrfs_err_rl_in_rcu(sctx->dev_root->fs_info,
850 "unable to fixup (nodatasum) error at logical %llu on dev %s",
851 fixup->logical, rcu_str_deref(fixup->dev->name));
852 }
853
854 btrfs_free_path(path);
855 kfree(fixup);
856
857 scrub_pending_trans_workers_dec(sctx);
858}
859
860static inline void scrub_get_recover(struct scrub_recover *recover)
861{
862 atomic_inc(&recover->refs);
863}
864
865static inline void scrub_put_recover(struct scrub_recover *recover)
866{
867 if (atomic_dec_and_test(&recover->refs)) {
868 btrfs_put_bbio(recover->bbio);
869 kfree(recover);
870 }
871}
872
873
874
875
876
877
878
879
880
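/*
 * Called when checksum verification of a block failed or the block could
 * not be read.  The block is re-read from every mirror; if a good copy is
 * found, the bad copy is rewritten from it (page by page where possible).
 * If no combination of mirrors yields valid data, the error is counted as
 * uncorrectable.
 */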
881static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
882{
883 struct scrub_ctx *sctx = sblock_to_check->sctx;
884 struct btrfs_device *dev;
885 struct btrfs_fs_info *fs_info;
886 u64 length;
887 u64 logical;
888 unsigned int failed_mirror_index;
889 unsigned int is_metadata;
890 unsigned int have_csum;
891 struct scrub_block *sblocks_for_recheck;
892 struct scrub_block *sblock_bad;
893 int ret;
894 int mirror_index;
895 int page_num;
896 int success;
897 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
898 DEFAULT_RATELIMIT_BURST);
899
900 BUG_ON(sblock_to_check->page_count < 1);
901 fs_info = sctx->dev_root->fs_info;
902 if (sblock_to_check->pagev[0]->flags & BTRFS_EXTENT_FLAG_SUPER) {
903
904
905
906
907
908 spin_lock(&sctx->stat_lock);
909 ++sctx->stat.super_errors;
910 spin_unlock(&sctx->stat_lock);
911 return 0;
912 }
913 length = sblock_to_check->page_count * PAGE_SIZE;
914 logical = sblock_to_check->pagev[0]->logical;
915 BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1);
916 failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1;
917 is_metadata = !(sblock_to_check->pagev[0]->flags &
918 BTRFS_EXTENT_FLAG_DATA);
919 have_csum = sblock_to_check->pagev[0]->have_csum;
920 dev = sblock_to_check->pagev[0]->dev;
921
922 if (sctx->is_dev_replace && !is_metadata && !have_csum) {
923 sblocks_for_recheck = NULL;
924 goto nodatasum_case;
925 }
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956 sblocks_for_recheck = kcalloc(BTRFS_MAX_MIRRORS,
957 sizeof(*sblocks_for_recheck), GFP_NOFS);
958 if (!sblocks_for_recheck) {
959 spin_lock(&sctx->stat_lock);
960 sctx->stat.malloc_errors++;
961 sctx->stat.read_errors++;
962 sctx->stat.uncorrectable_errors++;
963 spin_unlock(&sctx->stat_lock);
964 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
965 goto out;
966 }
967
968
969 ret = scrub_setup_recheck_block(sblock_to_check, sblocks_for_recheck);
970 if (ret) {
971 spin_lock(&sctx->stat_lock);
972 sctx->stat.read_errors++;
973 sctx->stat.uncorrectable_errors++;
974 spin_unlock(&sctx->stat_lock);
975 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
976 goto out;
977 }
978 BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
979 sblock_bad = sblocks_for_recheck + failed_mirror_index;
980
981
982 scrub_recheck_block(fs_info, sblock_bad, 1);
983
984 if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
985 sblock_bad->no_io_error_seen) {
986
987
988
989
990
991
992
993
994 spin_lock(&sctx->stat_lock);
995 sctx->stat.unverified_errors++;
996 sblock_to_check->data_corrected = 1;
997 spin_unlock(&sctx->stat_lock);
998
999 if (sctx->is_dev_replace)
1000 scrub_write_block_to_dev_replace(sblock_bad);
1001 goto out;
1002 }
1003
1004 if (!sblock_bad->no_io_error_seen) {
1005 spin_lock(&sctx->stat_lock);
1006 sctx->stat.read_errors++;
1007 spin_unlock(&sctx->stat_lock);
1008 if (__ratelimit(&_rs))
1009 scrub_print_warning("i/o error", sblock_to_check);
1010 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
1011 } else if (sblock_bad->checksum_error) {
1012 spin_lock(&sctx->stat_lock);
1013 sctx->stat.csum_errors++;
1014 spin_unlock(&sctx->stat_lock);
1015 if (__ratelimit(&_rs))
1016 scrub_print_warning("checksum error", sblock_to_check);
1017 btrfs_dev_stat_inc_and_print(dev,
1018 BTRFS_DEV_STAT_CORRUPTION_ERRS);
1019 } else if (sblock_bad->header_error) {
1020 spin_lock(&sctx->stat_lock);
1021 sctx->stat.verify_errors++;
1022 spin_unlock(&sctx->stat_lock);
1023 if (__ratelimit(&_rs))
1024 scrub_print_warning("checksum/header error",
1025 sblock_to_check);
1026 if (sblock_bad->generation_error)
1027 btrfs_dev_stat_inc_and_print(dev,
1028 BTRFS_DEV_STAT_GENERATION_ERRS);
1029 else
1030 btrfs_dev_stat_inc_and_print(dev,
1031 BTRFS_DEV_STAT_CORRUPTION_ERRS);
1032 }
1033
1034 if (sctx->readonly) {
1035 ASSERT(!sctx->is_dev_replace);
1036 goto out;
1037 }
1038
1039 if (!is_metadata && !have_csum) {
1040 struct scrub_fixup_nodatasum *fixup_nodatasum;
1041
1042 WARN_ON(sctx->is_dev_replace);
1043
1044nodatasum_case:
1045
1046
1047
1048
1049
1050
1051
1052
1053 fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS);
1054 if (!fixup_nodatasum)
1055 goto did_not_correct_error;
1056 fixup_nodatasum->sctx = sctx;
1057 fixup_nodatasum->dev = dev;
1058 fixup_nodatasum->logical = logical;
1059 fixup_nodatasum->root = fs_info->extent_root;
1060 fixup_nodatasum->mirror_num = failed_mirror_index + 1;
1061 scrub_pending_trans_workers_inc(sctx);
1062 btrfs_init_work(&fixup_nodatasum->work, btrfs_scrub_helper,
1063 scrub_fixup_nodatasum, NULL, NULL);
1064 btrfs_queue_work(fs_info->scrub_workers,
1065 &fixup_nodatasum->work);
1066 goto out;
1067 }
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084 for (mirror_index = 0;
1085 mirror_index < BTRFS_MAX_MIRRORS &&
1086 sblocks_for_recheck[mirror_index].page_count > 0;
1087 mirror_index++) {
1088 struct scrub_block *sblock_other;
1089
1090 if (mirror_index == failed_mirror_index)
1091 continue;
1092 sblock_other = sblocks_for_recheck + mirror_index;
1093
1094
1095 scrub_recheck_block(fs_info, sblock_other, 0);
1096
1097 if (!sblock_other->header_error &&
1098 !sblock_other->checksum_error &&
1099 sblock_other->no_io_error_seen) {
1100 if (sctx->is_dev_replace) {
1101 scrub_write_block_to_dev_replace(sblock_other);
1102 goto corrected_error;
1103 } else {
1104 ret = scrub_repair_block_from_good_copy(
1105 sblock_bad, sblock_other);
1106 if (!ret)
1107 goto corrected_error;
1108 }
1109 }
1110 }
1111
1112 if (sblock_bad->no_io_error_seen && !sctx->is_dev_replace)
1113 goto did_not_correct_error;
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139 success = 1;
1140 for (page_num = 0; page_num < sblock_bad->page_count;
1141 page_num++) {
1142 struct scrub_page *page_bad = sblock_bad->pagev[page_num];
1143 struct scrub_block *sblock_other = NULL;
1144
1145
1146 if (!page_bad->io_error && !sctx->is_dev_replace)
1147 continue;
1148
1149
1150 if (page_bad->io_error) {
1151 for (mirror_index = 0;
1152 mirror_index < BTRFS_MAX_MIRRORS &&
1153 sblocks_for_recheck[mirror_index].page_count > 0;
1154 mirror_index++) {
1155 if (!sblocks_for_recheck[mirror_index].
1156 pagev[page_num]->io_error) {
1157 sblock_other = sblocks_for_recheck +
1158 mirror_index;
1159 break;
1160 }
1161 }
1162 if (!sblock_other)
1163 success = 0;
1164 }
1165
1166 if (sctx->is_dev_replace) {
1167
1168
1169
1170
1171
1172
1173
1174 if (!sblock_other)
1175 sblock_other = sblock_bad;
1176
1177 if (scrub_write_page_to_dev_replace(sblock_other,
1178 page_num) != 0) {
1179 btrfs_dev_replace_stats_inc(
1180 &sctx->dev_root->
1181 fs_info->dev_replace.
1182 num_write_errors);
1183 success = 0;
1184 }
1185 } else if (sblock_other) {
1186 ret = scrub_repair_page_from_good_copy(sblock_bad,
1187 sblock_other,
1188 page_num, 0);
1189 if (0 == ret)
1190 page_bad->io_error = 0;
1191 else
1192 success = 0;
1193 }
1194 }
1195
1196 if (success && !sctx->is_dev_replace) {
1197 if (is_metadata || have_csum) {
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207 scrub_recheck_block(fs_info, sblock_bad, 1);
1208 if (!sblock_bad->header_error &&
1209 !sblock_bad->checksum_error &&
1210 sblock_bad->no_io_error_seen)
1211 goto corrected_error;
1212 else
1213 goto did_not_correct_error;
1214 } else {
1215corrected_error:
1216 spin_lock(&sctx->stat_lock);
1217 sctx->stat.corrected_errors++;
1218 sblock_to_check->data_corrected = 1;
1219 spin_unlock(&sctx->stat_lock);
1220 btrfs_err_rl_in_rcu(fs_info,
1221 "fixed up error at logical %llu on dev %s",
1222 logical, rcu_str_deref(dev->name));
1223 }
1224 } else {
1225did_not_correct_error:
1226 spin_lock(&sctx->stat_lock);
1227 sctx->stat.uncorrectable_errors++;
1228 spin_unlock(&sctx->stat_lock);
1229 btrfs_err_rl_in_rcu(fs_info,
1230 "unable to fixup (regular) error at logical %llu on dev %s",
1231 logical, rcu_str_deref(dev->name));
1232 }
1233
1234out:
1235 if (sblocks_for_recheck) {
1236 for (mirror_index = 0; mirror_index < BTRFS_MAX_MIRRORS;
1237 mirror_index++) {
1238 struct scrub_block *sblock = sblocks_for_recheck +
1239 mirror_index;
1240 struct scrub_recover *recover;
1241 int page_index;
1242
1243 for (page_index = 0; page_index < sblock->page_count;
1244 page_index++) {
1245 sblock->pagev[page_index]->sblock = NULL;
1246 recover = sblock->pagev[page_index]->recover;
1247 if (recover) {
1248 scrub_put_recover(recover);
1249 sblock->pagev[page_index]->recover =
1250 NULL;
1251 }
1252 scrub_page_put(sblock->pagev[page_index]);
1253 }
1254 }
1255 kfree(sblocks_for_recheck);
1256 }
1257
1258 return 0;
1259}
1260
1261static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio)
1262{
1263 if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
1264 return 2;
1265 else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
1266 return 3;
1267 else
1268 return (int)bbio->num_stripes;
1269}
1270
1271static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
1272 u64 *raid_map,
1273 u64 mapped_length,
1274 int nstripes, int mirror,
1275 int *stripe_index,
1276 u64 *stripe_offset)
1277{
1278 int i;
1279
1280 if (map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
1281
1282 for (i = 0; i < nstripes; i++) {
1283 if (raid_map[i] == RAID6_Q_STRIPE ||
1284 raid_map[i] == RAID5_P_STRIPE)
1285 continue;
1286
1287 if (logical >= raid_map[i] &&
1288 logical < raid_map[i] + mapped_length)
1289 break;
1290 }
1291
1292 *stripe_index = i;
1293 *stripe_offset = logical - raid_map[i];
1294 } else {
1295
1296 *stripe_index = mirror;
1297 *stripe_offset = 0;
1298 }
1299}
1300
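/*
 * Build one scrub_block per mirror of the failed block so every copy can
 * be re-read and checked independently.  For RAID5/6 the extra "mirrors"
 * are the variants that can be rebuilt from the remaining stripes.
 */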
1301static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
1302 struct scrub_block *sblocks_for_recheck)
1303{
1304 struct scrub_ctx *sctx = original_sblock->sctx;
1305 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
1306 u64 length = original_sblock->page_count * PAGE_SIZE;
1307 u64 logical = original_sblock->pagev[0]->logical;
1308 u64 generation = original_sblock->pagev[0]->generation;
1309 u64 flags = original_sblock->pagev[0]->flags;
1310 u64 have_csum = original_sblock->pagev[0]->have_csum;
1311 struct scrub_recover *recover;
1312 struct btrfs_bio *bbio;
1313 u64 sublen;
1314 u64 mapped_length;
1315 u64 stripe_offset;
1316 int stripe_index;
1317 int page_index = 0;
1318 int mirror_index;
1319 int nmirrors;
1320 int ret;
1321
1322
1323
1324
1325
1326
1327
1328 while (length > 0) {
1329 sublen = min_t(u64, length, PAGE_SIZE);
1330 mapped_length = sublen;
1331 bbio = NULL;
1332
1333
1334
1335
1336
1337 ret = btrfs_map_sblock(fs_info, REQ_GET_READ_MIRRORS, logical,
1338 &mapped_length, &bbio, 0, 1);
1339 if (ret || !bbio || mapped_length < sublen) {
1340 btrfs_put_bbio(bbio);
1341 return -EIO;
1342 }
1343
1344 recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
1345 if (!recover) {
1346 btrfs_put_bbio(bbio);
1347 return -ENOMEM;
1348 }
1349
1350 atomic_set(&recover->refs, 1);
1351 recover->bbio = bbio;
1352 recover->map_length = mapped_length;
1353
1354 BUG_ON(page_index >= SCRUB_MAX_PAGES_PER_BLOCK);
1355
1356 nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS);
1357
1358 for (mirror_index = 0; mirror_index < nmirrors;
1359 mirror_index++) {
1360 struct scrub_block *sblock;
1361 struct scrub_page *page;
1362
1363 sblock = sblocks_for_recheck + mirror_index;
1364 sblock->sctx = sctx;
1365
1366 page = kzalloc(sizeof(*page), GFP_NOFS);
1367 if (!page) {
1368leave_nomem:
1369 spin_lock(&sctx->stat_lock);
1370 sctx->stat.malloc_errors++;
1371 spin_unlock(&sctx->stat_lock);
1372 scrub_put_recover(recover);
1373 return -ENOMEM;
1374 }
1375 scrub_page_get(page);
1376 sblock->pagev[page_index] = page;
1377 page->sblock = sblock;
1378 page->flags = flags;
1379 page->generation = generation;
1380 page->logical = logical;
1381 page->have_csum = have_csum;
1382 if (have_csum)
1383 memcpy(page->csum,
1384 original_sblock->pagev[0]->csum,
1385 sctx->csum_size);
1386
1387 scrub_stripe_index_and_offset(logical,
1388 bbio->map_type,
1389 bbio->raid_map,
1390 mapped_length,
1391 bbio->num_stripes -
1392 bbio->num_tgtdevs,
1393 mirror_index,
1394 &stripe_index,
1395 &stripe_offset);
1396 page->physical = bbio->stripes[stripe_index].physical +
1397 stripe_offset;
1398 page->dev = bbio->stripes[stripe_index].dev;
1399
1400 BUG_ON(page_index >= original_sblock->page_count);
1401 page->physical_for_dev_replace =
1402 original_sblock->pagev[page_index]->
1403 physical_for_dev_replace;
1404
1405 page->mirror_num = mirror_index + 1;
1406 sblock->page_count++;
1407 page->page = alloc_page(GFP_NOFS);
1408 if (!page->page)
1409 goto leave_nomem;
1410
1411 scrub_get_recover(recover);
1412 page->recover = recover;
1413 }
1414 scrub_put_recover(recover);
1415 length -= sublen;
1416 logical += sublen;
1417 page_index++;
1418 }
1419
1420 return 0;
1421}
1422
1423struct scrub_bio_ret {
1424 struct completion event;
1425 int error;
1426};
1427
1428static void scrub_bio_wait_endio(struct bio *bio)
1429{
1430 struct scrub_bio_ret *ret = bio->bi_private;
1431
1432 ret->error = bio->bi_error;
1433 complete(&ret->event);
1434}
1435
1436static inline int scrub_is_page_on_raid56(struct scrub_page *page)
1437{
1438 return page->recover &&
1439 (page->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
1440}
1441
1442static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
1443 struct bio *bio,
1444 struct scrub_page *page)
1445{
1446 struct scrub_bio_ret done;
1447 int ret;
1448
1449 init_completion(&done.event);
1450 done.error = 0;
1451 bio->bi_iter.bi_sector = page->logical >> 9;
1452 bio->bi_private = &done;
1453 bio->bi_end_io = scrub_bio_wait_endio;
1454
1455 ret = raid56_parity_recover(fs_info->fs_root, bio, page->recover->bbio,
1456 page->recover->map_length,
1457 page->mirror_num, 0);
1458 if (ret)
1459 return ret;
1460
1461 wait_for_completion(&done.event);
1462 if (done.error)
1463 return -EIO;
1464
1465 return 0;
1466}
1467
1468
1469
1470
1471
1472
1473
1474
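/*
 * Re-read all pages of @sblock from disk.  Pages that cannot be read are
 * flagged with io_error and clear no_io_error_seen; if everything read
 * fine, the checksums and headers are verified as well.
 */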
1475static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
1476 struct scrub_block *sblock,
1477 int retry_failed_mirror)
1478{
1479 int page_num;
1480
1481 sblock->no_io_error_seen = 1;
1482
1483 for (page_num = 0; page_num < sblock->page_count; page_num++) {
1484 struct bio *bio;
1485 struct scrub_page *page = sblock->pagev[page_num];
1486
1487 if (page->dev->bdev == NULL) {
1488 page->io_error = 1;
1489 sblock->no_io_error_seen = 0;
1490 continue;
1491 }
1492
1493 WARN_ON(!page->page);
1494 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
1495 if (!bio) {
1496 page->io_error = 1;
1497 sblock->no_io_error_seen = 0;
1498 continue;
1499 }
1500 bio->bi_bdev = page->dev->bdev;
1501
1502 bio_add_page(bio, page->page, PAGE_SIZE, 0);
1503 if (!retry_failed_mirror && scrub_is_page_on_raid56(page)) {
1504 if (scrub_submit_raid56_bio_wait(fs_info, bio, page))
1505 sblock->no_io_error_seen = 0;
1506 } else {
1507 bio->bi_iter.bi_sector = page->physical >> 9;
1508 bio_set_op_attrs(bio, REQ_OP_READ, 0);
1509
1510 if (btrfsic_submit_bio_wait(bio))
1511 sblock->no_io_error_seen = 0;
1512 }
1513
1514 bio_put(bio);
1515 }
1516
1517 if (sblock->no_io_error_seen)
1518 scrub_recheck_block_checksum(sblock);
1519}
1520
1521static inline int scrub_check_fsid(u8 fsid[],
1522 struct scrub_page *spage)
1523{
1524 struct btrfs_fs_devices *fs_devices = spage->dev->fs_devices;
1525 int ret;
1526
1527 ret = memcmp(fsid, fs_devices->fsid, BTRFS_UUID_SIZE);
1528 return !ret;
1529}
1530
1531static void scrub_recheck_block_checksum(struct scrub_block *sblock)
1532{
1533 sblock->header_error = 0;
1534 sblock->checksum_error = 0;
1535 sblock->generation_error = 0;
1536
1537 if (sblock->pagev[0]->flags & BTRFS_EXTENT_FLAG_DATA)
1538 scrub_checksum_data(sblock);
1539 else
1540 scrub_checksum_tree_block(sblock);
1541}
1542
1543static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
1544 struct scrub_block *sblock_good)
1545{
1546 int page_num;
1547 int ret = 0;
1548
1549 for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
1550 int ret_sub;
1551
1552 ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
1553 sblock_good,
1554 page_num, 1);
1555 if (ret_sub)
1556 ret = ret_sub;
1557 }
1558
1559 return ret;
1560}
1561
1562static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
1563 struct scrub_block *sblock_good,
1564 int page_num, int force_write)
1565{
1566 struct scrub_page *page_bad = sblock_bad->pagev[page_num];
1567 struct scrub_page *page_good = sblock_good->pagev[page_num];
1568
1569 BUG_ON(page_bad->page == NULL);
1570 BUG_ON(page_good->page == NULL);
1571 if (force_write || sblock_bad->header_error ||
1572 sblock_bad->checksum_error || page_bad->io_error) {
1573 struct bio *bio;
1574 int ret;
1575
1576 if (!page_bad->dev->bdev) {
1577 btrfs_warn_rl(sblock_bad->sctx->dev_root->fs_info,
1578 "scrub_repair_page_from_good_copy(bdev == NULL) is unexpected");
1579 return -EIO;
1580 }
1581
1582 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
1583 if (!bio)
1584 return -EIO;
1585 bio->bi_bdev = page_bad->dev->bdev;
1586 bio->bi_iter.bi_sector = page_bad->physical >> 9;
1587 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
1588
1589 ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
1590 if (PAGE_SIZE != ret) {
1591 bio_put(bio);
1592 return -EIO;
1593 }
1594
1595 if (btrfsic_submit_bio_wait(bio)) {
1596 btrfs_dev_stat_inc_and_print(page_bad->dev,
1597 BTRFS_DEV_STAT_WRITE_ERRS);
1598 btrfs_dev_replace_stats_inc(
1599 &sblock_bad->sctx->dev_root->fs_info->
1600 dev_replace.num_write_errors);
1601 bio_put(bio);
1602 return -EIO;
1603 }
1604 bio_put(bio);
1605 }
1606
1607 return 0;
1608}
1609
1610static void scrub_write_block_to_dev_replace(struct scrub_block *sblock)
1611{
1612 int page_num;
1613
1614
1615
1616
1617
1618 if (sblock->sparity)
1619 return;
1620
1621 for (page_num = 0; page_num < sblock->page_count; page_num++) {
1622 int ret;
1623
1624 ret = scrub_write_page_to_dev_replace(sblock, page_num);
1625 if (ret)
1626 btrfs_dev_replace_stats_inc(
1627 &sblock->sctx->dev_root->fs_info->dev_replace.
1628 num_write_errors);
1629 }
1630}
1631
1632static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
1633 int page_num)
1634{
1635 struct scrub_page *spage = sblock->pagev[page_num];
1636
1637 BUG_ON(spage->page == NULL);
1638 if (spage->io_error) {
1639 void *mapped_buffer = kmap_atomic(spage->page);
1640
1641 memset(mapped_buffer, 0, PAGE_SIZE);
1642 flush_dcache_page(spage->page);
1643 kunmap_atomic(mapped_buffer);
1644 }
1645 return scrub_add_page_to_wr_bio(sblock->sctx, spage);
1646}
1647
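/*
 * Queue one page for writing to the device replace target.  Pages are
 * packed into the current write bio as long as they are physically and
 * logically contiguous; otherwise the bio is submitted and a new one is
 * started.
 */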
1648static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
1649 struct scrub_page *spage)
1650{
1651 struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx;
1652 struct scrub_bio *sbio;
1653 int ret;
1654
1655 mutex_lock(&wr_ctx->wr_lock);
1656again:
1657 if (!wr_ctx->wr_curr_bio) {
1658 wr_ctx->wr_curr_bio = kzalloc(sizeof(*wr_ctx->wr_curr_bio),
1659 GFP_KERNEL);
1660 if (!wr_ctx->wr_curr_bio) {
1661 mutex_unlock(&wr_ctx->wr_lock);
1662 return -ENOMEM;
1663 }
1664 wr_ctx->wr_curr_bio->sctx = sctx;
1665 wr_ctx->wr_curr_bio->page_count = 0;
1666 }
1667 sbio = wr_ctx->wr_curr_bio;
1668 if (sbio->page_count == 0) {
1669 struct bio *bio;
1670
1671 sbio->physical = spage->physical_for_dev_replace;
1672 sbio->logical = spage->logical;
1673 sbio->dev = wr_ctx->tgtdev;
1674 bio = sbio->bio;
1675 if (!bio) {
1676 bio = btrfs_io_bio_alloc(GFP_KERNEL,
1677 wr_ctx->pages_per_wr_bio);
1678 if (!bio) {
1679 mutex_unlock(&wr_ctx->wr_lock);
1680 return -ENOMEM;
1681 }
1682 sbio->bio = bio;
1683 }
1684
1685 bio->bi_private = sbio;
1686 bio->bi_end_io = scrub_wr_bio_end_io;
1687 bio->bi_bdev = sbio->dev->bdev;
1688 bio->bi_iter.bi_sector = sbio->physical >> 9;
1689 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
1690 sbio->err = 0;
1691 } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
1692 spage->physical_for_dev_replace ||
1693 sbio->logical + sbio->page_count * PAGE_SIZE !=
1694 spage->logical) {
1695 scrub_wr_submit(sctx);
1696 goto again;
1697 }
1698
1699 ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
1700 if (ret != PAGE_SIZE) {
1701 if (sbio->page_count < 1) {
1702 bio_put(sbio->bio);
1703 sbio->bio = NULL;
1704 mutex_unlock(&wr_ctx->wr_lock);
1705 return -EIO;
1706 }
1707 scrub_wr_submit(sctx);
1708 goto again;
1709 }
1710
1711 sbio->pagev[sbio->page_count] = spage;
1712 scrub_page_get(spage);
1713 sbio->page_count++;
1714 if (sbio->page_count == wr_ctx->pages_per_wr_bio)
1715 scrub_wr_submit(sctx);
1716 mutex_unlock(&wr_ctx->wr_lock);
1717
1718 return 0;
1719}
1720
1721static void scrub_wr_submit(struct scrub_ctx *sctx)
1722{
1723 struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx;
1724 struct scrub_bio *sbio;
1725
1726 if (!wr_ctx->wr_curr_bio)
1727 return;
1728
1729 sbio = wr_ctx->wr_curr_bio;
1730 wr_ctx->wr_curr_bio = NULL;
1731 WARN_ON(!sbio->bio->bi_bdev);
1732 scrub_pending_bio_inc(sctx);
1733
1734
1735
1736
1737 btrfsic_submit_bio(sbio->bio);
1738}
1739
1740static void scrub_wr_bio_end_io(struct bio *bio)
1741{
1742 struct scrub_bio *sbio = bio->bi_private;
1743 struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info;
1744
1745 sbio->err = bio->bi_error;
1746 sbio->bio = bio;
1747
1748 btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper,
1749 scrub_wr_bio_end_io_worker, NULL, NULL);
1750 btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work);
1751}
1752
1753static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
1754{
1755 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
1756 struct scrub_ctx *sctx = sbio->sctx;
1757 int i;
1758
1759 WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
1760 if (sbio->err) {
1761 struct btrfs_dev_replace *dev_replace =
1762 &sbio->sctx->dev_root->fs_info->dev_replace;
1763
1764 for (i = 0; i < sbio->page_count; i++) {
1765 struct scrub_page *spage = sbio->pagev[i];
1766
1767 spage->io_error = 1;
1768 btrfs_dev_replace_stats_inc(&dev_replace->
1769 num_write_errors);
1770 }
1771 }
1772
1773 for (i = 0; i < sbio->page_count; i++)
1774 scrub_page_put(sbio->pagev[i]);
1775
1776 bio_put(sbio->bio);
1777 kfree(sbio);
1778 scrub_pending_bio_dec(sctx);
1779}
1780
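/*
 * Verify a freshly read block: reset the per-block error flags, dispatch
 * on the extent flags (data, tree block or super block) and hand the block
 * to scrub_handle_errored_block() if verification fails.
 */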
1781static int scrub_checksum(struct scrub_block *sblock)
1782{
1783 u64 flags;
1784 int ret;
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794 sblock->header_error = 0;
1795 sblock->generation_error = 0;
1796 sblock->checksum_error = 0;
1797
1798 WARN_ON(sblock->page_count < 1);
1799 flags = sblock->pagev[0]->flags;
1800 ret = 0;
1801 if (flags & BTRFS_EXTENT_FLAG_DATA)
1802 ret = scrub_checksum_data(sblock);
1803 else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
1804 ret = scrub_checksum_tree_block(sblock);
1805 else if (flags & BTRFS_EXTENT_FLAG_SUPER)
1806 (void)scrub_checksum_super(sblock);
1807 else
1808 WARN_ON(1);
1809 if (ret)
1810 scrub_handle_errored_block(sblock);
1811
1812 return ret;
1813}
1814
1815static int scrub_checksum_data(struct scrub_block *sblock)
1816{
1817 struct scrub_ctx *sctx = sblock->sctx;
1818 u8 csum[BTRFS_CSUM_SIZE];
1819 u8 *on_disk_csum;
1820 struct page *page;
1821 void *buffer;
1822 u32 crc = ~(u32)0;
1823 u64 len;
1824 int index;
1825
1826 BUG_ON(sblock->page_count < 1);
1827 if (!sblock->pagev[0]->have_csum)
1828 return 0;
1829
1830 on_disk_csum = sblock->pagev[0]->csum;
1831 page = sblock->pagev[0]->page;
1832 buffer = kmap_atomic(page);
1833
1834 len = sctx->sectorsize;
1835 index = 0;
1836 for (;;) {
1837 u64 l = min_t(u64, len, PAGE_SIZE);
1838
1839 crc = btrfs_csum_data(buffer, crc, l);
1840 kunmap_atomic(buffer);
1841 len -= l;
1842 if (len == 0)
1843 break;
1844 index++;
1845 BUG_ON(index >= sblock->page_count);
1846 BUG_ON(!sblock->pagev[index]->page);
1847 page = sblock->pagev[index]->page;
1848 buffer = kmap_atomic(page);
1849 }
1850
1851 btrfs_csum_final(crc, csum);
1852 if (memcmp(csum, on_disk_csum, sctx->csum_size))
1853 sblock->checksum_error = 1;
1854
1855 return sblock->checksum_error;
1856}
1857
1858static int scrub_checksum_tree_block(struct scrub_block *sblock)
1859{
1860 struct scrub_ctx *sctx = sblock->sctx;
1861 struct btrfs_header *h;
1862 struct btrfs_root *root = sctx->dev_root;
1863 struct btrfs_fs_info *fs_info = root->fs_info;
1864 u8 calculated_csum[BTRFS_CSUM_SIZE];
1865 u8 on_disk_csum[BTRFS_CSUM_SIZE];
1866 struct page *page;
1867 void *mapped_buffer;
1868 u64 mapped_size;
1869 void *p;
1870 u32 crc = ~(u32)0;
1871 u64 len;
1872 int index;
1873
1874 BUG_ON(sblock->page_count < 1);
1875 page = sblock->pagev[0]->page;
1876 mapped_buffer = kmap_atomic(page);
1877 h = (struct btrfs_header *)mapped_buffer;
1878 memcpy(on_disk_csum, h->csum, sctx->csum_size);
1879
1880
1881
1882
1883
1884
1885 if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h))
1886 sblock->header_error = 1;
1887
1888 if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h)) {
1889 sblock->header_error = 1;
1890 sblock->generation_error = 1;
1891 }
1892
1893 if (!scrub_check_fsid(h->fsid, sblock->pagev[0]))
1894 sblock->header_error = 1;
1895
1896 if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
1897 BTRFS_UUID_SIZE))
1898 sblock->header_error = 1;
1899
1900 len = sctx->nodesize - BTRFS_CSUM_SIZE;
1901 mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
1902 p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
1903 index = 0;
1904 for (;;) {
1905 u64 l = min_t(u64, len, mapped_size);
1906
1907 crc = btrfs_csum_data(p, crc, l);
1908 kunmap_atomic(mapped_buffer);
1909 len -= l;
1910 if (len == 0)
1911 break;
1912 index++;
1913 BUG_ON(index >= sblock->page_count);
1914 BUG_ON(!sblock->pagev[index]->page);
1915 page = sblock->pagev[index]->page;
1916 mapped_buffer = kmap_atomic(page);
1917 mapped_size = PAGE_SIZE;
1918 p = mapped_buffer;
1919 }
1920
1921 btrfs_csum_final(crc, calculated_csum);
1922 if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
1923 sblock->checksum_error = 1;
1924
1925 return sblock->header_error || sblock->checksum_error;
1926}
1927
1928static int scrub_checksum_super(struct scrub_block *sblock)
1929{
1930 struct btrfs_super_block *s;
1931 struct scrub_ctx *sctx = sblock->sctx;
1932 u8 calculated_csum[BTRFS_CSUM_SIZE];
1933 u8 on_disk_csum[BTRFS_CSUM_SIZE];
1934 struct page *page;
1935 void *mapped_buffer;
1936 u64 mapped_size;
1937 void *p;
1938 u32 crc = ~(u32)0;
1939 int fail_gen = 0;
1940 int fail_cor = 0;
1941 u64 len;
1942 int index;
1943
1944 BUG_ON(sblock->page_count < 1);
1945 page = sblock->pagev[0]->page;
1946 mapped_buffer = kmap_atomic(page);
1947 s = (struct btrfs_super_block *)mapped_buffer;
1948 memcpy(on_disk_csum, s->csum, sctx->csum_size);
1949
1950 if (sblock->pagev[0]->logical != btrfs_super_bytenr(s))
1951 ++fail_cor;
1952
1953 if (sblock->pagev[0]->generation != btrfs_super_generation(s))
1954 ++fail_gen;
1955
1956 if (!scrub_check_fsid(s->fsid, sblock->pagev[0]))
1957 ++fail_cor;
1958
1959 len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE;
1960 mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
1961 p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
1962 index = 0;
1963 for (;;) {
1964 u64 l = min_t(u64, len, mapped_size);
1965
1966 crc = btrfs_csum_data(p, crc, l);
1967 kunmap_atomic(mapped_buffer);
1968 len -= l;
1969 if (len == 0)
1970 break;
1971 index++;
1972 BUG_ON(index >= sblock->page_count);
1973 BUG_ON(!sblock->pagev[index]->page);
1974 page = sblock->pagev[index]->page;
1975 mapped_buffer = kmap_atomic(page);
1976 mapped_size = PAGE_SIZE;
1977 p = mapped_buffer;
1978 }
1979
1980 btrfs_csum_final(crc, calculated_csum);
1981 if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
1982 ++fail_cor;
1983
1984 if (fail_cor + fail_gen) {
1985
1986
1987
1988
1989
1990 spin_lock(&sctx->stat_lock);
1991 ++sctx->stat.super_errors;
1992 spin_unlock(&sctx->stat_lock);
1993 if (fail_cor)
1994 btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
1995 BTRFS_DEV_STAT_CORRUPTION_ERRS);
1996 else
1997 btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
1998 BTRFS_DEV_STAT_GENERATION_ERRS);
1999 }
2000
2001 return fail_cor + fail_gen;
2002}
2003
2004static void scrub_block_get(struct scrub_block *sblock)
2005{
2006 atomic_inc(&sblock->refs);
2007}
2008
2009static void scrub_block_put(struct scrub_block *sblock)
2010{
2011 if (atomic_dec_and_test(&sblock->refs)) {
2012 int i;
2013
2014 if (sblock->sparity)
2015 scrub_parity_put(sblock->sparity);
2016
2017 for (i = 0; i < sblock->page_count; i++)
2018 scrub_page_put(sblock->pagev[i]);
2019 kfree(sblock);
2020 }
2021}
2022
2023static void scrub_page_get(struct scrub_page *spage)
2024{
2025 atomic_inc(&spage->refs);
2026}
2027
2028static void scrub_page_put(struct scrub_page *spage)
2029{
2030 if (atomic_dec_and_test(&spage->refs)) {
2031 if (spage->page)
2032 __free_page(spage->page);
2033 kfree(spage);
2034 }
2035}
2036
2037static void scrub_submit(struct scrub_ctx *sctx)
2038{
2039 struct scrub_bio *sbio;
2040
2041 if (sctx->curr == -1)
2042 return;
2043
2044 sbio = sctx->bios[sctx->curr];
2045 sctx->curr = -1;
2046 scrub_pending_bio_inc(sctx);
2047 btrfsic_submit_bio(sbio->bio);
2048}
2049
2050static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
2051 struct scrub_page *spage)
2052{
2053 struct scrub_block *sblock = spage->sblock;
2054 struct scrub_bio *sbio;
2055 int ret;
2056
2057again:
2058
2059
2060
2061 while (sctx->curr == -1) {
2062 spin_lock(&sctx->list_lock);
2063 sctx->curr = sctx->first_free;
2064 if (sctx->curr != -1) {
2065 sctx->first_free = sctx->bios[sctx->curr]->next_free;
2066 sctx->bios[sctx->curr]->next_free = -1;
2067 sctx->bios[sctx->curr]->page_count = 0;
2068 spin_unlock(&sctx->list_lock);
2069 } else {
2070 spin_unlock(&sctx->list_lock);
2071 wait_event(sctx->list_wait, sctx->first_free != -1);
2072 }
2073 }
2074 sbio = sctx->bios[sctx->curr];
2075 if (sbio->page_count == 0) {
2076 struct bio *bio;
2077
2078 sbio->physical = spage->physical;
2079 sbio->logical = spage->logical;
2080 sbio->dev = spage->dev;
2081 bio = sbio->bio;
2082 if (!bio) {
2083 bio = btrfs_io_bio_alloc(GFP_KERNEL,
2084 sctx->pages_per_rd_bio);
2085 if (!bio)
2086 return -ENOMEM;
2087 sbio->bio = bio;
2088 }
2089
2090 bio->bi_private = sbio;
2091 bio->bi_end_io = scrub_bio_end_io;
2092 bio->bi_bdev = sbio->dev->bdev;
2093 bio->bi_iter.bi_sector = sbio->physical >> 9;
2094 bio_set_op_attrs(bio, REQ_OP_READ, 0);
2095 sbio->err = 0;
2096 } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
2097 spage->physical ||
2098 sbio->logical + sbio->page_count * PAGE_SIZE !=
2099 spage->logical ||
2100 sbio->dev != spage->dev) {
2101 scrub_submit(sctx);
2102 goto again;
2103 }
2104
2105 sbio->pagev[sbio->page_count] = spage;
2106 ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
2107 if (ret != PAGE_SIZE) {
2108 if (sbio->page_count < 1) {
2109 bio_put(sbio->bio);
2110 sbio->bio = NULL;
2111 return -EIO;
2112 }
2113 scrub_submit(sctx);
2114 goto again;
2115 }
2116
2117 scrub_block_get(sblock);
2118 atomic_inc(&sblock->outstanding_pages);
2119 sbio->page_count++;
2120 if (sbio->page_count == sctx->pages_per_rd_bio)
2121 scrub_submit(sctx);
2122
2123 return 0;
2124}
2125
2126static void scrub_missing_raid56_end_io(struct bio *bio)
2127{
2128 struct scrub_block *sblock = bio->bi_private;
2129 struct btrfs_fs_info *fs_info = sblock->sctx->dev_root->fs_info;
2130
2131 if (bio->bi_error)
2132 sblock->no_io_error_seen = 0;
2133
2134 bio_put(bio);
2135
2136 btrfs_queue_work(fs_info->scrub_workers, &sblock->work);
2137}
2138
2139static void scrub_missing_raid56_worker(struct btrfs_work *work)
2140{
2141 struct scrub_block *sblock = container_of(work, struct scrub_block, work);
2142 struct scrub_ctx *sctx = sblock->sctx;
2143 u64 logical;
2144 struct btrfs_device *dev;
2145
2146 logical = sblock->pagev[0]->logical;
2147 dev = sblock->pagev[0]->dev;
2148
2149 if (sblock->no_io_error_seen)
2150 scrub_recheck_block_checksum(sblock);
2151
2152 if (!sblock->no_io_error_seen) {
2153 spin_lock(&sctx->stat_lock);
2154 sctx->stat.read_errors++;
2155 spin_unlock(&sctx->stat_lock);
2156 btrfs_err_rl_in_rcu(sctx->dev_root->fs_info,
2157 "IO error rebuilding logical %llu for dev %s",
2158 logical, rcu_str_deref(dev->name));
2159 } else if (sblock->header_error || sblock->checksum_error) {
2160 spin_lock(&sctx->stat_lock);
2161 sctx->stat.uncorrectable_errors++;
2162 spin_unlock(&sctx->stat_lock);
2163 btrfs_err_rl_in_rcu(sctx->dev_root->fs_info,
2164 "failed to rebuild valid logical %llu for dev %s",
2165 logical, rcu_str_deref(dev->name));
2166 } else {
2167 scrub_write_block_to_dev_replace(sblock);
2168 }
2169
2170 scrub_block_put(sblock);
2171
2172 if (sctx->is_dev_replace &&
2173 atomic_read(&sctx->wr_ctx.flush_all_writes)) {
2174 mutex_lock(&sctx->wr_ctx.wr_lock);
2175 scrub_wr_submit(sctx);
2176 mutex_unlock(&sctx->wr_ctx.wr_lock);
2177 }
2178
2179 scrub_pending_bio_dec(sctx);
2180}
2181
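/*
 * Read a block that sits on a missing device by rebuilding it from the
 * remaining RAID5/6 stripes; scrub_missing_raid56_worker() then verifies
 * the rebuilt data and writes it to the replace target.
 */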
2182static void scrub_missing_raid56_pages(struct scrub_block *sblock)
2183{
2184 struct scrub_ctx *sctx = sblock->sctx;
2185 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
2186 u64 length = sblock->page_count * PAGE_SIZE;
2187 u64 logical = sblock->pagev[0]->logical;
2188 struct btrfs_bio *bbio = NULL;
2189 struct bio *bio;
2190 struct btrfs_raid_bio *rbio;
2191 int ret;
2192 int i;
2193
2194 ret = btrfs_map_sblock(fs_info, REQ_GET_READ_MIRRORS, logical, &length,
2195 &bbio, 0, 1);
2196 if (ret || !bbio || !bbio->raid_map)
2197 goto bbio_out;
2198
2199 if (WARN_ON(!sctx->is_dev_replace ||
2200 !(bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK))) {
2201
2202
2203
2204
2205
2206
2207 goto bbio_out;
2208 }
2209
2210 bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
2211 if (!bio)
2212 goto bbio_out;
2213
2214 bio->bi_iter.bi_sector = logical >> 9;
2215 bio->bi_private = sblock;
2216 bio->bi_end_io = scrub_missing_raid56_end_io;
2217
2218 rbio = raid56_alloc_missing_rbio(sctx->dev_root, bio, bbio, length);
2219 if (!rbio)
2220 goto rbio_out;
2221
2222 for (i = 0; i < sblock->page_count; i++) {
2223 struct scrub_page *spage = sblock->pagev[i];
2224
2225 raid56_add_scrub_pages(rbio, spage->page, spage->logical);
2226 }
2227
2228 btrfs_init_work(&sblock->work, btrfs_scrub_helper,
2229 scrub_missing_raid56_worker, NULL, NULL);
2230 scrub_block_get(sblock);
2231 scrub_pending_bio_inc(sctx);
2232 raid56_submit_missing_rbio(rbio);
2233 return;
2234
2235rbio_out:
2236 bio_put(bio);
2237bbio_out:
2238 btrfs_put_bbio(bbio);
2239 spin_lock(&sctx->stat_lock);
2240 sctx->stat.malloc_errors++;
2241 spin_unlock(&sctx->stat_lock);
2242}
2243
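/*
 * Split the range [logical, logical + len) into pages, attach them to a
 * new scrub_block and queue them for reading.  Blocks on missing devices
 * are routed through the RAID5/6 rebuild path instead.
 */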
2244static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
2245 u64 physical, struct btrfs_device *dev, u64 flags,
2246 u64 gen, int mirror_num, u8 *csum, int force,
2247 u64 physical_for_dev_replace)
2248{
2249 struct scrub_block *sblock;
2250 int index;
2251
2252 sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
2253 if (!sblock) {
2254 spin_lock(&sctx->stat_lock);
2255 sctx->stat.malloc_errors++;
2256 spin_unlock(&sctx->stat_lock);
2257 return -ENOMEM;
2258 }
2259
2260
2261
2262 atomic_set(&sblock->refs, 1);
2263 sblock->sctx = sctx;
2264 sblock->no_io_error_seen = 1;
2265
2266 for (index = 0; len > 0; index++) {
2267 struct scrub_page *spage;
2268 u64 l = min_t(u64, len, PAGE_SIZE);
2269
2270 spage = kzalloc(sizeof(*spage), GFP_KERNEL);
2271 if (!spage) {
2272leave_nomem:
2273 spin_lock(&sctx->stat_lock);
2274 sctx->stat.malloc_errors++;
2275 spin_unlock(&sctx->stat_lock);
2276 scrub_block_put(sblock);
2277 return -ENOMEM;
2278 }
2279 BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
2280 scrub_page_get(spage);
2281 sblock->pagev[index] = spage;
2282 spage->sblock = sblock;
2283 spage->dev = dev;
2284 spage->flags = flags;
2285 spage->generation = gen;
2286 spage->logical = logical;
2287 spage->physical = physical;
2288 spage->physical_for_dev_replace = physical_for_dev_replace;
2289 spage->mirror_num = mirror_num;
2290 if (csum) {
2291 spage->have_csum = 1;
2292 memcpy(spage->csum, csum, sctx->csum_size);
2293 } else {
2294 spage->have_csum = 0;
2295 }
2296 sblock->page_count++;
2297 spage->page = alloc_page(GFP_KERNEL);
2298 if (!spage->page)
2299 goto leave_nomem;
2300 len -= l;
2301 logical += l;
2302 physical += l;
2303 physical_for_dev_replace += l;
2304 }
2305
2306 WARN_ON(sblock->page_count == 0);
2307 if (dev->missing) {
		/*
		 * This case should only be hit for RAID 5/6 device replace
		 * (see the WARN_ON in scrub_missing_raid56_pages()): the pages
		 * are rebuilt from the remaining stripes instead of being read
		 * from the missing device.
		 */
2312 scrub_missing_raid56_pages(sblock);
2313 } else {
2314 for (index = 0; index < sblock->page_count; index++) {
2315 struct scrub_page *spage = sblock->pagev[index];
2316 int ret;
2317
2318 ret = scrub_add_page_to_rd_bio(sctx, spage);
2319 if (ret) {
2320 scrub_block_put(sblock);
2321 return ret;
2322 }
2323 }
2324
2325 if (force)
2326 scrub_submit(sctx);
2327 }
2328
	/* last one frees, either here or in bio completion for the last page */
2330 scrub_block_put(sblock);
2331 return 0;
2332}
2333
2334static void scrub_bio_end_io(struct bio *bio)
2335{
2336 struct scrub_bio *sbio = bio->bi_private;
2337 struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info;
2338
2339 sbio->err = bio->bi_error;
2340 sbio->bio = bio;
2341
2342 btrfs_queue_work(fs_info->scrub_workers, &sbio->work);
2343}
2344
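/*
 * Worker run after a read bio completes: propagate an I/O error to the pages
 * of the bio, complete every scrub_block whose pages are all done, put the
 * scrub_bio back on the free list and, for dev-replace, flush pending writes
 * if requested.
 */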
2345static void scrub_bio_end_io_worker(struct btrfs_work *work)
2346{
2347 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
2348 struct scrub_ctx *sctx = sbio->sctx;
2349 int i;
2350
2351 BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
2352 if (sbio->err) {
2353 for (i = 0; i < sbio->page_count; i++) {
2354 struct scrub_page *spage = sbio->pagev[i];
2355
2356 spage->io_error = 1;
2357 spage->sblock->no_io_error_seen = 0;
2358 }
2359 }
2360
	/* now complete the scrub_block items that have all pages completed */
2362 for (i = 0; i < sbio->page_count; i++) {
2363 struct scrub_page *spage = sbio->pagev[i];
2364 struct scrub_block *sblock = spage->sblock;
2365
2366 if (atomic_dec_and_test(&sblock->outstanding_pages))
2367 scrub_block_complete(sblock);
2368 scrub_block_put(sblock);
2369 }
2370
2371 bio_put(sbio->bio);
2372 sbio->bio = NULL;
2373 spin_lock(&sctx->list_lock);
2374 sbio->next_free = sctx->first_free;
2375 sctx->first_free = sbio->index;
2376 spin_unlock(&sctx->list_lock);
2377
2378 if (sctx->is_dev_replace &&
2379 atomic_read(&sctx->wr_ctx.flush_all_writes)) {
2380 mutex_lock(&sctx->wr_ctx.wr_lock);
2381 scrub_wr_submit(sctx);
2382 mutex_unlock(&sctx->wr_ctx.wr_lock);
2383 }
2384
2385 scrub_pending_bio_dec(sctx);
2386}
2387
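/*
 * Mark the sectors covered by [start, start + len) in @bitmap, relative to
 * the parity stripe described by @sparity.  A range reaching past the end of
 * the stripe wraps around to sector 0.
 */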
2388static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
2389 unsigned long *bitmap,
2390 u64 start, u64 len)
2391{
2392 u32 offset;
2393 int nsectors;
2394 int sectorsize = sparity->sctx->dev_root->sectorsize;
2395
2396 if (len >= sparity->stripe_len) {
2397 bitmap_set(bitmap, 0, sparity->nsectors);
2398 return;
2399 }
2400
2401 start -= sparity->logic_start;
2402 start = div_u64_rem(start, sparity->stripe_len, &offset);
2403 offset /= sectorsize;
2404 nsectors = (int)len / sectorsize;
2405
2406 if (offset + nsectors <= sparity->nsectors) {
2407 bitmap_set(bitmap, offset, nsectors);
2408 return;
2409 }
2410
2411 bitmap_set(bitmap, offset, sparity->nsectors - offset);
2412 bitmap_set(bitmap, 0, nsectors - (sparity->nsectors - offset));
2413}
2414
2415static inline void scrub_parity_mark_sectors_error(struct scrub_parity *sparity,
2416 u64 start, u64 len)
2417{
2418 __scrub_mark_bitmap(sparity, sparity->ebitmap, start, len);
2419}
2420
2421static inline void scrub_parity_mark_sectors_data(struct scrub_parity *sparity,
2422 u64 start, u64 len)
2423{
2424 __scrub_mark_bitmap(sparity, sparity->dbitmap, start, len);
2425}
2426
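/*
 * Called when all pages of a scrub_block have completed: start the repair
 * machinery if an I/O error was seen, otherwise verify the checksums (and,
 * for dev-replace, write the block out).  On a parity scrub, corrupted
 * sectors are recorded in the error bitmap.
 */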
2427static void scrub_block_complete(struct scrub_block *sblock)
2428{
2429 int corrupted = 0;
2430
2431 if (!sblock->no_io_error_seen) {
2432 corrupted = 1;
2433 scrub_handle_errored_block(sblock);
2434 } else {
		/*
		 * No I/O error was seen, so verify the checksums now.  In the
		 * dev-replace case a block with a checksum error is written to
		 * the target by the repair path; a clean block is written here.
		 */
2440 corrupted = scrub_checksum(sblock);
2441 if (!corrupted && sblock->sctx->is_dev_replace)
2442 scrub_write_block_to_dev_replace(sblock);
2443 }
2444
2445 if (sblock->sparity && corrupted && !sblock->data_corrected) {
2446 u64 start = sblock->pagev[0]->logical;
2447 u64 end = sblock->pagev[sblock->page_count - 1]->logical +
2448 PAGE_SIZE;
2449
2450 scrub_parity_mark_sectors_error(sblock->sparity,
2451 start, end - start);
2452 }
2453}
2454
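/*
 * Look up the data checksum for @logical in sctx->csum_list, dropping entries
 * that lie entirely before it.  Returns 1 and copies the checksum into @csum
 * when one is found, 0 otherwise.
 */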
2455static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum)
2456{
2457 struct btrfs_ordered_sum *sum = NULL;
2458 unsigned long index;
2459 unsigned long num_sectors;
2460
2461 while (!list_empty(&sctx->csum_list)) {
2462 sum = list_first_entry(&sctx->csum_list,
2463 struct btrfs_ordered_sum, list);
2464 if (sum->bytenr > logical)
2465 return 0;
2466 if (sum->bytenr + sum->len > logical)
2467 break;
2468
2469 ++sctx->stat.csum_discards;
2470 list_del(&sum->list);
2471 kfree(sum);
2472 sum = NULL;
2473 }
2474 if (!sum)
2475 return 0;
2476
2477 index = ((u32)(logical - sum->bytenr)) / sctx->sectorsize;
2478 num_sectors = sum->len / sctx->sectorsize;
2479 memcpy(csum, sum->sums + index, sctx->csum_size);
2480 if (index == num_sectors - 1) {
2481 list_del(&sum->list);
2482 kfree(sum);
2483 }
2484 return 1;
2485}
2486
2487
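/*
 * Scrub a regular (non-parity) extent: account it in the statistics, then
 * walk it in blocksize steps, looking up the data checksums and passing each
 * piece to scrub_pages().  In dev-replace mode, data without a checksum is
 * copied through the NOCOW path instead.
 */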
2488static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len,
2489 u64 physical, struct btrfs_device *dev, u64 flags,
2490 u64 gen, int mirror_num, u64 physical_for_dev_replace)
2491{
2492 int ret;
2493 u8 csum[BTRFS_CSUM_SIZE];
2494 u32 blocksize;
2495
2496 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2497 blocksize = sctx->sectorsize;
2498 spin_lock(&sctx->stat_lock);
2499 sctx->stat.data_extents_scrubbed++;
2500 sctx->stat.data_bytes_scrubbed += len;
2501 spin_unlock(&sctx->stat_lock);
2502 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
2503 blocksize = sctx->nodesize;
2504 spin_lock(&sctx->stat_lock);
2505 sctx->stat.tree_extents_scrubbed++;
2506 sctx->stat.tree_bytes_scrubbed += len;
2507 spin_unlock(&sctx->stat_lock);
2508 } else {
2509 blocksize = sctx->sectorsize;
2510 WARN_ON(1);
2511 }
2512
2513 while (len) {
2514 u64 l = min_t(u64, len, blocksize);
2515 int have_csum = 0;
2516
2517 if (flags & BTRFS_EXTENT_FLAG_DATA) {
			/* look up the data checksum for this block, if any */
2519 have_csum = scrub_find_csum(sctx, logical, csum);
2520 if (have_csum == 0)
2521 ++sctx->stat.no_csum;
2522 if (sctx->is_dev_replace && !have_csum) {
2523 ret = copy_nocow_pages(sctx, logical, l,
2524 mirror_num,
2525 physical_for_dev_replace);
2526 goto behind_scrub_pages;
2527 }
2528 }
2529 ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen,
2530 mirror_num, have_csum ? csum : NULL, 0,
2531 physical_for_dev_replace);
2532behind_scrub_pages:
2533 if (ret)
2534 return ret;
2535 len -= l;
2536 logical += l;
2537 physical += l;
2538 physical_for_dev_replace += l;
2539 }
2540 return 0;
2541}
2542
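/*
 * Like scrub_pages(), but for blocks that are part of a RAID 5/6 parity
 * scrub: each page is additionally linked into sparity->spages so it can be
 * handed to the parity check later on.
 */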
2543static int scrub_pages_for_parity(struct scrub_parity *sparity,
2544 u64 logical, u64 len,
2545 u64 physical, struct btrfs_device *dev,
2546 u64 flags, u64 gen, int mirror_num, u8 *csum)
2547{
2548 struct scrub_ctx *sctx = sparity->sctx;
2549 struct scrub_block *sblock;
2550 int index;
2551
2552 sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
2553 if (!sblock) {
2554 spin_lock(&sctx->stat_lock);
2555 sctx->stat.malloc_errors++;
2556 spin_unlock(&sctx->stat_lock);
2557 return -ENOMEM;
2558 }
2559
	/* one ref inside this function, plus one for each page added to
	 * a bio later on */
2562 atomic_set(&sblock->refs, 1);
2563 sblock->sctx = sctx;
2564 sblock->no_io_error_seen = 1;
2565 sblock->sparity = sparity;
2566 scrub_parity_get(sparity);
2567
2568 for (index = 0; len > 0; index++) {
2569 struct scrub_page *spage;
2570 u64 l = min_t(u64, len, PAGE_SIZE);
2571
2572 spage = kzalloc(sizeof(*spage), GFP_KERNEL);
2573 if (!spage) {
2574leave_nomem:
2575 spin_lock(&sctx->stat_lock);
2576 sctx->stat.malloc_errors++;
2577 spin_unlock(&sctx->stat_lock);
2578 scrub_block_put(sblock);
2579 return -ENOMEM;
2580 }
2581 BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
2582
2583 scrub_page_get(spage);
2584 sblock->pagev[index] = spage;
2585
2586 scrub_page_get(spage);
2587 list_add_tail(&spage->list, &sparity->spages);
2588 spage->sblock = sblock;
2589 spage->dev = dev;
2590 spage->flags = flags;
2591 spage->generation = gen;
2592 spage->logical = logical;
2593 spage->physical = physical;
2594 spage->mirror_num = mirror_num;
2595 if (csum) {
2596 spage->have_csum = 1;
2597 memcpy(spage->csum, csum, sctx->csum_size);
2598 } else {
2599 spage->have_csum = 0;
2600 }
2601 sblock->page_count++;
2602 spage->page = alloc_page(GFP_KERNEL);
2603 if (!spage->page)
2604 goto leave_nomem;
2605 len -= l;
2606 logical += l;
2607 physical += l;
2608 }
2609
2610 WARN_ON(sblock->page_count == 0);
2611 for (index = 0; index < sblock->page_count; index++) {
2612 struct scrub_page *spage = sblock->pagev[index];
2613 int ret;
2614
2615 ret = scrub_add_page_to_rd_bio(sctx, spage);
2616 if (ret) {
2617 scrub_block_put(sblock);
2618 return ret;
2619 }
2620 }
2621
	/* last one frees, either here or in bio completion for the last page */
2623 scrub_block_put(sblock);
2624 return 0;
2625}
2626
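/*
 * Counterpart of scrub_extent() for the parity scrub: extents on a missing
 * device are only marked in the error bitmap, data sectors without a checksum
 * are skipped, everything else is queued via scrub_pages_for_parity().
 */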
2627static int scrub_extent_for_parity(struct scrub_parity *sparity,
2628 u64 logical, u64 len,
2629 u64 physical, struct btrfs_device *dev,
2630 u64 flags, u64 gen, int mirror_num)
2631{
2632 struct scrub_ctx *sctx = sparity->sctx;
2633 int ret;
2634 u8 csum[BTRFS_CSUM_SIZE];
2635 u32 blocksize;
2636
2637 if (dev->missing) {
2638 scrub_parity_mark_sectors_error(sparity, logical, len);
2639 return 0;
2640 }
2641
2642 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2643 blocksize = sctx->sectorsize;
2644 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
2645 blocksize = sctx->nodesize;
2646 } else {
2647 blocksize = sctx->sectorsize;
2648 WARN_ON(1);
2649 }
2650
2651 while (len) {
2652 u64 l = min_t(u64, len, blocksize);
2653 int have_csum = 0;
2654
2655 if (flags & BTRFS_EXTENT_FLAG_DATA) {
			/* look up the data checksum for this block, if any */
2657 have_csum = scrub_find_csum(sctx, logical, csum);
2658 if (have_csum == 0)
2659 goto skip;
2660 }
2661 ret = scrub_pages_for_parity(sparity, logical, l, physical, dev,
2662 flags, gen, mirror_num,
2663 have_csum ? csum : NULL);
2664 if (ret)
2665 return ret;
2666skip:
2667 len -= l;
2668 logical += l;
2669 physical += l;
2670 }
2671 return 0;
2672}
2673
/*
 * Map a physical offset on stripe @num of @map to the corresponding logical
 * offset.  If the physical offset lands on a parity stripe, the logical
 * offset of the leftmost data stripe of that rotation is returned instead.
 *
 * Returns 0 for a data stripe and 1 for a parity stripe.
 */
2681static int get_raid56_logic_offset(u64 physical, int num,
2682 struct map_lookup *map, u64 *offset,
2683 u64 *stripe_start)
2684{
2685 int i;
2686 int j = 0;
2687 u64 stripe_nr;
2688 u64 last_offset;
2689 u32 stripe_index;
2690 u32 rot;
2691
2692 last_offset = (physical - map->stripes[num].physical) *
2693 nr_data_stripes(map);
2694 if (stripe_start)
2695 *stripe_start = last_offset;
2696
2697 *offset = last_offset;
2698 for (i = 0; i < nr_data_stripes(map); i++) {
2699 *offset = last_offset + i * map->stripe_len;
2700
2701 stripe_nr = div_u64(*offset, map->stripe_len);
2702 stripe_nr = div_u64(stripe_nr, nr_data_stripes(map));
2703
2704
2705 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &rot);
2706
2707 rot += i;
2708 stripe_index = rot % map->num_stripes;
2709 if (stripe_index == num)
2710 return 0;
2711 if (stripe_index < num)
2712 j++;
2713 }
2714 *offset = last_offset + j * map->stripe_len;
2715 return 1;
2716}
2717
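/*
 * Final teardown of a scrub_parity: sectors still set in the error bitmap are
 * accounted as uncorrectable read errors, the page references are dropped and
 * the structure is freed.
 */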
2718static void scrub_free_parity(struct scrub_parity *sparity)
2719{
2720 struct scrub_ctx *sctx = sparity->sctx;
2721 struct scrub_page *curr, *next;
2722 int nbits;
2723
2724 nbits = bitmap_weight(sparity->ebitmap, sparity->nsectors);
2725 if (nbits) {
2726 spin_lock(&sctx->stat_lock);
2727 sctx->stat.read_errors += nbits;
2728 sctx->stat.uncorrectable_errors += nbits;
2729 spin_unlock(&sctx->stat_lock);
2730 }
2731
2732 list_for_each_entry_safe(curr, next, &sparity->spages, list) {
2733 list_del_init(&curr->list);
2734 scrub_page_put(curr);
2735 }
2736
2737 kfree(sparity);
2738}
2739
2740static void scrub_parity_bio_endio_worker(struct btrfs_work *work)
2741{
2742 struct scrub_parity *sparity = container_of(work, struct scrub_parity,
2743 work);
2744 struct scrub_ctx *sctx = sparity->sctx;
2745
2746 scrub_free_parity(sparity);
2747 scrub_pending_bio_dec(sctx);
2748}
2749
2750static void scrub_parity_bio_endio(struct bio *bio)
2751{
2752 struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
2753
2754 if (bio->bi_error)
2755 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
2756 sparity->nsectors);
2757
2758 bio_put(bio);
2759
2760 btrfs_init_work(&sparity->work, btrfs_scrubparity_helper,
2761 scrub_parity_bio_endio_worker, NULL, NULL);
2762 btrfs_queue_work(sparity->sctx->dev_root->fs_info->scrub_parity_workers,
2763 &sparity->work);
2764}
2765
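/*
 * After all data blocks of the stripe have been read, verify the parity for
 * the sectors still set in dbitmap and let the raid56 code repair it if
 * necessary; sectors already marked in ebitmap are excluded.  If the required
 * structures cannot be allocated, all remaining sectors are marked as errors.
 */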
2766static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
2767{
2768 struct scrub_ctx *sctx = sparity->sctx;
2769 struct bio *bio;
2770 struct btrfs_raid_bio *rbio;
2771 struct scrub_page *spage;
2772 struct btrfs_bio *bbio = NULL;
2773 u64 length;
2774 int ret;
2775
2776 if (!bitmap_andnot(sparity->dbitmap, sparity->dbitmap, sparity->ebitmap,
2777 sparity->nsectors))
2778 goto out;
2779
2780 length = sparity->logic_end - sparity->logic_start;
2781 ret = btrfs_map_sblock(sctx->dev_root->fs_info, WRITE,
2782 sparity->logic_start,
2783 &length, &bbio, 0, 1);
2784 if (ret || !bbio || !bbio->raid_map)
2785 goto bbio_out;
2786
2787 bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
2788 if (!bio)
2789 goto bbio_out;
2790
2791 bio->bi_iter.bi_sector = sparity->logic_start >> 9;
2792 bio->bi_private = sparity;
2793 bio->bi_end_io = scrub_parity_bio_endio;
2794
2795 rbio = raid56_parity_alloc_scrub_rbio(sctx->dev_root, bio, bbio,
2796 length, sparity->scrub_dev,
2797 sparity->dbitmap,
2798 sparity->nsectors);
2799 if (!rbio)
2800 goto rbio_out;
2801
2802 list_for_each_entry(spage, &sparity->spages, list)
2803 raid56_add_scrub_pages(rbio, spage->page, spage->logical);
2804
2805 scrub_pending_bio_inc(sctx);
2806 raid56_parity_submit_scrub_rbio(rbio);
2807 return;
2808
2809rbio_out:
2810 bio_put(bio);
2811bbio_out:
2812 btrfs_put_bbio(bbio);
2813 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
2814 sparity->nsectors);
2815 spin_lock(&sctx->stat_lock);
2816 sctx->stat.malloc_errors++;
2817 spin_unlock(&sctx->stat_lock);
2818out:
2819 scrub_free_parity(sparity);
2820}
2821
2822static inline int scrub_calc_parity_bitmap_len(int nsectors)
2823{
2824 return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * sizeof(long);
2825}
2826
2827static void scrub_parity_get(struct scrub_parity *sparity)
2828{
2829 atomic_inc(&sparity->refs);
2830}
2831
2832static void scrub_parity_put(struct scrub_parity *sparity)
2833{
2834 if (!atomic_dec_and_test(&sparity->refs))
2835 return;
2836
2837 scrub_parity_check_and_repair(sparity);
2838}
2839
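/*
 * Scrub the parity of one full RAID 5/6 stripe: walk the extent tree for
 * [logic_start, logic_end), mark the covered data sectors in dbitmap and
 * queue them for reading; the final reference dropped by scrub_parity_put()
 * then triggers the parity check itself.
 */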
2840static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
2841 struct map_lookup *map,
2842 struct btrfs_device *sdev,
2843 struct btrfs_path *path,
2844 u64 logic_start,
2845 u64 logic_end)
2846{
2847 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
2848 struct btrfs_root *root = fs_info->extent_root;
2849 struct btrfs_root *csum_root = fs_info->csum_root;
2850 struct btrfs_extent_item *extent;
2851 struct btrfs_bio *bbio = NULL;
2852 u64 flags;
2853 int ret;
2854 int slot;
2855 struct extent_buffer *l;
2856 struct btrfs_key key;
2857 u64 generation;
2858 u64 extent_logical;
2859 u64 extent_physical;
2860 u64 extent_len;
2861 u64 mapped_length;
2862 struct btrfs_device *extent_dev;
2863 struct scrub_parity *sparity;
2864 int nsectors;
2865 int bitmap_len;
2866 int extent_mirror_num;
2867 int stop_loop = 0;
2868
2869 nsectors = div_u64(map->stripe_len, root->sectorsize);
2870 bitmap_len = scrub_calc_parity_bitmap_len(nsectors);
2871 sparity = kzalloc(sizeof(struct scrub_parity) + 2 * bitmap_len,
2872 GFP_NOFS);
2873 if (!sparity) {
2874 spin_lock(&sctx->stat_lock);
2875 sctx->stat.malloc_errors++;
2876 spin_unlock(&sctx->stat_lock);
2877 return -ENOMEM;
2878 }
2879
2880 sparity->stripe_len = map->stripe_len;
2881 sparity->nsectors = nsectors;
2882 sparity->sctx = sctx;
2883 sparity->scrub_dev = sdev;
2884 sparity->logic_start = logic_start;
2885 sparity->logic_end = logic_end;
2886 atomic_set(&sparity->refs, 1);
2887 INIT_LIST_HEAD(&sparity->spages);
2888 sparity->dbitmap = sparity->bitmap;
2889 sparity->ebitmap = (void *)sparity->bitmap + bitmap_len;
2890
2891 ret = 0;
2892 while (logic_start < logic_end) {
2893 if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
2894 key.type = BTRFS_METADATA_ITEM_KEY;
2895 else
2896 key.type = BTRFS_EXTENT_ITEM_KEY;
2897 key.objectid = logic_start;
2898 key.offset = (u64)-1;
2899
2900 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2901 if (ret < 0)
2902 goto out;
2903
2904 if (ret > 0) {
2905 ret = btrfs_previous_extent_item(root, path, 0);
2906 if (ret < 0)
2907 goto out;
2908 if (ret > 0) {
2909 btrfs_release_path(path);
2910 ret = btrfs_search_slot(NULL, root, &key,
2911 path, 0, 0);
2912 if (ret < 0)
2913 goto out;
2914 }
2915 }
2916
2917 stop_loop = 0;
2918 while (1) {
2919 u64 bytes;
2920
2921 l = path->nodes[0];
2922 slot = path->slots[0];
2923 if (slot >= btrfs_header_nritems(l)) {
2924 ret = btrfs_next_leaf(root, path);
2925 if (ret == 0)
2926 continue;
2927 if (ret < 0)
2928 goto out;
2929
2930 stop_loop = 1;
2931 break;
2932 }
2933 btrfs_item_key_to_cpu(l, &key, slot);
2934
2935 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
2936 key.type != BTRFS_METADATA_ITEM_KEY)
2937 goto next;
2938
2939 if (key.type == BTRFS_METADATA_ITEM_KEY)
2940 bytes = root->nodesize;
2941 else
2942 bytes = key.offset;
2943
2944 if (key.objectid + bytes <= logic_start)
2945 goto next;
2946
2947 if (key.objectid >= logic_end) {
2948 stop_loop = 1;
2949 break;
2950 }
2951
2952 while (key.objectid >= logic_start + map->stripe_len)
2953 logic_start += map->stripe_len;
2954
2955 extent = btrfs_item_ptr(l, slot,
2956 struct btrfs_extent_item);
2957 flags = btrfs_extent_flags(l, extent);
2958 generation = btrfs_extent_generation(l, extent);
2959
2960 if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
2961 (key.objectid < logic_start ||
2962 key.objectid + bytes >
2963 logic_start + map->stripe_len)) {
2964 btrfs_err(fs_info,
2965 "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
2966 key.objectid, logic_start);
2967 spin_lock(&sctx->stat_lock);
2968 sctx->stat.uncorrectable_errors++;
2969 spin_unlock(&sctx->stat_lock);
2970 goto next;
2971 }
2972again:
2973 extent_logical = key.objectid;
2974 extent_len = bytes;
2975
2976 if (extent_logical < logic_start) {
2977 extent_len -= logic_start - extent_logical;
2978 extent_logical = logic_start;
2979 }
2980
2981 if (extent_logical + extent_len >
2982 logic_start + map->stripe_len)
2983 extent_len = logic_start + map->stripe_len -
2984 extent_logical;
2985
2986 scrub_parity_mark_sectors_data(sparity, extent_logical,
2987 extent_len);
2988
2989 mapped_length = extent_len;
2990 bbio = NULL;
2991 ret = btrfs_map_block(fs_info, READ, extent_logical,
2992 &mapped_length, &bbio, 0);
2993 if (!ret) {
2994 if (!bbio || mapped_length < extent_len)
2995 ret = -EIO;
2996 }
2997 if (ret) {
2998 btrfs_put_bbio(bbio);
2999 goto out;
3000 }
3001 extent_physical = bbio->stripes[0].physical;
3002 extent_mirror_num = bbio->mirror_num;
3003 extent_dev = bbio->stripes[0].dev;
3004 btrfs_put_bbio(bbio);
3005
3006 ret = btrfs_lookup_csums_range(csum_root,
3007 extent_logical,
3008 extent_logical + extent_len - 1,
3009 &sctx->csum_list, 1);
3010 if (ret)
3011 goto out;
3012
3013 ret = scrub_extent_for_parity(sparity, extent_logical,
3014 extent_len,
3015 extent_physical,
3016 extent_dev, flags,
3017 generation,
3018 extent_mirror_num);
3019
3020 scrub_free_csums(sctx);
3021
3022 if (ret)
3023 goto out;
3024
3025 if (extent_logical + extent_len <
3026 key.objectid + bytes) {
3027 logic_start += map->stripe_len;
3028
3029 if (logic_start >= logic_end) {
3030 stop_loop = 1;
3031 break;
3032 }
3033
3034 if (logic_start < key.objectid + bytes) {
3035 cond_resched();
3036 goto again;
3037 }
3038 }
3039next:
3040 path->slots[0]++;
3041 }
3042
3043 btrfs_release_path(path);
3044
3045 if (stop_loop)
3046 break;
3047
3048 logic_start += map->stripe_len;
3049 }
3050out:
3051 if (ret < 0)
3052 scrub_parity_mark_sectors_error(sparity, logic_start,
3053 logic_end - logic_start);
3054 scrub_parity_put(sparity);
3055 scrub_submit(sctx);
3056 mutex_lock(&sctx->wr_ctx.wr_lock);
3057 scrub_wr_submit(sctx);
3058 mutex_unlock(&sctx->wr_ctx.wr_lock);
3059
3060 btrfs_release_path(path);
3061 return ret < 0 ? ret : 0;
3062}
3063
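/*
 * Scrub everything stripe @num of the chunk stores on this device: compute
 * the logical range the stripe covers, read ahead the extent and csum trees,
 * then walk the extent items stripe by stripe and scrub each extent that is
 * found (handing RAID 5/6 parity stripes to scrub_raid56_parity()).
 */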
3064static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
3065 struct map_lookup *map,
3066 struct btrfs_device *scrub_dev,
3067 int num, u64 base, u64 length,
3068 int is_dev_replace)
3069{
3070 struct btrfs_path *path, *ppath;
3071 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
3072 struct btrfs_root *root = fs_info->extent_root;
3073 struct btrfs_root *csum_root = fs_info->csum_root;
3074 struct btrfs_extent_item *extent;
3075 struct blk_plug plug;
3076 u64 flags;
3077 int ret;
3078 int slot;
3079 u64 nstripes;
3080 struct extent_buffer *l;
3081 u64 physical;
3082 u64 logical;
3083 u64 logic_end;
3084 u64 physical_end;
3085 u64 generation;
3086 int mirror_num;
3087 struct reada_control *reada1;
3088 struct reada_control *reada2;
3089 struct btrfs_key key;
3090 struct btrfs_key key_end;
3091 u64 increment = map->stripe_len;
3092 u64 offset;
3093 u64 extent_logical;
3094 u64 extent_physical;
3095 u64 extent_len;
3096 u64 stripe_logical;
3097 u64 stripe_end;
3098 struct btrfs_device *extent_dev;
3099 int extent_mirror_num;
3100 int stop_loop = 0;
3101
3102 physical = map->stripes[num].physical;
3103 offset = 0;
3104 nstripes = div_u64(length, map->stripe_len);
3105 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
3106 offset = map->stripe_len * num;
3107 increment = map->stripe_len * map->num_stripes;
3108 mirror_num = 1;
3109 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
3110 int factor = map->num_stripes / map->sub_stripes;
3111 offset = map->stripe_len * (num / map->sub_stripes);
3112 increment = map->stripe_len * factor;
3113 mirror_num = num % map->sub_stripes + 1;
3114 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
3115 increment = map->stripe_len;
3116 mirror_num = num % map->num_stripes + 1;
3117 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
3118 increment = map->stripe_len;
3119 mirror_num = num % map->num_stripes + 1;
3120 } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3121 get_raid56_logic_offset(physical, num, map, &offset, NULL);
3122 increment = map->stripe_len * nr_data_stripes(map);
3123 mirror_num = 1;
3124 } else {
3125 increment = map->stripe_len;
3126 mirror_num = 1;
3127 }
3128
3129 path = btrfs_alloc_path();
3130 if (!path)
3131 return -ENOMEM;
3132
3133 ppath = btrfs_alloc_path();
3134 if (!ppath) {
3135 btrfs_free_path(path);
3136 return -ENOMEM;
3137 }
3138
	/*
	 * Work on the commit roots: the blocks referenced from there are
	 * static as long as COW is applied, so the trees can be searched
	 * without taking locks.
	 */
3144 path->search_commit_root = 1;
3145 path->skip_locking = 1;
3146
3147 ppath->search_commit_root = 1;
3148 ppath->skip_locking = 1;
3149
	/*
	 * Compute the logical range of this stripe, then trigger readahead
	 * for the extent tree and the csum tree and wait for it to finish
	 * before the actual scrub starts.
	 */
3154 logical = base + offset;
3155 physical_end = physical + nstripes * map->stripe_len;
3156 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3157 get_raid56_logic_offset(physical_end, num,
3158 map, &logic_end, NULL);
3159 logic_end += base;
3160 } else {
3161 logic_end = logical + increment * nstripes;
3162 }
3163 wait_event(sctx->list_wait,
3164 atomic_read(&sctx->bios_in_flight) == 0);
3165 scrub_blocked_if_needed(fs_info);
3166
3167
3168 key.objectid = logical;
3169 key.type = BTRFS_EXTENT_ITEM_KEY;
3170 key.offset = (u64)0;
3171 key_end.objectid = logic_end;
3172 key_end.type = BTRFS_METADATA_ITEM_KEY;
3173 key_end.offset = (u64)-1;
3174 reada1 = btrfs_reada_add(root, &key, &key_end);
3175
3176 key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
3177 key.type = BTRFS_EXTENT_CSUM_KEY;
3178 key.offset = logical;
3179 key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
3180 key_end.type = BTRFS_EXTENT_CSUM_KEY;
3181 key_end.offset = logic_end;
3182 reada2 = btrfs_reada_add(csum_root, &key, &key_end);
3183
3184 if (!IS_ERR(reada1))
3185 btrfs_reada_wait(reada1);
3186 if (!IS_ERR(reada2))
3187 btrfs_reada_wait(reada2);
3188
	/*
	 * Plug the block device so that the read bios submitted below get
	 * merged into larger requests.
	 */
3194 blk_start_plug(&plug);
3195
	/* now find all extents for each stripe and scrub them */
3199 ret = 0;
3200 while (physical < physical_end) {
		/* stop if the scrub was canceled */
3204 if (atomic_read(&fs_info->scrub_cancel_req) ||
3205 atomic_read(&sctx->cancel_req)) {
3206 ret = -ECANCELED;
3207 goto out;
3208 }
3209
		/* check whether we have to pause the scrub */
3212 if (atomic_read(&fs_info->scrub_pause_req)) {
			/* push queued extents and wait for them to complete */
3214 atomic_set(&sctx->wr_ctx.flush_all_writes, 1);
3215 scrub_submit(sctx);
3216 mutex_lock(&sctx->wr_ctx.wr_lock);
3217 scrub_wr_submit(sctx);
3218 mutex_unlock(&sctx->wr_ctx.wr_lock);
3219 wait_event(sctx->list_wait,
3220 atomic_read(&sctx->bios_in_flight) == 0);
3221 atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
3222 scrub_blocked_if_needed(fs_info);
3223 }
3224
3225 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3226 ret = get_raid56_logic_offset(physical, num, map,
3227 &logical,
3228 &stripe_logical);
3229 logical += base;
3230 if (ret) {
				/* this is a parity stripe, scrub it separately */
3232 stripe_logical += base;
3233 stripe_end = stripe_logical + increment;
3234 ret = scrub_raid56_parity(sctx, map, scrub_dev,
3235 ppath, stripe_logical,
3236 stripe_end);
3237 if (ret)
3238 goto out;
3239 goto skip;
3240 }
3241 }
3242
3243 if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
3244 key.type = BTRFS_METADATA_ITEM_KEY;
3245 else
3246 key.type = BTRFS_EXTENT_ITEM_KEY;
3247 key.objectid = logical;
3248 key.offset = (u64)-1;
3249
3250 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3251 if (ret < 0)
3252 goto out;
3253
3254 if (ret > 0) {
3255 ret = btrfs_previous_extent_item(root, path, 0);
3256 if (ret < 0)
3257 goto out;
3258 if (ret > 0) {
				/* there's no smaller item, so stick with the
				 * larger one */
3261 btrfs_release_path(path);
3262 ret = btrfs_search_slot(NULL, root, &key,
3263 path, 0, 0);
3264 if (ret < 0)
3265 goto out;
3266 }
3267 }
3268
3269 stop_loop = 0;
3270 while (1) {
3271 u64 bytes;
3272
3273 l = path->nodes[0];
3274 slot = path->slots[0];
3275 if (slot >= btrfs_header_nritems(l)) {
3276 ret = btrfs_next_leaf(root, path);
3277 if (ret == 0)
3278 continue;
3279 if (ret < 0)
3280 goto out;
3281
3282 stop_loop = 1;
3283 break;
3284 }
3285 btrfs_item_key_to_cpu(l, &key, slot);
3286
3287 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
3288 key.type != BTRFS_METADATA_ITEM_KEY)
3289 goto next;
3290
3291 if (key.type == BTRFS_METADATA_ITEM_KEY)
3292 bytes = root->nodesize;
3293 else
3294 bytes = key.offset;
3295
3296 if (key.objectid + bytes <= logical)
3297 goto next;
3298
3299 if (key.objectid >= logical + map->stripe_len) {
3300
3301 if (key.objectid >= logic_end)
3302 stop_loop = 1;
3303 break;
3304 }
3305
3306 extent = btrfs_item_ptr(l, slot,
3307 struct btrfs_extent_item);
3308 flags = btrfs_extent_flags(l, extent);
3309 generation = btrfs_extent_generation(l, extent);
3310
3311 if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
3312 (key.objectid < logical ||
3313 key.objectid + bytes >
3314 logical + map->stripe_len)) {
3315 btrfs_err(fs_info,
3316 "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
3317 key.objectid, logical);
3318 spin_lock(&sctx->stat_lock);
3319 sctx->stat.uncorrectable_errors++;
3320 spin_unlock(&sctx->stat_lock);
3321 goto next;
3322 }
3323
3324again:
3325 extent_logical = key.objectid;
3326 extent_len = bytes;
3327
			/* trim the extent down to the part inside this stripe */
3331 if (extent_logical < logical) {
3332 extent_len -= logical - extent_logical;
3333 extent_logical = logical;
3334 }
3335 if (extent_logical + extent_len >
3336 logical + map->stripe_len) {
3337 extent_len = logical + map->stripe_len -
3338 extent_logical;
3339 }
3340
3341 extent_physical = extent_logical - logical + physical;
3342 extent_dev = scrub_dev;
3343 extent_mirror_num = mirror_num;
3344 if (is_dev_replace)
3345 scrub_remap_extent(fs_info, extent_logical,
3346 extent_len, &extent_physical,
3347 &extent_dev,
3348 &extent_mirror_num);
3349
3350 ret = btrfs_lookup_csums_range(csum_root,
3351 extent_logical,
3352 extent_logical +
3353 extent_len - 1,
3354 &sctx->csum_list, 1);
3355 if (ret)
3356 goto out;
3357
3358 ret = scrub_extent(sctx, extent_logical, extent_len,
3359 extent_physical, extent_dev, flags,
3360 generation, extent_mirror_num,
3361 extent_logical - logical + physical);
3362
3363 scrub_free_csums(sctx);
3364
3365 if (ret)
3366 goto out;
3367
3368 if (extent_logical + extent_len <
3369 key.objectid + bytes) {
3370 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
					/*
					 * loop until we find the next data
					 * stripe or we have finished all the
					 * stripes
					 */
3375loop:
3376 physical += map->stripe_len;
3377 ret = get_raid56_logic_offset(physical,
3378 num, map, &logical,
3379 &stripe_logical);
3380 logical += base;
3381
3382 if (ret && physical < physical_end) {
3383 stripe_logical += base;
3384 stripe_end = stripe_logical +
3385 increment;
3386 ret = scrub_raid56_parity(sctx,
3387 map, scrub_dev, ppath,
3388 stripe_logical,
3389 stripe_end);
3390 if (ret)
3391 goto out;
3392 goto loop;
3393 }
3394 } else {
3395 physical += map->stripe_len;
3396 logical += increment;
3397 }
3398 if (logical < key.objectid + bytes) {
3399 cond_resched();
3400 goto again;
3401 }
3402
3403 if (physical >= physical_end) {
3404 stop_loop = 1;
3405 break;
3406 }
3407 }
3408next:
3409 path->slots[0]++;
3410 }
3411 btrfs_release_path(path);
3412skip:
3413 logical += increment;
3414 physical += map->stripe_len;
3415 spin_lock(&sctx->stat_lock);
3416 if (stop_loop)
3417 sctx->stat.last_physical = map->stripes[num].physical +
3418 length;
3419 else
3420 sctx->stat.last_physical = physical;
3421 spin_unlock(&sctx->stat_lock);
3422 if (stop_loop)
3423 break;
3424 }
3425out:
	/* push any queued extents */
3427 scrub_submit(sctx);
3428 mutex_lock(&sctx->wr_ctx.wr_lock);
3429 scrub_wr_submit(sctx);
3430 mutex_unlock(&sctx->wr_ctx.wr_lock);
3431
3432 blk_finish_plug(&plug);
3433 btrfs_free_path(path);
3434 btrfs_free_path(ppath);
3435 return ret < 0 ? ret : 0;
3436}
3437
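/*
 * Scrub the part of a chunk that lives on @scrub_dev: look up the chunk
 * mapping and run scrub_stripe() for every stripe of the chunk that is backed
 * by this device at @dev_offset.
 */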
3438static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
3439 struct btrfs_device *scrub_dev,
3440 u64 chunk_offset, u64 length,
3441 u64 dev_offset,
3442 struct btrfs_block_group_cache *cache,
3443 int is_dev_replace)
3444{
3445 struct btrfs_mapping_tree *map_tree =
3446 &sctx->dev_root->fs_info->mapping_tree;
3447 struct map_lookup *map;
3448 struct extent_map *em;
3449 int i;
3450 int ret = 0;
3451
3452 read_lock(&map_tree->map_tree.lock);
3453 em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
3454 read_unlock(&map_tree->map_tree.lock);
3455
3456 if (!em) {
		/*
		 * The chunk mapping may legitimately be gone if the block
		 * group was removed concurrently; only report an error if
		 * that was not the case.
		 */
3461 spin_lock(&cache->lock);
3462 if (!cache->removed)
3463 ret = -EINVAL;
3464 spin_unlock(&cache->lock);
3465
3466 return ret;
3467 }
3468
3469 map = em->map_lookup;
3470 if (em->start != chunk_offset)
3471 goto out;
3472
3473 if (em->len < length)
3474 goto out;
3475
3476 for (i = 0; i < map->num_stripes; ++i) {
3477 if (map->stripes[i].dev->bdev == scrub_dev->bdev &&
3478 map->stripes[i].physical == dev_offset) {
3479 ret = scrub_stripe(sctx, map, scrub_dev, i,
3480 chunk_offset, length,
3481 is_dev_replace);
3482 if (ret)
3483 goto out;
3484 }
3485 }
3486out:
3487 free_extent_map(em);
3488
3489 return ret;
3490}
3491
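/*
 * Walk the device extents of @scrub_dev in the range [start, end) and scrub
 * the corresponding chunks one by one.  Each block group is made read-only
 * (when possible) while it is scrubbed, and empty unused block groups are
 * handed back to the cleaner afterwards.
 */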
3492static noinline_for_stack
3493int scrub_enumerate_chunks(struct scrub_ctx *sctx,
3494 struct btrfs_device *scrub_dev, u64 start, u64 end,
3495 int is_dev_replace)
3496{
3497 struct btrfs_dev_extent *dev_extent = NULL;
3498 struct btrfs_path *path;
3499 struct btrfs_root *root = sctx->dev_root;
3500 struct btrfs_fs_info *fs_info = root->fs_info;
3501 u64 length;
3502 u64 chunk_offset;
3503 int ret = 0;
3504 int ro_set;
3505 int slot;
3506 struct extent_buffer *l;
3507 struct btrfs_key key;
3508 struct btrfs_key found_key;
3509 struct btrfs_block_group_cache *cache;
3510 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
3511
3512 path = btrfs_alloc_path();
3513 if (!path)
3514 return -ENOMEM;
3515
3516 path->reada = READA_FORWARD;
3517 path->search_commit_root = 1;
3518 path->skip_locking = 1;
3519
3520 key.objectid = scrub_dev->devid;
3521 key.offset = 0ull;
3522 key.type = BTRFS_DEV_EXTENT_KEY;
3523
3524 while (1) {
3525 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3526 if (ret < 0)
3527 break;
3528 if (ret > 0) {
3529 if (path->slots[0] >=
3530 btrfs_header_nritems(path->nodes[0])) {
3531 ret = btrfs_next_leaf(root, path);
3532 if (ret < 0)
3533 break;
3534 if (ret > 0) {
3535 ret = 0;
3536 break;
3537 }
3538 } else {
3539 ret = 0;
3540 }
3541 }
3542
3543 l = path->nodes[0];
3544 slot = path->slots[0];
3545
3546 btrfs_item_key_to_cpu(l, &found_key, slot);
3547
3548 if (found_key.objectid != scrub_dev->devid)
3549 break;
3550
3551 if (found_key.type != BTRFS_DEV_EXTENT_KEY)
3552 break;
3553
3554 if (found_key.offset >= end)
3555 break;
3556
3557 if (found_key.offset < key.offset)
3558 break;
3559
3560 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
3561 length = btrfs_dev_extent_length(l, dev_extent);
3562
3563 if (found_key.offset + length <= start)
3564 goto skip;
3565
3566 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
3567
		/*
		 * Take a reference on the corresponding block group so that
		 * the chunk cannot go away while it is being scrubbed.
		 */
3572 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
3573
		/* the block group may be gone already, just skip this dev extent */
3576 if (!cache)
3577 goto skip;
3578
		/*
		 * Mark the block group read-only for the duration of the
		 * scrub.  The scrub is paused around btrfs_inc_block_group_ro()
		 * because it may wait for a transaction commit, which in turn
		 * waits for running scrubs to pause; doing this while counted
		 * as running could deadlock.
		 */
3587 scrub_pause_on(fs_info);
3588 ret = btrfs_inc_block_group_ro(root, cache);
3589 if (!ret && is_dev_replace) {
			/*
			 * For device replace, wait for any task that raced
			 * with setting the block group read-only and might
			 * still do a NOCOW write or hold a reservation against
			 * it, then flush their ordered extents.  If there were
			 * any, commit the transaction so the resulting extent
			 * items become visible in the commit root that the
			 * scrub reads from.
			 */
3608 btrfs_wait_block_group_reservations(cache);
3609 btrfs_wait_nocow_writers(cache);
3610 ret = btrfs_wait_ordered_roots(fs_info, -1,
3611 cache->key.objectid,
3612 cache->key.offset);
3613 if (ret > 0) {
3614 struct btrfs_trans_handle *trans;
3615
3616 trans = btrfs_join_transaction(root);
3617 if (IS_ERR(trans))
3618 ret = PTR_ERR(trans);
3619 else
3620 ret = btrfs_commit_transaction(trans,
3621 root);
3622 if (ret) {
3623 scrub_pause_off(fs_info);
3624 btrfs_put_block_group(cache);
3625 break;
3626 }
3627 }
3628 }
3629 scrub_pause_off(fs_info);
3630
3631 if (ret == 0) {
3632 ro_set = 1;
3633 } else if (ret == -ENOSPC) {
3634
3635
3636
3637
3638
3639
3640
3641 ro_set = 0;
3642 } else {
3643 btrfs_warn(fs_info,
				   "failed setting block group ro, ret=%d",
3645 ret);
3646 btrfs_put_block_group(cache);
3647 break;
3648 }
3649
3650 btrfs_dev_replace_lock(&fs_info->dev_replace, 1);
3651 dev_replace->cursor_right = found_key.offset + length;
3652 dev_replace->cursor_left = found_key.offset;
3653 dev_replace->item_needs_writeback = 1;
3654 btrfs_dev_replace_unlock(&fs_info->dev_replace, 1);
3655 ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
3656 found_key.offset, cache, is_dev_replace);
3657
		/*
		 * Flush and wait for all pending read and write bios.  Note
		 * that in the dev-replace case a completed read spawns write
		 * requests from the read completion worker, so the writes must
		 * be flushed as well before bios_in_flight can reliably drop
		 * to zero.
		 */
3668 atomic_set(&sctx->wr_ctx.flush_all_writes, 1);
3669 scrub_submit(sctx);
3670 mutex_lock(&sctx->wr_ctx.wr_lock);
3671 scrub_wr_submit(sctx);
3672 mutex_unlock(&sctx->wr_ctx.wr_lock);
3673
3674 wait_event(sctx->list_wait,
3675 atomic_read(&sctx->bios_in_flight) == 0);
3676
3677 scrub_pause_on(fs_info);
3678
		/*
		 * Wait for the remaining worker items while this scrub counts
		 * as paused, so that the wait cannot hold up a transaction
		 * commit.
		 */
3684 wait_event(sctx->list_wait,
3685 atomic_read(&sctx->workers_pending) == 0);
3686 atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
3687
3688 scrub_pause_off(fs_info);
3689
3690 btrfs_dev_replace_lock(&fs_info->dev_replace, 1);
3691 dev_replace->cursor_left = dev_replace->cursor_right;
3692 dev_replace->item_needs_writeback = 1;
3693 btrfs_dev_replace_unlock(&fs_info->dev_replace, 1);
3694
3695 if (ro_set)
3696 btrfs_dec_block_group_ro(root, cache);
3697
		/*
		 * If the block group is now unused, hand it over to the
		 * unused_bgs list so the cleaner kthread can delete it; the
		 * scrub may have kept it alive or read-only until now.
		 */
3705 spin_lock(&cache->lock);
3706 if (!cache->removed && !cache->ro && cache->reserved == 0 &&
3707 btrfs_block_group_used(&cache->item) == 0) {
3708 spin_unlock(&cache->lock);
3709 spin_lock(&fs_info->unused_bgs_lock);
3710 if (list_empty(&cache->bg_list)) {
3711 btrfs_get_block_group(cache);
3712 list_add_tail(&cache->bg_list,
3713 &fs_info->unused_bgs);
3714 }
3715 spin_unlock(&fs_info->unused_bgs_lock);
3716 } else {
3717 spin_unlock(&cache->lock);
3718 }
3719
3720 btrfs_put_block_group(cache);
3721 if (ret)
3722 break;
3723 if (is_dev_replace &&
3724 atomic64_read(&dev_replace->num_write_errors) > 0) {
3725 ret = -EIO;
3726 break;
3727 }
3728 if (sctx->stat.malloc_errors > 0) {
3729 ret = -ENOMEM;
3730 break;
3731 }
3732skip:
3733 key.offset = found_key.offset + length;
3734 btrfs_release_path(path);
3735 }
3736
3737 btrfs_free_path(path);
3738
3739 return ret;
3740}
3741
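/*
 * Scrub all super block copies of the device that lie within its committed
 * size.
 */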
3742static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
3743 struct btrfs_device *scrub_dev)
3744{
3745 int i;
3746 u64 bytenr;
3747 u64 gen;
3748 int ret;
3749 struct btrfs_root *root = sctx->dev_root;
3750
3751 if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
3752 return -EIO;
3753
	/* a seed device keeps its own generation, use that for its supers */
3755 if (scrub_dev->fs_devices != root->fs_info->fs_devices)
3756 gen = scrub_dev->generation;
3757 else
3758 gen = root->fs_info->last_trans_committed;
3759
3760 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
3761 bytenr = btrfs_sb_offset(i);
3762 if (bytenr + BTRFS_SUPER_INFO_SIZE >
3763 scrub_dev->commit_total_bytes)
3764 break;
3765
3766 ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
3767 scrub_dev, BTRFS_EXTENT_FLAG_SUPER, gen, i,
3768 NULL, 1, bytenr);
3769 if (ret)
3770 return ret;
3771 }
3772 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
3773
3774 return 0;
3775}
3776
/*
 * Get a reference on fs_info->scrub_workers, creating the scrub workqueues on
 * first use.
 */
3780static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
3781 int is_dev_replace)
3782{
3783 unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
3784 int max_active = fs_info->thread_pool_size;
3785
3786 if (fs_info->scrub_workers_refcnt == 0) {
3787 if (is_dev_replace)
3788 fs_info->scrub_workers =
3789 btrfs_alloc_workqueue(fs_info, "scrub", flags,
3790 1, 4);
3791 else
3792 fs_info->scrub_workers =
3793 btrfs_alloc_workqueue(fs_info, "scrub", flags,
3794 max_active, 4);
3795 if (!fs_info->scrub_workers)
3796 goto fail_scrub_workers;
3797
3798 fs_info->scrub_wr_completion_workers =
3799 btrfs_alloc_workqueue(fs_info, "scrubwrc", flags,
3800 max_active, 2);
3801 if (!fs_info->scrub_wr_completion_workers)
3802 goto fail_scrub_wr_completion_workers;
3803
3804 fs_info->scrub_nocow_workers =
3805 btrfs_alloc_workqueue(fs_info, "scrubnc", flags, 1, 0);
3806 if (!fs_info->scrub_nocow_workers)
3807 goto fail_scrub_nocow_workers;
3808 fs_info->scrub_parity_workers =
3809 btrfs_alloc_workqueue(fs_info, "scrubparity", flags,
3810 max_active, 2);
3811 if (!fs_info->scrub_parity_workers)
3812 goto fail_scrub_parity_workers;
3813 }
3814 ++fs_info->scrub_workers_refcnt;
3815 return 0;
3816
3817fail_scrub_parity_workers:
3818 btrfs_destroy_workqueue(fs_info->scrub_nocow_workers);
3819fail_scrub_nocow_workers:
3820 btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
3821fail_scrub_wr_completion_workers:
3822 btrfs_destroy_workqueue(fs_info->scrub_workers);
3823fail_scrub_workers:
3824 return -ENOMEM;
3825}
3826
3827static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
3828{
3829 if (--fs_info->scrub_workers_refcnt == 0) {
3830 btrfs_destroy_workqueue(fs_info->scrub_workers);
3831 btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
3832 btrfs_destroy_workqueue(fs_info->scrub_nocow_workers);
3833 btrfs_destroy_workqueue(fs_info->scrub_parity_workers);
3834 }
3835 WARN_ON(fs_info->scrub_workers_refcnt < 0);
3836}
3837
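/*
 * Entry point for scrub and device replace: validate the size assumptions the
 * scrub code relies on, set up the context and worker threads, scrub the
 * super blocks (scrub only) and all chunks of the device, and copy the final
 * statistics into @progress.
 */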
3838int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
3839 u64 end, struct btrfs_scrub_progress *progress,
3840 int readonly, int is_dev_replace)
3841{
3842 struct scrub_ctx *sctx;
3843 int ret;
3844 struct btrfs_device *dev;
3845 struct rcu_string *name;
3846
3847 if (btrfs_fs_closing(fs_info))
3848 return -EINVAL;
3849
3850 if (fs_info->chunk_root->nodesize > BTRFS_STRIPE_LEN) {
		/*
		 * The way scrub is implemented, a tree block must not span
		 * stripes.  Such a configuration does not occur in practice,
		 * so simply refuse to scrub it.
		 */
3856 btrfs_err(fs_info,
3857 "scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails",
3858 fs_info->chunk_root->nodesize, BTRFS_STRIPE_LEN);
3859 return -EINVAL;
3860 }
3861
3862 if (fs_info->chunk_root->sectorsize != PAGE_SIZE) {
		/* scrub currently assumes sectorsize == PAGE_SIZE */
3864 btrfs_err_rl(fs_info,
3865 "scrub: size assumption sectorsize != PAGE_SIZE (%d != %lu) fails",
3866 fs_info->chunk_root->sectorsize, PAGE_SIZE);
3867 return -EINVAL;
3868 }
3869
3870 if (fs_info->chunk_root->nodesize >
3871 PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK ||
3872 fs_info->chunk_root->sectorsize >
3873 PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK) {
		/*
		 * Larger values would exhaust the pagev array in
		 * struct scrub_block.
		 */
3878 btrfs_err(fs_info,
3879 "scrub: size assumption nodesize and sectorsize <= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails",
3880 fs_info->chunk_root->nodesize,
3881 SCRUB_MAX_PAGES_PER_BLOCK,
3882 fs_info->chunk_root->sectorsize,
3883 SCRUB_MAX_PAGES_PER_BLOCK);
3884 return -EINVAL;
3885 }
3886
3887
3888 mutex_lock(&fs_info->fs_devices->device_list_mutex);
3889 dev = btrfs_find_device(fs_info, devid, NULL, NULL);
3890 if (!dev || (dev->missing && !is_dev_replace)) {
3891 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3892 return -ENODEV;
3893 }
3894
3895 if (!is_dev_replace && !readonly && !dev->writeable) {
3896 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3897 rcu_read_lock();
3898 name = rcu_dereference(dev->name);
3899 btrfs_err(fs_info, "scrub: device %s is not writable",
3900 name->str);
3901 rcu_read_unlock();
3902 return -EROFS;
3903 }
3904
3905 mutex_lock(&fs_info->scrub_lock);
3906 if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) {
3907 mutex_unlock(&fs_info->scrub_lock);
3908 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3909 return -EIO;
3910 }
3911
3912 btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
3913 if (dev->scrub_device ||
3914 (!is_dev_replace &&
3915 btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
3916 btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
3917 mutex_unlock(&fs_info->scrub_lock);
3918 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3919 return -EINPROGRESS;
3920 }
3921 btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
3922
3923 ret = scrub_workers_get(fs_info, is_dev_replace);
3924 if (ret) {
3925 mutex_unlock(&fs_info->scrub_lock);
3926 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3927 return ret;
3928 }
3929
3930 sctx = scrub_setup_ctx(dev, is_dev_replace);
3931 if (IS_ERR(sctx)) {
3932 mutex_unlock(&fs_info->scrub_lock);
3933 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3934 scrub_workers_put(fs_info);
3935 return PTR_ERR(sctx);
3936 }
3937 sctx->readonly = readonly;
3938 dev->scrub_device = sctx;
3939 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3940
	/*
	 * Wait out any pending pause request before marking this scrub as
	 * running, to avoid racing with a transaction commit.
	 */
3945 __scrub_blocked_if_needed(fs_info);
3946 atomic_inc(&fs_info->scrubs_running);
3947 mutex_unlock(&fs_info->scrub_lock);
3948
3949 if (!is_dev_replace) {
		/*
		 * Holding the device list mutex keeps super block writes from
		 * racing with the super block scrub.
		 */
3954 mutex_lock(&fs_info->fs_devices->device_list_mutex);
3955 ret = scrub_supers(sctx, dev);
3956 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3957 }
3958
3959 if (!ret)
3960 ret = scrub_enumerate_chunks(sctx, dev, start, end,
3961 is_dev_replace);
3962
3963 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
3964 atomic_dec(&fs_info->scrubs_running);
3965 wake_up(&fs_info->scrub_pause_wait);
3966
3967 wait_event(sctx->list_wait, atomic_read(&sctx->workers_pending) == 0);
3968
3969 if (progress)
3970 memcpy(progress, &sctx->stat, sizeof(*progress));
3971
3972 mutex_lock(&fs_info->scrub_lock);
3973 dev->scrub_device = NULL;
3974 scrub_workers_put(fs_info);
3975 mutex_unlock(&fs_info->scrub_lock);
3976
3977 scrub_put_ctx(sctx);
3978
3979 return ret;
3980}
3981
3982void btrfs_scrub_pause(struct btrfs_root *root)
3983{
3984 struct btrfs_fs_info *fs_info = root->fs_info;
3985
3986 mutex_lock(&fs_info->scrub_lock);
3987 atomic_inc(&fs_info->scrub_pause_req);
3988 while (atomic_read(&fs_info->scrubs_paused) !=
3989 atomic_read(&fs_info->scrubs_running)) {
3990 mutex_unlock(&fs_info->scrub_lock);
3991 wait_event(fs_info->scrub_pause_wait,
3992 atomic_read(&fs_info->scrubs_paused) ==
3993 atomic_read(&fs_info->scrubs_running));
3994 mutex_lock(&fs_info->scrub_lock);
3995 }
3996 mutex_unlock(&fs_info->scrub_lock);
3997}
3998
3999void btrfs_scrub_continue(struct btrfs_root *root)
4000{
4001 struct btrfs_fs_info *fs_info = root->fs_info;
4002
4003 atomic_dec(&fs_info->scrub_pause_req);
4004 wake_up(&fs_info->scrub_pause_wait);
4005}
4006
4007int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
4008{
4009 mutex_lock(&fs_info->scrub_lock);
4010 if (!atomic_read(&fs_info->scrubs_running)) {
4011 mutex_unlock(&fs_info->scrub_lock);
4012 return -ENOTCONN;
4013 }
4014
4015 atomic_inc(&fs_info->scrub_cancel_req);
4016 while (atomic_read(&fs_info->scrubs_running)) {
4017 mutex_unlock(&fs_info->scrub_lock);
4018 wait_event(fs_info->scrub_pause_wait,
4019 atomic_read(&fs_info->scrubs_running) == 0);
4020 mutex_lock(&fs_info->scrub_lock);
4021 }
4022 atomic_dec(&fs_info->scrub_cancel_req);
4023 mutex_unlock(&fs_info->scrub_lock);
4024
4025 return 0;
4026}
4027
4028int btrfs_scrub_cancel_dev(struct btrfs_fs_info *fs_info,
4029 struct btrfs_device *dev)
4030{
4031 struct scrub_ctx *sctx;
4032
4033 mutex_lock(&fs_info->scrub_lock);
4034 sctx = dev->scrub_device;
4035 if (!sctx) {
4036 mutex_unlock(&fs_info->scrub_lock);
4037 return -ENOTCONN;
4038 }
4039 atomic_inc(&sctx->cancel_req);
4040 while (dev->scrub_device) {
4041 mutex_unlock(&fs_info->scrub_lock);
4042 wait_event(fs_info->scrub_pause_wait,
4043 dev->scrub_device == NULL);
4044 mutex_lock(&fs_info->scrub_lock);
4045 }
4046 mutex_unlock(&fs_info->scrub_lock);
4047
4048 return 0;
4049}
4050
4051int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
4052 struct btrfs_scrub_progress *progress)
4053{
4054 struct btrfs_device *dev;
4055 struct scrub_ctx *sctx = NULL;
4056
4057 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
4058 dev = btrfs_find_device(root->fs_info, devid, NULL, NULL);
4059 if (dev)
4060 sctx = dev->scrub_device;
4061 if (sctx)
4062 memcpy(progress, &sctx->stat, sizeof(*progress));
4063 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
4064
4065 return dev ? (sctx ? 0 : -ENOTCONN) : -ENODEV;
4066}
4067
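/*
 * For dev-replace, remap a logical extent to the physical offset, device and
 * mirror number of the first stripe returned by the current block mapping,
 * which is where the data is actually read from.
 */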
4068static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
4069 u64 extent_logical, u64 extent_len,
4070 u64 *extent_physical,
4071 struct btrfs_device **extent_dev,
4072 int *extent_mirror_num)
4073{
4074 u64 mapped_length;
4075 struct btrfs_bio *bbio = NULL;
4076 int ret;
4077
4078 mapped_length = extent_len;
4079 ret = btrfs_map_block(fs_info, READ, extent_logical,
4080 &mapped_length, &bbio, 0);
4081 if (ret || !bbio || mapped_length < extent_len ||
4082 !bbio->stripes[0].dev->bdev) {
4083 btrfs_put_bbio(bbio);
4084 return;
4085 }
4086
4087 *extent_physical = bbio->stripes[0].physical;
4088 *extent_mirror_num = bbio->mirror_num;
4089 *extent_dev = bbio->stripes[0].dev;
4090 btrfs_put_bbio(bbio);
4091}
4092
4093static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
4094 struct scrub_wr_ctx *wr_ctx,
4095 struct btrfs_fs_info *fs_info,
4096 struct btrfs_device *dev,
4097 int is_dev_replace)
4098{
4099 WARN_ON(wr_ctx->wr_curr_bio != NULL);
4100
4101 mutex_init(&wr_ctx->wr_lock);
4102 wr_ctx->wr_curr_bio = NULL;
4103 if (!is_dev_replace)
4104 return 0;
4105
4106 WARN_ON(!dev->bdev);
4107 wr_ctx->pages_per_wr_bio = SCRUB_PAGES_PER_WR_BIO;
4108 wr_ctx->tgtdev = dev;
4109 atomic_set(&wr_ctx->flush_all_writes, 0);
4110 return 0;
4111}
4112
4113static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx)
4114{
4115 mutex_lock(&wr_ctx->wr_lock);
4116 kfree(wr_ctx->wr_curr_bio);
4117 wr_ctx->wr_curr_bio = NULL;
4118 mutex_unlock(&wr_ctx->wr_lock);
4119}
4120
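/*
 * Dev-replace handling for data without checksums (NOCOW): the contents
 * cannot be verified, so the pages are copied to the target device through
 * the page cache by a dedicated worker.
 */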
4121static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
4122 int mirror_num, u64 physical_for_dev_replace)
4123{
4124 struct scrub_copy_nocow_ctx *nocow_ctx;
4125 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
4126
4127 nocow_ctx = kzalloc(sizeof(*nocow_ctx), GFP_NOFS);
4128 if (!nocow_ctx) {
4129 spin_lock(&sctx->stat_lock);
4130 sctx->stat.malloc_errors++;
4131 spin_unlock(&sctx->stat_lock);
4132 return -ENOMEM;
4133 }
4134
4135 scrub_pending_trans_workers_inc(sctx);
4136
4137 nocow_ctx->sctx = sctx;
4138 nocow_ctx->logical = logical;
4139 nocow_ctx->len = len;
4140 nocow_ctx->mirror_num = mirror_num;
4141 nocow_ctx->physical_for_dev_replace = physical_for_dev_replace;
4142 btrfs_init_work(&nocow_ctx->work, btrfs_scrubnc_helper,
4143 copy_nocow_pages_worker, NULL, NULL);
4144 INIT_LIST_HEAD(&nocow_ctx->inodes);
4145 btrfs_queue_work(fs_info->scrub_nocow_workers,
4146 &nocow_ctx->work);
4147
4148 return 0;
4149}
4150
4151static int record_inode_for_nocow(u64 inum, u64 offset, u64 root, void *ctx)
4152{
4153 struct scrub_copy_nocow_ctx *nocow_ctx = ctx;
4154 struct scrub_nocow_inode *nocow_inode;
4155
4156 nocow_inode = kzalloc(sizeof(*nocow_inode), GFP_NOFS);
4157 if (!nocow_inode)
4158 return -ENOMEM;
4159 nocow_inode->inum = inum;
4160 nocow_inode->offset = offset;
4161 nocow_inode->root = root;
4162 list_add_tail(&nocow_inode->list, &nocow_ctx->inodes);
4163 return 0;
4164}
4165
4166#define COPY_COMPLETE 1
4167
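/*
 * Worker for copy_nocow_pages(): collect all inodes that reference the
 * logical range and copy the pages of the first inode that still maps it
 * unchanged.  If nothing could be written, the range is counted as an
 * uncorrectable read error.
 */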
4168static void copy_nocow_pages_worker(struct btrfs_work *work)
4169{
4170 struct scrub_copy_nocow_ctx *nocow_ctx =
4171 container_of(work, struct scrub_copy_nocow_ctx, work);
4172 struct scrub_ctx *sctx = nocow_ctx->sctx;
4173 u64 logical = nocow_ctx->logical;
4174 u64 len = nocow_ctx->len;
4175 int mirror_num = nocow_ctx->mirror_num;
4176 u64 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
4177 int ret;
4178 struct btrfs_trans_handle *trans = NULL;
4179 struct btrfs_fs_info *fs_info;
4180 struct btrfs_path *path;
4181 struct btrfs_root *root;
4182 int not_written = 0;
4183
4184 fs_info = sctx->dev_root->fs_info;
4185 root = fs_info->extent_root;
4186
4187 path = btrfs_alloc_path();
4188 if (!path) {
4189 spin_lock(&sctx->stat_lock);
4190 sctx->stat.malloc_errors++;
4191 spin_unlock(&sctx->stat_lock);
4192 not_written = 1;
4193 goto out;
4194 }
4195
4196 trans = btrfs_join_transaction(root);
4197 if (IS_ERR(trans)) {
4198 not_written = 1;
4199 goto out;
4200 }
4201
4202 ret = iterate_inodes_from_logical(logical, fs_info, path,
4203 record_inode_for_nocow, nocow_ctx);
4204 if (ret != 0 && ret != -ENOENT) {
4205 btrfs_warn(fs_info,
4206 "iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d",
4207 logical, physical_for_dev_replace, len, mirror_num,
4208 ret);
4209 not_written = 1;
4210 goto out;
4211 }
4212
4213 btrfs_end_transaction(trans, root);
4214 trans = NULL;
4215 while (!list_empty(&nocow_ctx->inodes)) {
4216 struct scrub_nocow_inode *entry;
4217 entry = list_first_entry(&nocow_ctx->inodes,
4218 struct scrub_nocow_inode,
4219 list);
4220 list_del_init(&entry->list);
4221 ret = copy_nocow_pages_for_inode(entry->inum, entry->offset,
4222 entry->root, nocow_ctx);
4223 kfree(entry);
4224 if (ret == COPY_COMPLETE) {
4225 ret = 0;
4226 break;
4227 } else if (ret) {
4228 break;
4229 }
4230 }
4231out:
4232 while (!list_empty(&nocow_ctx->inodes)) {
4233 struct scrub_nocow_inode *entry;
4234 entry = list_first_entry(&nocow_ctx->inodes,
4235 struct scrub_nocow_inode,
4236 list);
4237 list_del_init(&entry->list);
4238 kfree(entry);
4239 }
4240 if (trans && !IS_ERR(trans))
4241 btrfs_end_transaction(trans, root);
4242 if (not_written)
4243 btrfs_dev_replace_stats_inc(&fs_info->dev_replace.
4244 num_uncorrectable_read_errors);
4245
4246 btrfs_free_path(path);
4247 kfree(nocow_ctx);
4248
4249 scrub_pending_trans_workers_dec(sctx);
4250}
4251
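/*
 * Check whether [start, start + len) of the inode is still backed by the
 * on-disk extent at @logical and has no ordered extent pending.  Returns 1 if
 * the caller must skip the range, 0 if it is safe to copy, negative on error.
 */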
4252static int check_extent_to_block(struct inode *inode, u64 start, u64 len,
4253 u64 logical)
4254{
4255 struct extent_state *cached_state = NULL;
4256 struct btrfs_ordered_extent *ordered;
4257 struct extent_io_tree *io_tree;
4258 struct extent_map *em;
4259 u64 lockstart = start, lockend = start + len - 1;
4260 int ret = 0;
4261
4262 io_tree = &BTRFS_I(inode)->io_tree;
4263
4264 lock_extent_bits(io_tree, lockstart, lockend, &cached_state);
4265 ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
4266 if (ordered) {
4267 btrfs_put_ordered_extent(ordered);
4268 ret = 1;
4269 goto out_unlock;
4270 }
4271
4272 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
4273 if (IS_ERR(em)) {
4274 ret = PTR_ERR(em);
4275 goto out_unlock;
4276 }
4277
	/*
	 * The extent no longer fully covers the logical range we want to
	 * copy, so the caller has to skip this block.
	 */
4282 if (em->block_start > logical ||
4283 em->block_start + em->block_len < logical + len) {
4284 free_extent_map(em);
4285 ret = 1;
4286 goto out_unlock;
4287 }
4288 free_extent_map(em);
4289
4290out_unlock:
4291 unlock_extent_cached(io_tree, lockstart, lockend, &cached_state,
4292 GFP_NOFS);
4293 return ret;
4294}
4295
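/*
 * Copy the page cache pages of one inode's range to the replace target,
 * re-checking before each page that the range is still backed by the extent
 * being replaced.  Returns COPY_COMPLETE once the whole range was handled.
 */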
4296static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
4297 struct scrub_copy_nocow_ctx *nocow_ctx)
4298{
4299 struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info;
4300 struct btrfs_key key;
4301 struct inode *inode;
4302 struct page *page;
4303 struct btrfs_root *local_root;
4304 struct extent_io_tree *io_tree;
4305 u64 physical_for_dev_replace;
4306 u64 nocow_ctx_logical;
4307 u64 len = nocow_ctx->len;
4308 unsigned long index;
4309 int srcu_index;
4310 int ret = 0;
4311 int err = 0;
4312
4313 key.objectid = root;
4314 key.type = BTRFS_ROOT_ITEM_KEY;
4315 key.offset = (u64)-1;
4316
4317 srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
4318
4319 local_root = btrfs_read_fs_root_no_name(fs_info, &key);
4320 if (IS_ERR(local_root)) {
4321 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
4322 return PTR_ERR(local_root);
4323 }
4324
4325 key.type = BTRFS_INODE_ITEM_KEY;
4326 key.objectid = inum;
4327 key.offset = 0;
4328 inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
4329 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
4330 if (IS_ERR(inode))
4331 return PTR_ERR(inode);
4332
	/* avoid truncate, direct I/O and hole punching while copying */
4334 inode_lock(inode);
4335 inode_dio_wait(inode);
4336
4337 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
4338 io_tree = &BTRFS_I(inode)->io_tree;
4339 nocow_ctx_logical = nocow_ctx->logical;
4340
4341 ret = check_extent_to_block(inode, offset, len, nocow_ctx_logical);
4342 if (ret) {
4343 ret = ret > 0 ? 0 : ret;
4344 goto out;
4345 }
4346
4347 while (len >= PAGE_SIZE) {
4348 index = offset >> PAGE_SHIFT;
4349again:
4350 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
4351 if (!page) {
4352 btrfs_err(fs_info, "find_or_create_page() failed");
4353 ret = -ENOMEM;
4354 goto out;
4355 }
4356
4357 if (PageUptodate(page)) {
4358 if (PageDirty(page))
4359 goto next_page;
4360 } else {
4361 ClearPageError(page);
4362 err = extent_read_full_page(io_tree, page,
4363 btrfs_get_extent,
4364 nocow_ctx->mirror_num);
4365 if (err) {
4366 ret = err;
4367 goto next_page;
4368 }
4369
4370 lock_page(page);
4371
			/*
			 * If the page was dropped from the page cache while it
			 * was being read, its contents may be stale; retry
			 * with a freshly looked-up page.
			 */
4377 if (page->mapping != inode->i_mapping) {
4378 unlock_page(page);
4379 put_page(page);
4380 goto again;
4381 }
4382 if (!PageUptodate(page)) {
4383 ret = -EIO;
4384 goto next_page;
4385 }
4386 }
4387
4388 ret = check_extent_to_block(inode, offset, len,
4389 nocow_ctx_logical);
4390 if (ret) {
4391 ret = ret > 0 ? 0 : ret;
4392 goto next_page;
4393 }
4394
4395 err = write_page_nocow(nocow_ctx->sctx,
4396 physical_for_dev_replace, page);
4397 if (err)
4398 ret = err;
4399next_page:
4400 unlock_page(page);
4401 put_page(page);
4402
4403 if (ret)
4404 break;
4405
4406 offset += PAGE_SIZE;
4407 physical_for_dev_replace += PAGE_SIZE;
4408 nocow_ctx_logical += PAGE_SIZE;
4409 len -= PAGE_SIZE;
4410 }
4411 ret = COPY_COMPLETE;
4412out:
4413 inode_unlock(inode);
4414 iput(inode);
4415 return ret;
4416}
4417
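/*
 * Synchronously write a single page to the dev-replace target device at the
 * given physical offset, bypassing the regular write path.
 */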
4418static int write_page_nocow(struct scrub_ctx *sctx,
4419 u64 physical_for_dev_replace, struct page *page)
4420{
4421 struct bio *bio;
4422 struct btrfs_device *dev;
4423 int ret;
4424
4425 dev = sctx->wr_ctx.tgtdev;
4426 if (!dev)
4427 return -EIO;
4428 if (!dev->bdev) {
4429 btrfs_warn_rl(dev->dev_root->fs_info,
4430 "scrub write_page_nocow(bdev == NULL) is unexpected");
4431 return -EIO;
4432 }
4433 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
4434 if (!bio) {
4435 spin_lock(&sctx->stat_lock);
4436 sctx->stat.malloc_errors++;
4437 spin_unlock(&sctx->stat_lock);
4438 return -ENOMEM;
4439 }
4440 bio->bi_iter.bi_size = 0;
4441 bio->bi_iter.bi_sector = physical_for_dev_replace >> 9;
4442 bio->bi_bdev = dev->bdev;
4443 bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_SYNC);
4444 ret = bio_add_page(bio, page, PAGE_SIZE, 0);
4445 if (ret != PAGE_SIZE) {
4446leave_with_eio:
4447 bio_put(bio);
4448 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
4449 return -EIO;
4450 }
4451
4452 if (btrfsic_submit_bio_wait(bio))
4453 goto leave_with_eio;
4454
4455 bio_put(bio);
4456 return 0;
4457}
4458