#include <linux/blkdev.h>
#include <linux/ratelimit.h>
#include "ctree.h"
#include "volumes.h"
#include "disk-io.h"
#include "ordered-data.h"
#include "transaction.h"
#include "backref.h"
#include "extent_io.h"
#include "dev-replace.h"
#include "check-integrity.h"
#include "rcu-string.h"
#include "raid56.h"

/*
 * This is only the first step towards a full-featured scrub. It reads all
 * extent and super block data and verifies the checksums. In case a bad
 * checksum is found or the extent cannot be read, good data will be written
 * back from another mirror if any can be found.
 *
 * Future enhancements:
 *  - In case an unrepairable extent is encountered, track which files are
 *    affected and report them
 *  - track and record media errors, throw out bad devices
 *  - add a mode to also read unallocated space
 */

struct scrub_block;
struct scrub_ctx;

/*
 * the following three values only influence the performance.
 * The last one configures the number of parallel and outstanding I/O
 * operations. The first two values configure an upper limit for the number
 * of (dynamically allocated) pages that are added to a bio.
 */
#define SCRUB_PAGES_PER_RD_BIO  32      /* 128k per bio */
#define SCRUB_PAGES_PER_WR_BIO  32      /* 128k per bio */
#define SCRUB_BIOS_PER_SCTX     64      /* 8MB per device in flight */

/*
 * the following value times PAGE_SIZE needs to be large enough to match the
 * largest node/leaf/sector size that shall be supported.
 * Values larger than BTRFS_STRIPE_LEN are not supported.
 */
#define SCRUB_MAX_PAGES_PER_BLOCK       16      /* 64k per node/leaf/sector */

struct scrub_recover {
        atomic_t refs;
        struct btrfs_bio *bbio;
        u64 map_length;
};
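
/*
 * Note: a scrub_recover is shared (via its refcount) by all scrub_pages of
 * the recheck sblocks that were built from one btrfs_map_sblock() call, so
 * the btrfs_bio and its mapping stay alive until the last page drops its
 * reference in scrub_put_recover().
 */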

struct scrub_page {
        struct scrub_block *sblock;
        struct page *page;
        struct btrfs_device *dev;
        struct list_head list;
        u64 flags;
        u64 generation;
        u64 logical;
        u64 physical;
        u64 physical_for_dev_replace;
        atomic_t refs;
        struct {
                unsigned int mirror_num:8;
                unsigned int have_csum:1;
                unsigned int io_error:1;
        };
        u8 csum[BTRFS_CSUM_SIZE];

        struct scrub_recover *recover;
};

struct scrub_bio {
        int index;
        struct scrub_ctx *sctx;
        struct btrfs_device *dev;
        struct bio *bio;
        int err;
        u64 logical;
        u64 physical;
#if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
        struct scrub_page *pagev[SCRUB_PAGES_PER_WR_BIO];
#else
        struct scrub_page *pagev[SCRUB_PAGES_PER_RD_BIO];
#endif
        int page_count;
        int next_free;
        struct btrfs_work work;
};
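
/*
 * The #if above simply sizes pagev[] for the larger of the read and write
 * page limits, since the same scrub_bio type is used for both kinds of bio.
 */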

struct scrub_block {
        struct scrub_page *pagev[SCRUB_MAX_PAGES_PER_BLOCK];
        int page_count;
        atomic_t outstanding_pages;
        atomic_t refs; /* free mem on transition to zero */
        struct scrub_ctx *sctx;
        struct scrub_parity *sparity;
        struct {
                unsigned int header_error:1;
                unsigned int checksum_error:1;
                unsigned int no_io_error_seen:1;
                unsigned int generation_error:1; /* also sets header_error */

                /* The following is for the data used to check parity */
                /* It is for the data with checksum */
                unsigned int data_corrected:1;
        };
        struct btrfs_work work;
};

/* Used for the chunks with parity stripe such RAID5/6 */
struct scrub_parity {
        struct scrub_ctx *sctx;

        struct btrfs_device *scrub_dev;

        u64 logic_start;

        u64 logic_end;

        int nsectors;

        int stripe_len;

        atomic_t refs;

        struct list_head spages;

        /* Work of parity check and repair */
        struct btrfs_work work;

        /* Mark the parity blocks which have data */
        unsigned long *dbitmap;

        /*
         * Mark the parity blocks which have data, but errors
         * happen when read data or check data
         */
        unsigned long *ebitmap;

        unsigned long bitmap[0];
};

struct scrub_wr_ctx {
        struct scrub_bio *wr_curr_bio;
        struct btrfs_device *tgtdev;
        int pages_per_wr_bio;
        atomic_t flush_all_writes;
        struct mutex wr_lock;
};

struct scrub_ctx {
        struct scrub_bio *bios[SCRUB_BIOS_PER_SCTX];
        struct btrfs_root *dev_root;
        int first_free;
        int curr;
        atomic_t bios_in_flight;
        atomic_t workers_pending;
        spinlock_t list_lock;
        wait_queue_head_t list_wait;
        u16 csum_size;
        struct list_head csum_list;
        atomic_t cancel_req;
        int readonly;
        int pages_per_rd_bio;
        u32 sectorsize;
        u32 nodesize;

        int is_dev_replace;
        struct scrub_wr_ctx wr_ctx;

        /*
         * statistics
         */
        struct btrfs_scrub_progress stat;
        spinlock_t stat_lock;

        /*
         * Use a ref counter to avoid use-after-free issues. Scrub workers
         * decrement bios_in_flight and workers_pending and then do a wakeup
         * on the list_wait wait queue. We must ensure the main scrub task
         * doesn't free the scrub context before or while the workers are
         * finishing the wakeup() call.
         */
        atomic_t refs;
};

struct scrub_fixup_nodatasum {
        struct scrub_ctx *sctx;
        struct btrfs_device *dev;
        u64 logical;
        struct btrfs_root *root;
        struct btrfs_work work;
        int mirror_num;
};

struct scrub_nocow_inode {
        u64 inum;
        u64 offset;
        u64 root;
        struct list_head list;
};

struct scrub_copy_nocow_ctx {
        struct scrub_ctx *sctx;
        u64 logical;
        u64 len;
        int mirror_num;
        u64 physical_for_dev_replace;
        struct list_head inodes;
        struct btrfs_work work;
};

struct scrub_warning {
        struct btrfs_path *path;
        u64 extent_item_size;
        const char *errstr;
        sector_t sector;
        u64 logical;
        struct btrfs_device *dev;
};

static void scrub_pending_bio_inc(struct scrub_ctx *sctx);
static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx);
static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx);
static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
                                     struct scrub_block *sblocks_for_recheck);
static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
                                struct scrub_block *sblock,
                                int retry_failed_mirror);
static void scrub_recheck_block_checksum(struct scrub_block *sblock);
static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
                                             struct scrub_block *sblock_good);
static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
                                            struct scrub_block *sblock_good,
                                            int page_num, int force_write);
static void scrub_write_block_to_dev_replace(struct scrub_block *sblock);
static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
                                           int page_num);
static int scrub_checksum_data(struct scrub_block *sblock);
static int scrub_checksum_tree_block(struct scrub_block *sblock);
static int scrub_checksum_super(struct scrub_block *sblock);
static void scrub_block_get(struct scrub_block *sblock);
static void scrub_block_put(struct scrub_block *sblock);
static void scrub_page_get(struct scrub_page *spage);
static void scrub_page_put(struct scrub_page *spage);
static void scrub_parity_get(struct scrub_parity *sparity);
static void scrub_parity_put(struct scrub_parity *sparity);
static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
                                    struct scrub_page *spage);
static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
                       u64 physical, struct btrfs_device *dev, u64 flags,
                       u64 gen, int mirror_num, u8 *csum, int force,
                       u64 physical_for_dev_replace);
static void scrub_bio_end_io(struct bio *bio);
static void scrub_bio_end_io_worker(struct btrfs_work *work);
static void scrub_block_complete(struct scrub_block *sblock);
static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
                               u64 extent_logical, u64 extent_len,
                               u64 *extent_physical,
                               struct btrfs_device **extent_dev,
                               int *extent_mirror_num);
static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
                              struct scrub_wr_ctx *wr_ctx,
                              struct btrfs_fs_info *fs_info,
                              struct btrfs_device *dev,
                              int is_dev_replace);
static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx);
static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
                                    struct scrub_page *spage);
static void scrub_wr_submit(struct scrub_ctx *sctx);
static void scrub_wr_bio_end_io(struct bio *bio);
static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
static int write_page_nocow(struct scrub_ctx *sctx,
                            u64 physical_for_dev_replace, struct page *page);
static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
                                      struct scrub_copy_nocow_ctx *ctx);
static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
                            int mirror_num, u64 physical_for_dev_replace);
static void copy_nocow_pages_worker(struct btrfs_work *work);
static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_put_ctx(struct scrub_ctx *sctx);

static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
{
        atomic_inc(&sctx->refs);
        atomic_inc(&sctx->bios_in_flight);
}

static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
{
        atomic_dec(&sctx->bios_in_flight);
        wake_up(&sctx->list_wait);
        scrub_put_ctx(sctx);
}
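
/*
 * Every in-flight bio pins the scrub context: the inc/dec pair above takes
 * a context reference alongside bios_in_flight, so a late bio completion
 * can still wake list_wait safely before scrub_put_ctx() drops its ref.
 */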

static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
{
        while (atomic_read(&fs_info->scrub_pause_req)) {
                mutex_unlock(&fs_info->scrub_lock);
                wait_event(fs_info->scrub_pause_wait,
                           atomic_read(&fs_info->scrub_pause_req) == 0);
                mutex_lock(&fs_info->scrub_lock);
        }
}

static void scrub_pause_on(struct btrfs_fs_info *fs_info)
{
        atomic_inc(&fs_info->scrubs_paused);
        wake_up(&fs_info->scrub_pause_wait);
}

static void scrub_pause_off(struct btrfs_fs_info *fs_info)
{
        mutex_lock(&fs_info->scrub_lock);
        __scrub_blocked_if_needed(fs_info);
        atomic_dec(&fs_info->scrubs_paused);
        mutex_unlock(&fs_info->scrub_lock);

        wake_up(&fs_info->scrub_pause_wait);
}

static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
{
        scrub_pause_on(fs_info);
        scrub_pause_off(fs_info);
}
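
/*
 * The pause handshake: a pausing task raises scrub_pause_req and waits for
 * scrubs_paused to catch up with scrubs_running; scrub_pause_on() announces
 * this task as paused, and scrub_pause_off() blocks in
 * __scrub_blocked_if_needed() (dropping scrub_lock while sleeping) until the
 * pause request is withdrawn.
 */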

/*
 * used for workers that require transaction commits (i.e., for the
 * NOCOW case)
 */
static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx)
{
        struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;

        atomic_inc(&sctx->refs);
        /*
         * increment scrubs_running to prevent cancel requests from
         * completing as long as a worker is running. we must also
         * increment scrubs_paused to prevent deadlocking on pause
         * requests used for transactions commits (as the worker uses a
         * transaction context). it is safe to regard the worker
         * as paused for all matters practical. effectively, we only
         * avoid cancellation requests from completing.
         */
        mutex_lock(&fs_info->scrub_lock);
        atomic_inc(&fs_info->scrubs_running);
        atomic_inc(&fs_info->scrubs_paused);
        mutex_unlock(&fs_info->scrub_lock);

        /*
         * check if @scrubs_running=@scrubs_paused condition
         * fulfilled, no need to wait for scrub paused completion
         * before doing something on this scrub.
         */
        wake_up(&fs_info->scrub_pause_wait);

        atomic_inc(&sctx->workers_pending);
}

/* used for workers that require transaction commits */
static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx)
{
        struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;

        /*
         * see scrub_pending_trans_workers_inc() why we're pretending
         * to be paused in the scrub counters
         */
        mutex_lock(&fs_info->scrub_lock);
        atomic_dec(&fs_info->scrubs_running);
        atomic_dec(&fs_info->scrubs_paused);
        mutex_unlock(&fs_info->scrub_lock);
        atomic_dec(&sctx->workers_pending);
        wake_up(&fs_info->scrub_pause_wait);
        wake_up(&sctx->list_wait);
        scrub_put_ctx(sctx);
}

static void scrub_free_csums(struct scrub_ctx *sctx)
{
        while (!list_empty(&sctx->csum_list)) {
                struct btrfs_ordered_sum *sum;
                sum = list_first_entry(&sctx->csum_list,
                                       struct btrfs_ordered_sum, list);
                list_del(&sum->list);
                kfree(sum);
        }
}

static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
{
        int i;

        if (!sctx)
                return;

        scrub_free_wr_ctx(&sctx->wr_ctx);

        /* this can happen when scrub is cancelled */
        if (sctx->curr != -1) {
                struct scrub_bio *sbio = sctx->bios[sctx->curr];

                for (i = 0; i < sbio->page_count; i++) {
                        WARN_ON(!sbio->pagev[i]->page);
                        scrub_block_put(sbio->pagev[i]->sblock);
                }
                bio_put(sbio->bio);
        }

        for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
                struct scrub_bio *sbio = sctx->bios[i];

                if (!sbio)
                        break;
                kfree(sbio);
        }

        scrub_free_csums(sctx);
        kfree(sctx);
}

static void scrub_put_ctx(struct scrub_ctx *sctx)
{
        if (atomic_dec_and_test(&sctx->refs))
                scrub_free_ctx(sctx);
}

static noinline_for_stack
struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
{
        struct scrub_ctx *sctx;
        int i;
        struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
        int ret;

        sctx = kzalloc(sizeof(*sctx), GFP_KERNEL);
        if (!sctx)
                goto nomem;
        atomic_set(&sctx->refs, 1);
        sctx->is_dev_replace = is_dev_replace;
        sctx->pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
        sctx->curr = -1;
        sctx->dev_root = dev->dev_root;
        for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
                struct scrub_bio *sbio;

                sbio = kzalloc(sizeof(*sbio), GFP_KERNEL);
                if (!sbio)
                        goto nomem;
                sctx->bios[i] = sbio;

                sbio->index = i;
                sbio->sctx = sctx;
                sbio->page_count = 0;
                btrfs_init_work(&sbio->work, btrfs_scrub_helper,
                                scrub_bio_end_io_worker, NULL, NULL);

                if (i != SCRUB_BIOS_PER_SCTX - 1)
                        sctx->bios[i]->next_free = i + 1;
                else
                        sctx->bios[i]->next_free = -1;
        }
        sctx->first_free = 0;
        sctx->nodesize = dev->dev_root->nodesize;
        sctx->sectorsize = dev->dev_root->sectorsize;
        atomic_set(&sctx->bios_in_flight, 0);
        atomic_set(&sctx->workers_pending, 0);
        atomic_set(&sctx->cancel_req, 0);
        sctx->csum_size = btrfs_super_csum_size(fs_info->super_copy);
        INIT_LIST_HEAD(&sctx->csum_list);

        spin_lock_init(&sctx->list_lock);
        spin_lock_init(&sctx->stat_lock);
        init_waitqueue_head(&sctx->list_wait);

        ret = scrub_setup_wr_ctx(sctx, &sctx->wr_ctx, fs_info,
                                 fs_info->dev_replace.tgtdev, is_dev_replace);
        if (ret) {
                scrub_free_ctx(sctx);
                return ERR_PTR(ret);
        }
        return sctx;

nomem:
        scrub_free_ctx(sctx);
        return ERR_PTR(-ENOMEM);
}

static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
                                     void *warn_ctx)
{
        u64 isize;
        u32 nlink;
        int ret;
        int i;
        struct extent_buffer *eb;
        struct btrfs_inode_item *inode_item;
        struct scrub_warning *swarn = warn_ctx;
        struct btrfs_fs_info *fs_info = swarn->dev->dev_root->fs_info;
        struct inode_fs_paths *ipath = NULL;
        struct btrfs_root *local_root;
        struct btrfs_key root_key;
        struct btrfs_key key;

        root_key.objectid = root;
        root_key.type = BTRFS_ROOT_ITEM_KEY;
        root_key.offset = (u64)-1;
        local_root = btrfs_read_fs_root_no_name(fs_info, &root_key);
        if (IS_ERR(local_root)) {
                ret = PTR_ERR(local_root);
                goto err;
        }

        /*
         * this makes the path point to (inum INODE_ITEM ioff)
         */
        key.objectid = inum;
        key.type = BTRFS_INODE_ITEM_KEY;
        key.offset = 0;

        ret = btrfs_search_slot(NULL, local_root, &key, swarn->path, 0, 0);
        if (ret) {
                btrfs_release_path(swarn->path);
                goto err;
        }

        eb = swarn->path->nodes[0];
        inode_item = btrfs_item_ptr(eb, swarn->path->slots[0],
                                    struct btrfs_inode_item);
        isize = btrfs_inode_size(eb, inode_item);
        nlink = btrfs_inode_nlink(eb, inode_item);
        btrfs_release_path(swarn->path);

        ipath = init_ipath(4096, local_root, swarn->path);
        if (IS_ERR(ipath)) {
                ret = PTR_ERR(ipath);
                ipath = NULL;
                goto err;
        }
        ret = paths_from_inode(inum, ipath);

        if (ret < 0)
                goto err;

        /*
         * we deliberately ignore the bit ipath might have been too small to
         * hold all of the paths here
         */
        for (i = 0; i < ipath->fspath->elem_cnt; ++i)
                btrfs_warn_in_rcu(fs_info,
                        "%s at logical %llu on dev %s, sector %llu, root %llu, inode %llu, offset %llu, length %llu, links %u (path: %s)",
                        swarn->errstr, swarn->logical,
                        rcu_str_deref(swarn->dev->name),
                        (unsigned long long)swarn->sector, root, inum, offset,
                        min(isize - offset, (u64)PAGE_SIZE), nlink,
                        (char *)(unsigned long)ipath->fspath->val[i]);

        free_ipath(ipath);
        return 0;

err:
        btrfs_warn_in_rcu(fs_info,
                "%s at logical %llu on dev %s, sector %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d",
                swarn->errstr, swarn->logical,
                rcu_str_deref(swarn->dev->name),
                (unsigned long long)swarn->sector, root, inum, offset, ret);

        free_ipath(ipath);
        return 0;
}

static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
{
        struct btrfs_device *dev;
        struct btrfs_fs_info *fs_info;
        struct btrfs_path *path;
        struct btrfs_key found_key;
        struct extent_buffer *eb;
        struct btrfs_extent_item *ei;
        struct scrub_warning swarn;
        unsigned long ptr = 0;
        u64 extent_item_pos;
        u64 flags = 0;
        u64 ref_root;
        u32 item_size;
        u8 ref_level = 0;
        int ret;

        WARN_ON(sblock->page_count < 1);
        dev = sblock->pagev[0]->dev;
        fs_info = sblock->sctx->dev_root->fs_info;

        path = btrfs_alloc_path();
        if (!path)
                return;

        swarn.sector = (sblock->pagev[0]->physical) >> 9;
        swarn.logical = sblock->pagev[0]->logical;
        swarn.errstr = errstr;
        swarn.dev = NULL;

        ret = extent_from_logical(fs_info, swarn.logical, path, &found_key,
                                  &flags);
        if (ret < 0)
                goto out;

        extent_item_pos = swarn.logical - found_key.objectid;
        swarn.extent_item_size = found_key.offset;

        eb = path->nodes[0];
        ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
        item_size = btrfs_item_size_nr(eb, path->slots[0]);

        if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
                do {
                        ret = tree_backref_for_extent(&ptr, eb, &found_key, ei,
                                                      item_size, &ref_root,
                                                      &ref_level);
                        btrfs_warn_in_rcu(fs_info,
                                "%s at logical %llu on dev %s, sector %llu: metadata %s (level %d) in tree %llu",
                                errstr, swarn.logical,
                                rcu_str_deref(dev->name),
                                (unsigned long long)swarn.sector,
                                ref_level ? "node" : "leaf",
                                ret < 0 ? -1 : ref_level,
                                ret < 0 ? -1 : ref_root);
                } while (ret != 1);
                btrfs_release_path(path);
        } else {
                btrfs_release_path(path);
                swarn.path = path;
                swarn.dev = dev;
                iterate_extent_inodes(fs_info, found_key.objectid,
                                      extent_item_pos, 1,
                                      scrub_print_warning_inode, &swarn);
        }

out:
        btrfs_free_path(path);
}

static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
{
        struct page *page = NULL;
        unsigned long index;
        struct scrub_fixup_nodatasum *fixup = fixup_ctx;
        int ret;
        int corrected = 0;
        struct btrfs_key key;
        struct inode *inode = NULL;
        struct btrfs_fs_info *fs_info;
        u64 end = offset + PAGE_SIZE - 1;
        struct btrfs_root *local_root;
        int srcu_index;

        key.objectid = root;
        key.type = BTRFS_ROOT_ITEM_KEY;
        key.offset = (u64)-1;

        fs_info = fixup->root->fs_info;
        srcu_index = srcu_read_lock(&fs_info->subvol_srcu);

        local_root = btrfs_read_fs_root_no_name(fs_info, &key);
        if (IS_ERR(local_root)) {
                srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
                return PTR_ERR(local_root);
        }

        key.type = BTRFS_INODE_ITEM_KEY;
        key.objectid = inum;
        key.offset = 0;
        inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
        srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
        if (IS_ERR(inode))
                return PTR_ERR(inode);

        index = offset >> PAGE_SHIFT;

        page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
        if (!page) {
                ret = -ENOMEM;
                goto out;
        }

        if (PageUptodate(page)) {
                if (PageDirty(page)) {
                        /*
                         * we need to write the data to the defect sector. the
                         * data that was in that sector is not in memory,
                         * because the page was modified. we must not write the
                         * modified page to that sector.
                         *
                         * TODO: what could be done here: wait for the delalloc
                         * runner to write out that page (might involve COW)
                         * and see whether the sector is still referenced
                         * afterwards.
                         *
                         * For the meantime, we'll treat this error
                         * incorrectable, although there is a chance that a
                         * later scrub will find the bad sector again and that
                         * there's no dirty page in memory, anymore.
                         */
                        ret = -EIO;
                        goto out;
                }
                ret = repair_io_failure(inode, offset, PAGE_SIZE,
                                        fixup->logical, page,
                                        offset - page_offset(page),
                                        fixup->mirror_num);
                unlock_page(page);
                corrected = !ret;
        } else {
                /*
                 * we need to get good data first. the general readpage path
                 * will call repair_io_failure for us, we just have to make
                 * sure we read the bad mirror.
                 */
                ret = set_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
                                      EXTENT_DAMAGED, GFP_NOFS);
                if (ret) {
                        /* set_extent_bits should give proper error */
                        WARN_ON(ret > 0);
                        if (ret > 0)
                                ret = -EFAULT;
                        goto out;
                }

                ret = extent_read_full_page(&BTRFS_I(inode)->io_tree, page,
                                            btrfs_get_extent,
                                            fixup->mirror_num);
                wait_on_page_locked(page);

                corrected = !test_range_bit(&BTRFS_I(inode)->io_tree, offset,
                                            end, EXTENT_DAMAGED, 0, NULL);
                if (!corrected)
                        clear_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
                                          EXTENT_DAMAGED, GFP_NOFS);
        }

out:
        if (page)
                put_page(page);

        iput(inode);

        if (ret < 0)
                return ret;

        if (ret == 0 && corrected) {
                /*
                 * we only need to call readpage for one of the inodes belonging
                 * to this extent. so make iterate_inodes_from_logical return
                 * the first one.
                 */
                return 1;
        }

        return -EIO;
}

static void scrub_fixup_nodatasum(struct btrfs_work *work)
{
        int ret;
        struct scrub_fixup_nodatasum *fixup;
        struct scrub_ctx *sctx;
        struct btrfs_trans_handle *trans = NULL;
        struct btrfs_path *path;
        int uncorrectable = 0;

        fixup = container_of(work, struct scrub_fixup_nodatasum, work);
        sctx = fixup->sctx;

        path = btrfs_alloc_path();
        if (!path) {
                spin_lock(&sctx->stat_lock);
                ++sctx->stat.malloc_errors;
                spin_unlock(&sctx->stat_lock);
                uncorrectable = 1;
                goto out;
        }

        trans = btrfs_join_transaction(fixup->root);
        if (IS_ERR(trans)) {
                uncorrectable = 1;
                goto out;
        }

        /*
         * the idea is to trigger a regular read through the standard path. we
         * read a page from the (failed) logical address by specifying the
         * corresponding copynum of the failed sector. thus, that readpage is
         * expected to fail.
         * that is the point where on-the-fly error correction will kick in
         * (once it's finished) and rewrite the failed sector if a good copy
         * is found.
         */
        ret = iterate_inodes_from_logical(fixup->logical, fixup->root->fs_info,
                                          path, scrub_fixup_readpage, fixup);
        if (ret < 0) {
                uncorrectable = 1;
                goto out;
        }
        WARN_ON(ret != 1);

        spin_lock(&sctx->stat_lock);
        ++sctx->stat.corrected_errors;
        spin_unlock(&sctx->stat_lock);

out:
        if (trans && !IS_ERR(trans))
                btrfs_end_transaction(trans, fixup->root);
        if (uncorrectable) {
                spin_lock(&sctx->stat_lock);
                ++sctx->stat.uncorrectable_errors;
                spin_unlock(&sctx->stat_lock);
                btrfs_dev_replace_stats_inc(
                        &sctx->dev_root->fs_info->dev_replace.
                        num_uncorrectable_read_errors);
                btrfs_err_rl_in_rcu(sctx->dev_root->fs_info,
                        "unable to fixup (nodatasum) error at logical %llu on dev %s",
                        fixup->logical, rcu_str_deref(fixup->dev->name));
        }

        btrfs_free_path(path);
        kfree(fixup);

        scrub_pending_trans_workers_dec(sctx);
}

static inline void scrub_get_recover(struct scrub_recover *recover)
{
        atomic_inc(&recover->refs);
}

static inline void scrub_put_recover(struct scrub_recover *recover)
{
        if (atomic_dec_and_test(&recover->refs)) {
                btrfs_put_bbio(recover->bbio);
                kfree(recover);
        }
}

/*
 * scrub_handle_errored_block gets called when either verification of the
 * pages failed or the bio failed to read, e.g. with EIO. In the latter
 * case, this function handles all pages in the bio, even though only one
 * may be bad.
 * The goal of this function is to repair the errored block by using the
 * contents of one of the mirrors.
 */
static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
{
        struct scrub_ctx *sctx = sblock_to_check->sctx;
        struct btrfs_device *dev;
        struct btrfs_fs_info *fs_info;
        u64 length;
        u64 logical;
        unsigned int failed_mirror_index;
        unsigned int is_metadata;
        unsigned int have_csum;
        struct scrub_block *sblocks_for_recheck; /* holds one for each mirror */
        struct scrub_block *sblock_bad;
        int ret;
        int mirror_index;
        int page_num;
        int success;
        static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
                                      DEFAULT_RATELIMIT_BURST);

        BUG_ON(sblock_to_check->page_count < 1);
        fs_info = sctx->dev_root->fs_info;
        if (sblock_to_check->pagev[0]->flags & BTRFS_EXTENT_FLAG_SUPER) {
                /*
                 * if we find an error in a super block, we just report it.
                 * They will get written with the next transaction commit
                 * anyway
                 */
                spin_lock(&sctx->stat_lock);
                ++sctx->stat.super_errors;
                spin_unlock(&sctx->stat_lock);
                return 0;
        }
        length = sblock_to_check->page_count * PAGE_SIZE;
        logical = sblock_to_check->pagev[0]->logical;
        BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1);
        failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1;
        is_metadata = !(sblock_to_check->pagev[0]->flags &
                        BTRFS_EXTENT_FLAG_DATA);
        have_csum = sblock_to_check->pagev[0]->have_csum;
        dev = sblock_to_check->pagev[0]->dev;

        if (sctx->is_dev_replace && !is_metadata && !have_csum) {
                sblocks_for_recheck = NULL;
                goto nodatasum_case;
        }

        /*
         * read all mirrors one after the other. This includes to
         * re-read the extent or metadata block that failed (that was
         * the cause that this fixup code is called) another time,
         * page by page this time in order to know which pages
         * caused I/O errors and which ones are good (for all mirrors).
         * It is the goal to handle the situation when more than one
         * mirror contains I/O errors, but the errors do not
         * overlap, i.e. the data can be repaired by selecting the
         * pages from those mirrors without I/O error on the
         * particular pages.
         */
        sblocks_for_recheck = kcalloc(BTRFS_MAX_MIRRORS,
                                      sizeof(*sblocks_for_recheck), GFP_NOFS);
        if (!sblocks_for_recheck) {
                spin_lock(&sctx->stat_lock);
                sctx->stat.malloc_errors++;
                sctx->stat.read_errors++;
                sctx->stat.uncorrectable_errors++;
                spin_unlock(&sctx->stat_lock);
                btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
                goto out;
        }

        /* setup the context, map the logical blocks and alloc the pages */
        ret = scrub_setup_recheck_block(sblock_to_check, sblocks_for_recheck);
        if (ret) {
                spin_lock(&sctx->stat_lock);
                sctx->stat.read_errors++;
                sctx->stat.uncorrectable_errors++;
                spin_unlock(&sctx->stat_lock);
                btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
                goto out;
        }
        BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
        sblock_bad = sblocks_for_recheck + failed_mirror_index;

        /* build and submit the bios for the failed mirror, check checksums */
        scrub_recheck_block(fs_info, sblock_bad, 1);

        if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
            sblock_bad->no_io_error_seen) {
                /*
                 * the error disappeared after reading page by page, or
                 * the area was part of a huge bio and other parts of the
                 * bio caused I/O errors, or the block layer merged several
                 * read requests into one and the error is caused by a
                 * different bio (usually one of the two latter cases is
                 * the cause)
                 */
                spin_lock(&sctx->stat_lock);
                sctx->stat.unverified_errors++;
                sblock_to_check->data_corrected = 1;
                spin_unlock(&sctx->stat_lock);

                if (sctx->is_dev_replace)
                        scrub_write_block_to_dev_replace(sblock_bad);
                goto out;
        }

        if (!sblock_bad->no_io_error_seen) {
                spin_lock(&sctx->stat_lock);
                sctx->stat.read_errors++;
                spin_unlock(&sctx->stat_lock);
                if (__ratelimit(&_rs))
                        scrub_print_warning("i/o error", sblock_to_check);
                btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
        } else if (sblock_bad->checksum_error) {
                spin_lock(&sctx->stat_lock);
                sctx->stat.csum_errors++;
                spin_unlock(&sctx->stat_lock);
                if (__ratelimit(&_rs))
                        scrub_print_warning("checksum error", sblock_to_check);
                btrfs_dev_stat_inc_and_print(dev,
                                             BTRFS_DEV_STAT_CORRUPTION_ERRS);
        } else if (sblock_bad->header_error) {
                spin_lock(&sctx->stat_lock);
                sctx->stat.verify_errors++;
                spin_unlock(&sctx->stat_lock);
                if (__ratelimit(&_rs))
                        scrub_print_warning("checksum/header error",
                                            sblock_to_check);
                if (sblock_bad->generation_error)
                        btrfs_dev_stat_inc_and_print(dev,
                                BTRFS_DEV_STAT_GENERATION_ERRS);
                else
                        btrfs_dev_stat_inc_and_print(dev,
                                BTRFS_DEV_STAT_CORRUPTION_ERRS);
        }

        if (sctx->readonly) {
                ASSERT(!sctx->is_dev_replace);
                goto out;
        }

        if (!is_metadata && !have_csum) {
                struct scrub_fixup_nodatasum *fixup_nodatasum;

                WARN_ON(sctx->is_dev_replace);

nodatasum_case:

                /*
                 * !is_metadata and !have_csum, this means that the data
                 * might not be COWed, that it might be modified
                 * concurrently. The general strategy to work on the
                 * commit root does not help in the case when COW is not
                 * used.
                 */
                fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS);
                if (!fixup_nodatasum)
                        goto did_not_correct_error;
                fixup_nodatasum->sctx = sctx;
                fixup_nodatasum->dev = dev;
                fixup_nodatasum->logical = logical;
                fixup_nodatasum->root = fs_info->extent_root;
                fixup_nodatasum->mirror_num = failed_mirror_index + 1;
                scrub_pending_trans_workers_inc(sctx);
                btrfs_init_work(&fixup_nodatasum->work, btrfs_scrub_helper,
                                scrub_fixup_nodatasum, NULL, NULL);
                btrfs_queue_work(fs_info->scrub_workers,
                                 &fixup_nodatasum->work);
                goto out;
        }

        /*
         * now build and submit the bios for the other mirrors, check
         * checksums.
         * First try to pick the mirror which is completely without I/O
         * errors and also does not have a checksum error.
         * If one is found, and if a checksum is present, the full block
         * that is known to contain an error is rewritten. Afterwards a
         * block that is supposed to have the identical contents is read
         * back for verification.
         */
        for (mirror_index = 0;
             mirror_index < BTRFS_MAX_MIRRORS &&
             sblocks_for_recheck[mirror_index].page_count > 0;
             mirror_index++) {
                struct scrub_block *sblock_other;

                if (mirror_index == failed_mirror_index)
                        continue;
                sblock_other = sblocks_for_recheck + mirror_index;

                /* build and submit the bios, check checksums */
                scrub_recheck_block(fs_info, sblock_other, 0);

                if (!sblock_other->header_error &&
                    !sblock_other->checksum_error &&
                    sblock_other->no_io_error_seen) {
                        if (sctx->is_dev_replace) {
                                scrub_write_block_to_dev_replace(sblock_other);
                                goto corrected_error;
                        } else {
                                ret = scrub_repair_block_from_good_copy(
                                                sblock_bad, sblock_other);
                                if (!ret)
                                        goto corrected_error;
                        }
                }
        }

        if (sblock_bad->no_io_error_seen && !sctx->is_dev_replace)
                goto did_not_correct_error;

        /*
         * In case of I/O errors in the area that is supposed to be
         * repaired, continue by picking good copies of those pages.
         * Select the good pages from mirrors to rewrite bad pages from
         * the area to fix. Afterwards verify the checksum of the block
         * that is supposed to be repaired. This verification step is
         * only done for the purpose of statistic counting and for the
         * final scrub report, whether errors remain.
         * In the dev replace case, every page is written to the target
         * device, falling back to the bad page itself (which is zeroed
         * at write time) when no good copy exists.
         */
        success = 1;
        for (page_num = 0; page_num < sblock_bad->page_count;
             page_num++) {
                struct scrub_page *page_bad = sblock_bad->pagev[page_num];
                struct scrub_block *sblock_other = NULL;

                /* skip no-io-error page in scrub */
                if (!page_bad->io_error && !sctx->is_dev_replace)
                        continue;

                /* try to find no-io-error page in mirrors */
                if (page_bad->io_error) {
                        for (mirror_index = 0;
                             mirror_index < BTRFS_MAX_MIRRORS &&
                             sblocks_for_recheck[mirror_index].page_count > 0;
                             mirror_index++) {
                                if (!sblocks_for_recheck[mirror_index].
                                    pagev[page_num]->io_error) {
                                        sblock_other = sblocks_for_recheck +
                                                       mirror_index;
                                        break;
                                }
                        }
                        if (!sblock_other)
                                success = 0;
                }

                if (sctx->is_dev_replace) {
                        /*
                         * did not find a mirror to fetch the page
                         * from. scrub_write_page_to_dev_replace()
                         * handles this case (page->io_error), by
                         * filling the block with zeros before
                         * submitting the write request
                         */
                        if (!sblock_other)
                                sblock_other = sblock_bad;

                        if (scrub_write_page_to_dev_replace(sblock_other,
                                                            page_num) != 0) {
                                btrfs_dev_replace_stats_inc(
                                        &sctx->dev_root->
                                        fs_info->dev_replace.
                                        num_write_errors);
                                success = 0;
                        }
                } else if (sblock_other) {
                        ret = scrub_repair_page_from_good_copy(sblock_bad,
                                                               sblock_other,
                                                               page_num, 0);
                        if (0 == ret)
                                page_bad->io_error = 0;
                        else
                                success = 0;
                }
        }

        if (success && !sctx->is_dev_replace) {
                if (is_metadata || have_csum) {
                        /*
                         * need to verify the checksum now that all
                         * sectors on disk are repaired (the write
                         * request for data to be repaired is on its way).
                         * Just be lazy and use scrub_recheck_block()
                         * which re-reads the data before the checksum
                         * is verified, but most likely the data comes out
                         * of the page cache.
                         */
                        scrub_recheck_block(fs_info, sblock_bad, 1);
                        if (!sblock_bad->header_error &&
                            !sblock_bad->checksum_error &&
                            sblock_bad->no_io_error_seen)
                                goto corrected_error;
                        else
                                goto did_not_correct_error;
                } else {
corrected_error:
                        spin_lock(&sctx->stat_lock);
                        sctx->stat.corrected_errors++;
                        sblock_to_check->data_corrected = 1;
                        spin_unlock(&sctx->stat_lock);
                        btrfs_err_rl_in_rcu(fs_info,
                                "fixed up error at logical %llu on dev %s",
                                logical, rcu_str_deref(dev->name));
                }
        } else {
did_not_correct_error:
                spin_lock(&sctx->stat_lock);
                sctx->stat.uncorrectable_errors++;
                spin_unlock(&sctx->stat_lock);
                btrfs_err_rl_in_rcu(fs_info,
                        "unable to fixup (regular) error at logical %llu on dev %s",
                        logical, rcu_str_deref(dev->name));
        }

out:
        if (sblocks_for_recheck) {
                for (mirror_index = 0; mirror_index < BTRFS_MAX_MIRRORS;
                     mirror_index++) {
                        struct scrub_block *sblock = sblocks_for_recheck +
                                                     mirror_index;
                        struct scrub_recover *recover;
                        int page_index;

                        for (page_index = 0; page_index < sblock->page_count;
                             page_index++) {
                                sblock->pagev[page_index]->sblock = NULL;
                                recover = sblock->pagev[page_index]->recover;
                                if (recover) {
                                        scrub_put_recover(recover);
                                        sblock->pagev[page_index]->recover =
                                                                        NULL;
                                }
                                scrub_page_put(sblock->pagev[page_index]);
                        }
                }
                kfree(sblocks_for_recheck);
        }

        return 0;
}

static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio)
{
        if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
                return 2;
        else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
                return 3;
        else
                return (int)bbio->num_stripes;
}
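
/*
 * For RAID5 a block can be obtained two ways (read directly, or rebuilt
 * from the remaining data plus parity), and for RAID6 three ways (directly,
 * or rebuilt via P or via Q), which is why the counts above are 2 and 3
 * rather than the stripe count.
 */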

static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
                                                 u64 *raid_map,
                                                 u64 mapped_length,
                                                 int nstripes, int mirror,
                                                 int *stripe_index,
                                                 u64 *stripe_offset)
{
        int i;

        if (map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
                /* RAID5/6 */
                for (i = 0; i < nstripes; i++) {
                        if (raid_map[i] == RAID6_Q_STRIPE ||
                            raid_map[i] == RAID5_P_STRIPE)
                                continue;

                        if (logical >= raid_map[i] &&
                            logical < raid_map[i] + mapped_length)
                                break;
                }

                *stripe_index = i;
                *stripe_offset = logical - raid_map[i];
        } else {
                /* The other RAID type */
                *stripe_index = mirror;
                *stripe_offset = 0;
        }
}
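
/*
 * Worked example (numbers are illustrative, not from the source): with a
 * RAID5 stripe_len of 64K, raid_map might hold { 10M, 10M+64K, P }. A
 * logical address of 10M+80K falls inside the second entry, so stripe_index
 * becomes 1 and stripe_offset becomes 16K into that device stripe.
 */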

static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
                                     struct scrub_block *sblocks_for_recheck)
{
        struct scrub_ctx *sctx = original_sblock->sctx;
        struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
        u64 length = original_sblock->page_count * PAGE_SIZE;
        u64 logical = original_sblock->pagev[0]->logical;
        u64 generation = original_sblock->pagev[0]->generation;
        u64 flags = original_sblock->pagev[0]->flags;
        u64 have_csum = original_sblock->pagev[0]->have_csum;
        struct scrub_recover *recover;
        struct btrfs_bio *bbio;
        u64 sublen;
        u64 mapped_length;
        u64 stripe_offset;
        int stripe_index;
        int page_index = 0;
        int mirror_index;
        int nmirrors;
        int ret;

        /*
         * note: the two members refs and outstanding_pages
         * are not used (and not set) in the blocks that are used for
         * the recheck procedure
         */
        while (length > 0) {
                sublen = min_t(u64, length, PAGE_SIZE);
                mapped_length = sublen;
                bbio = NULL;

                /*
                 * with a length of PAGE_SIZE, each returned stripe
                 * represents one mirror
                 */
                ret = btrfs_map_sblock(fs_info, REQ_GET_READ_MIRRORS, logical,
                                       &mapped_length, &bbio, 0, 1);
                if (ret || !bbio || mapped_length < sublen) {
                        btrfs_put_bbio(bbio);
                        return -EIO;
                }

                recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
                if (!recover) {
                        btrfs_put_bbio(bbio);
                        return -ENOMEM;
                }

                atomic_set(&recover->refs, 1);
                recover->bbio = bbio;
                recover->map_length = mapped_length;

                BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO);

                nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS);

                for (mirror_index = 0; mirror_index < nmirrors;
                     mirror_index++) {
                        struct scrub_block *sblock;
                        struct scrub_page *page;

                        sblock = sblocks_for_recheck + mirror_index;
                        sblock->sctx = sctx;

                        page = kzalloc(sizeof(*page), GFP_NOFS);
                        if (!page) {
leave_nomem:
                                spin_lock(&sctx->stat_lock);
                                sctx->stat.malloc_errors++;
                                spin_unlock(&sctx->stat_lock);
                                scrub_put_recover(recover);
                                return -ENOMEM;
                        }
                        scrub_page_get(page);
                        sblock->pagev[page_index] = page;
                        page->sblock = sblock;
                        page->flags = flags;
                        page->generation = generation;
                        page->logical = logical;
                        page->have_csum = have_csum;
                        if (have_csum)
                                memcpy(page->csum,
                                       original_sblock->pagev[0]->csum,
                                       sctx->csum_size);

                        scrub_stripe_index_and_offset(logical,
                                                      bbio->map_type,
                                                      bbio->raid_map,
                                                      mapped_length,
                                                      bbio->num_stripes -
                                                      bbio->num_tgtdevs,
                                                      mirror_index,
                                                      &stripe_index,
                                                      &stripe_offset);
                        page->physical = bbio->stripes[stripe_index].physical +
                                         stripe_offset;
                        page->dev = bbio->stripes[stripe_index].dev;

                        BUG_ON(page_index >= original_sblock->page_count);
                        page->physical_for_dev_replace =
                                original_sblock->pagev[page_index]->
                                physical_for_dev_replace;
                        /* for missing devices, dev->bdev is NULL */
                        page->mirror_num = mirror_index + 1;
                        sblock->page_count++;
                        page->page = alloc_page(GFP_NOFS);
                        if (!page->page)
                                goto leave_nomem;

                        scrub_get_recover(recover);
                        page->recover = recover;
                }
                scrub_put_recover(recover);
                length -= sublen;
                logical += sublen;
                page_index++;
        }

        return 0;
}

struct scrub_bio_ret {
        struct completion event;
        int error;
};

static void scrub_bio_wait_endio(struct bio *bio)
{
        struct scrub_bio_ret *ret = bio->bi_private;

        ret->error = bio->bi_error;
        complete(&ret->event);
}

static inline int scrub_is_page_on_raid56(struct scrub_page *page)
{
        return page->recover &&
               (page->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
}

static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
                                        struct bio *bio,
                                        struct scrub_page *page)
{
        struct scrub_bio_ret done;
        int ret;

        init_completion(&done.event);
        done.error = 0;
        bio->bi_iter.bi_sector = page->logical >> 9;
        bio->bi_private = &done;
        bio->bi_end_io = scrub_bio_wait_endio;

        ret = raid56_parity_recover(fs_info->fs_root, bio, page->recover->bbio,
                                    page->recover->map_length,
                                    page->mirror_num, 0);
        if (ret)
                return ret;

        wait_for_completion(&done.event);
        if (done.error)
                return -EIO;

        return 0;
}
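
/*
 * This helper turns the asynchronous RAID5/6 recovery path into a
 * synchronous read: the on-stack completion is signalled from the bio's
 * end_io callback, so the caller can treat a parity rebuild just like a
 * plain submit-and-wait read of one mirror.
 */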

/*
 * this function will check the on disk data for checksum errors, header
 * errors and read I/O errors. If any I/O errors happen, the exact pages
 * which are errored are marked as being bad.
 */
static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
                                struct scrub_block *sblock,
                                int retry_failed_mirror)
{
        int page_num;

        sblock->no_io_error_seen = 1;

        for (page_num = 0; page_num < sblock->page_count; page_num++) {
                struct bio *bio;
                struct scrub_page *page = sblock->pagev[page_num];

                if (page->dev->bdev == NULL) {
                        page->io_error = 1;
                        sblock->no_io_error_seen = 0;
                        continue;
                }

                WARN_ON(!page->page);
                bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
                if (!bio) {
                        page->io_error = 1;
                        sblock->no_io_error_seen = 0;
                        continue;
                }
                bio->bi_bdev = page->dev->bdev;

                bio_add_page(bio, page->page, PAGE_SIZE, 0);
                if (!retry_failed_mirror && scrub_is_page_on_raid56(page)) {
                        if (scrub_submit_raid56_bio_wait(fs_info, bio, page))
                                sblock->no_io_error_seen = 0;
                } else {
                        bio->bi_iter.bi_sector = page->physical >> 9;

                        if (btrfsic_submit_bio_wait(READ, bio))
                                sblock->no_io_error_seen = 0;
                }

                bio_put(bio);
        }

        if (sblock->no_io_error_seen)
                scrub_recheck_block_checksum(sblock);
}

static inline int scrub_check_fsid(u8 fsid[],
                                   struct scrub_page *spage)
{
        struct btrfs_fs_devices *fs_devices = spage->dev->fs_devices;
        int ret;

        ret = memcmp(fsid, fs_devices->fsid, BTRFS_UUID_SIZE);
        return !ret;
}

static void scrub_recheck_block_checksum(struct scrub_block *sblock)
{
        sblock->header_error = 0;
        sblock->checksum_error = 0;
        sblock->generation_error = 0;

        if (sblock->pagev[0]->flags & BTRFS_EXTENT_FLAG_DATA)
                scrub_checksum_data(sblock);
        else
                scrub_checksum_tree_block(sblock);
}

static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
                                             struct scrub_block *sblock_good)
{
        int page_num;
        int ret = 0;

        for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
                int ret_sub;

                ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
                                                           sblock_good,
                                                           page_num, 1);
                if (ret_sub)
                        ret = ret_sub;
        }

        return ret;
}

static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
                                            struct scrub_block *sblock_good,
                                            int page_num, int force_write)
{
        struct scrub_page *page_bad = sblock_bad->pagev[page_num];
        struct scrub_page *page_good = sblock_good->pagev[page_num];

        BUG_ON(page_bad->page == NULL);
        BUG_ON(page_good->page == NULL);
        if (force_write || sblock_bad->header_error ||
            sblock_bad->checksum_error || page_bad->io_error) {
                struct bio *bio;
                int ret;

                if (!page_bad->dev->bdev) {
                        btrfs_warn_rl(sblock_bad->sctx->dev_root->fs_info,
                                "scrub_repair_page_from_good_copy(bdev == NULL) is unexpected");
                        return -EIO;
                }

                bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
                if (!bio)
                        return -EIO;
                bio->bi_bdev = page_bad->dev->bdev;
                bio->bi_iter.bi_sector = page_bad->physical >> 9;

                ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
                if (PAGE_SIZE != ret) {
                        bio_put(bio);
                        return -EIO;
                }

                if (btrfsic_submit_bio_wait(WRITE, bio)) {
                        btrfs_dev_stat_inc_and_print(page_bad->dev,
                                BTRFS_DEV_STAT_WRITE_ERRS);
                        btrfs_dev_replace_stats_inc(
                                &sblock_bad->sctx->dev_root->fs_info->
                                dev_replace.num_write_errors);
                        bio_put(bio);
                        return -EIO;
                }
                bio_put(bio);
        }

        return 0;
}

static void scrub_write_block_to_dev_replace(struct scrub_block *sblock)
{
        int page_num;

        /*
         * This block is used for the check of the parity on the source
         * device, so the data needn't be written into the destination
         * device.
         */
        if (sblock->sparity)
                return;

        for (page_num = 0; page_num < sblock->page_count; page_num++) {
                int ret;

                ret = scrub_write_page_to_dev_replace(sblock, page_num);
                if (ret)
                        btrfs_dev_replace_stats_inc(
                                &sblock->sctx->dev_root->fs_info->dev_replace.
                                num_write_errors);
        }
}

static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
                                           int page_num)
{
        struct scrub_page *spage = sblock->pagev[page_num];

        BUG_ON(spage->page == NULL);
        if (spage->io_error) {
                void *mapped_buffer = kmap_atomic(spage->page);

                memset(mapped_buffer, 0, PAGE_SIZE);
                flush_dcache_page(spage->page);
                kunmap_atomic(mapped_buffer);
        }
        return scrub_add_page_to_wr_bio(sblock->sctx, spage);
}

static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
                                    struct scrub_page *spage)
{
        struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx;
        struct scrub_bio *sbio;
        int ret;

        mutex_lock(&wr_ctx->wr_lock);
again:
        if (!wr_ctx->wr_curr_bio) {
                wr_ctx->wr_curr_bio = kzalloc(sizeof(*wr_ctx->wr_curr_bio),
                                              GFP_KERNEL);
                if (!wr_ctx->wr_curr_bio) {
                        mutex_unlock(&wr_ctx->wr_lock);
                        return -ENOMEM;
                }
                wr_ctx->wr_curr_bio->sctx = sctx;
                wr_ctx->wr_curr_bio->page_count = 0;
        }
        sbio = wr_ctx->wr_curr_bio;
        if (sbio->page_count == 0) {
                struct bio *bio;

                sbio->physical = spage->physical_for_dev_replace;
                sbio->logical = spage->logical;
                sbio->dev = wr_ctx->tgtdev;
                bio = sbio->bio;
                if (!bio) {
                        bio = btrfs_io_bio_alloc(GFP_KERNEL,
                                                 wr_ctx->pages_per_wr_bio);
                        if (!bio) {
                                mutex_unlock(&wr_ctx->wr_lock);
                                return -ENOMEM;
                        }
                        sbio->bio = bio;
                }

                bio->bi_private = sbio;
                bio->bi_end_io = scrub_wr_bio_end_io;
                bio->bi_bdev = sbio->dev->bdev;
                bio->bi_iter.bi_sector = sbio->physical >> 9;
                sbio->err = 0;
        } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
                   spage->physical_for_dev_replace ||
                   sbio->logical + sbio->page_count * PAGE_SIZE !=
                   spage->logical) {
                scrub_wr_submit(sctx);
                goto again;
        }

        ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
        if (ret != PAGE_SIZE) {
                if (sbio->page_count < 1) {
                        bio_put(sbio->bio);
                        sbio->bio = NULL;
                        mutex_unlock(&wr_ctx->wr_lock);
                        return -EIO;
                }
                scrub_wr_submit(sctx);
                goto again;
        }

        sbio->pagev[sbio->page_count] = spage;
        scrub_page_get(spage);
        sbio->page_count++;
        if (sbio->page_count == wr_ctx->pages_per_wr_bio)
                scrub_wr_submit(sctx);
        mutex_unlock(&wr_ctx->wr_lock);

        return 0;
}
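
/*
 * Pages are batched into the current write bio only while they stay
 * physically and logically contiguous; any discontinuity (or a full bio)
 * forces a submit, and the page is then retried against a fresh bio via
 * the "again" label.
 */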

static void scrub_wr_submit(struct scrub_ctx *sctx)
{
        struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx;
        struct scrub_bio *sbio;

        if (!wr_ctx->wr_curr_bio)
                return;

        sbio = wr_ctx->wr_curr_bio;
        wr_ctx->wr_curr_bio = NULL;
        WARN_ON(!sbio->bio->bi_bdev);
        scrub_pending_bio_inc(sctx);
        /* process all writes in a single worker thread. Then the block layer
         * orders the requests before sending them to the driver which
         * doubled the write performance on spinning disks when measured
         * with Linux 3.5 */
        btrfsic_submit_bio(WRITE, sbio->bio);
}

static void scrub_wr_bio_end_io(struct bio *bio)
{
        struct scrub_bio *sbio = bio->bi_private;
        struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info;

        sbio->err = bio->bi_error;
        sbio->bio = bio;

        btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper,
                        scrub_wr_bio_end_io_worker, NULL, NULL);
        btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work);
}

static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
{
        struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
        struct scrub_ctx *sctx = sbio->sctx;
        int i;

        WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
        if (sbio->err) {
                struct btrfs_dev_replace *dev_replace =
                        &sbio->sctx->dev_root->fs_info->dev_replace;

                for (i = 0; i < sbio->page_count; i++) {
                        struct scrub_page *spage = sbio->pagev[i];

                        spage->io_error = 1;
                        btrfs_dev_replace_stats_inc(&dev_replace->
                                                    num_write_errors);
                }
        }

        for (i = 0; i < sbio->page_count; i++)
                scrub_page_put(sbio->pagev[i]);

        bio_put(sbio->bio);
        kfree(sbio);
        scrub_pending_bio_dec(sctx);
}

static int scrub_checksum(struct scrub_block *sblock)
{
        u64 flags;
        int ret;

        /*
         * No need to initialize these stats currently,
         * because this function only use return value
         * instead of these stats value.
         *
         * Todo:
         * check these stats and vary them if needed
         */
        sblock->header_error = 0;
        sblock->generation_error = 0;
        sblock->checksum_error = 0;

        WARN_ON(sblock->page_count < 1);
        flags = sblock->pagev[0]->flags;
        ret = 0;
        if (flags & BTRFS_EXTENT_FLAG_DATA)
                ret = scrub_checksum_data(sblock);
        else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
                ret = scrub_checksum_tree_block(sblock);
        else if (flags & BTRFS_EXTENT_FLAG_SUPER)
                (void)scrub_checksum_super(sblock);
        else
                WARN_ON(1);
        if (ret)
                scrub_handle_errored_block(sblock);

        return ret;
}

static int scrub_checksum_data(struct scrub_block *sblock)
{
        struct scrub_ctx *sctx = sblock->sctx;
        u8 csum[BTRFS_CSUM_SIZE];
        u8 *on_disk_csum;
        struct page *page;
        void *buffer;
        u32 crc = ~(u32)0;
        u64 len;
        int index;

        BUG_ON(sblock->page_count < 1);
        if (!sblock->pagev[0]->have_csum)
                return 0;

        on_disk_csum = sblock->pagev[0]->csum;
        page = sblock->pagev[0]->page;
        buffer = kmap_atomic(page);

        len = sctx->sectorsize;
        index = 0;
        for (;;) {
                u64 l = min_t(u64, len, PAGE_SIZE);

                crc = btrfs_csum_data(buffer, crc, l);
                kunmap_atomic(buffer);
                len -= l;
                if (len == 0)
                        break;
                index++;
                BUG_ON(index >= sblock->page_count);
                BUG_ON(!sblock->pagev[index]->page);
                page = sblock->pagev[index]->page;
                buffer = kmap_atomic(page);
        }

        btrfs_csum_final(crc, csum);
        if (memcmp(csum, on_disk_csum, sctx->csum_size))
                sblock->checksum_error = 1;

        return sblock->checksum_error;
}

static int scrub_checksum_tree_block(struct scrub_block *sblock)
{
        struct scrub_ctx *sctx = sblock->sctx;
        struct btrfs_header *h;
        struct btrfs_root *root = sctx->dev_root;
        struct btrfs_fs_info *fs_info = root->fs_info;
        u8 calculated_csum[BTRFS_CSUM_SIZE];
        u8 on_disk_csum[BTRFS_CSUM_SIZE];
        struct page *page;
        void *mapped_buffer;
        u64 mapped_size;
        void *p;
        u32 crc = ~(u32)0;
        u64 len;
        int index;

        BUG_ON(sblock->page_count < 1);
        page = sblock->pagev[0]->page;
        mapped_buffer = kmap_atomic(page);
        h = (struct btrfs_header *)mapped_buffer;
        memcpy(on_disk_csum, h->csum, sctx->csum_size);

        /*
         * we don't use the getter functions here, as we
         * a) don't have an extent buffer and
         * b) the page is already kmapped
         */
        if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h))
                sblock->header_error = 1;

        if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h)) {
                sblock->header_error = 1;
                sblock->generation_error = 1;
        }

        if (!scrub_check_fsid(h->fsid, sblock->pagev[0]))
                sblock->header_error = 1;

        if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
                   BTRFS_UUID_SIZE))
                sblock->header_error = 1;

        len = sctx->nodesize - BTRFS_CSUM_SIZE;
        mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
        p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
        index = 0;
        for (;;) {
                u64 l = min_t(u64, len, mapped_size);

                crc = btrfs_csum_data(p, crc, l);
                kunmap_atomic(mapped_buffer);
                len -= l;
                if (len == 0)
                        break;
                index++;
                BUG_ON(index >= sblock->page_count);
                BUG_ON(!sblock->pagev[index]->page);
                page = sblock->pagev[index]->page;
                mapped_buffer = kmap_atomic(page);
                mapped_size = PAGE_SIZE;
                p = mapped_buffer;
        }

        btrfs_csum_final(crc, calculated_csum);
        if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
                sblock->checksum_error = 1;

        return sblock->header_error || sblock->checksum_error;
}

static int scrub_checksum_super(struct scrub_block *sblock)
{
        struct btrfs_super_block *s;
        struct scrub_ctx *sctx = sblock->sctx;
        u8 calculated_csum[BTRFS_CSUM_SIZE];
        u8 on_disk_csum[BTRFS_CSUM_SIZE];
        struct page *page;
        void *mapped_buffer;
        u64 mapped_size;
        void *p;
        u32 crc = ~(u32)0;
        int fail_gen = 0;
        int fail_cor = 0;
        u64 len;
        int index;

        BUG_ON(sblock->page_count < 1);
        page = sblock->pagev[0]->page;
        mapped_buffer = kmap_atomic(page);
        s = (struct btrfs_super_block *)mapped_buffer;
        memcpy(on_disk_csum, s->csum, sctx->csum_size);

        if (sblock->pagev[0]->logical != btrfs_super_bytenr(s))
                ++fail_cor;

        if (sblock->pagev[0]->generation != btrfs_super_generation(s))
                ++fail_gen;

        if (!scrub_check_fsid(s->fsid, sblock->pagev[0]))
                ++fail_cor;

        len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE;
        mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
        p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
        index = 0;
        for (;;) {
                u64 l = min_t(u64, len, mapped_size);

                crc = btrfs_csum_data(p, crc, l);
                kunmap_atomic(mapped_buffer);
                len -= l;
                if (len == 0)
                        break;
                index++;
                BUG_ON(index >= sblock->page_count);
                BUG_ON(!sblock->pagev[index]->page);
                page = sblock->pagev[index]->page;
                mapped_buffer = kmap_atomic(page);
                mapped_size = PAGE_SIZE;
                p = mapped_buffer;
        }

        btrfs_csum_final(crc, calculated_csum);
        if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
                ++fail_cor;

        if (fail_cor + fail_gen) {
                /*
                 * if we find an error in a super block, we just report it.
                 * They will get written with the next transaction commit
                 * anyway
                 */
                spin_lock(&sctx->stat_lock);
                ++sctx->stat.super_errors;
                spin_unlock(&sctx->stat_lock);
                if (fail_cor)
                        btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
                                BTRFS_DEV_STAT_CORRUPTION_ERRS);
                else
                        btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
                                BTRFS_DEV_STAT_GENERATION_ERRS);
        }

        return fail_cor + fail_gen;
}

static void scrub_block_get(struct scrub_block *sblock)
{
        atomic_inc(&sblock->refs);
}

static void scrub_block_put(struct scrub_block *sblock)
{
        if (atomic_dec_and_test(&sblock->refs)) {
                int i;

                if (sblock->sparity)
                        scrub_parity_put(sblock->sparity);

                for (i = 0; i < sblock->page_count; i++)
                        scrub_page_put(sblock->pagev[i]);
                kfree(sblock);
        }
}

static void scrub_page_get(struct scrub_page *spage)
{
        atomic_inc(&spage->refs);
}

static void scrub_page_put(struct scrub_page *spage)
{
        if (atomic_dec_and_test(&spage->refs)) {
                if (spage->page)
                        __free_page(spage->page);
                kfree(spage);
        }
}

static void scrub_submit(struct scrub_ctx *sctx)
{
        struct scrub_bio *sbio;

        if (sctx->curr == -1)
                return;

        sbio = sctx->bios[sctx->curr];
        sctx->curr = -1;
        scrub_pending_bio_inc(sctx);
        btrfsic_submit_bio(READ, sbio->bio);
}

static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
                                    struct scrub_page *spage)
{
        struct scrub_block *sblock = spage->sblock;
        struct scrub_bio *sbio;
        int ret;

again:
        /*
         * grab a fresh bio or wait for one to become available
         */
        while (sctx->curr == -1) {
                spin_lock(&sctx->list_lock);
                sctx->curr = sctx->first_free;
                if (sctx->curr != -1) {
                        sctx->first_free = sctx->bios[sctx->curr]->next_free;
                        sctx->bios[sctx->curr]->next_free = -1;
                        sctx->bios[sctx->curr]->page_count = 0;
                        spin_unlock(&sctx->list_lock);
                } else {
                        spin_unlock(&sctx->list_lock);
                        wait_event(sctx->list_wait, sctx->first_free != -1);
                }
        }
        sbio = sctx->bios[sctx->curr];
        if (sbio->page_count == 0) {
                struct bio *bio;

                sbio->physical = spage->physical;
                sbio->logical = spage->logical;
                sbio->dev = spage->dev;
                bio = sbio->bio;
                if (!bio) {
                        bio = btrfs_io_bio_alloc(GFP_KERNEL,
                                                 sctx->pages_per_rd_bio);
                        if (!bio)
                                return -ENOMEM;
                        sbio->bio = bio;
                }

                bio->bi_private = sbio;
                bio->bi_end_io = scrub_bio_end_io;
                bio->bi_bdev = sbio->dev->bdev;
                bio->bi_iter.bi_sector = sbio->physical >> 9;
                sbio->err = 0;
        } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
                   spage->physical ||
                   sbio->logical + sbio->page_count * PAGE_SIZE !=
                   spage->logical ||
                   sbio->dev != spage->dev) {
                scrub_submit(sctx);
                goto again;
        }

        sbio->pagev[sbio->page_count] = spage;
        ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
        if (ret != PAGE_SIZE) {
                if (sbio->page_count < 1) {
                        bio_put(sbio->bio);
                        sbio->bio = NULL;
                        return -EIO;
                }
                scrub_submit(sctx);
                goto again;
        }

        scrub_block_get(sblock); /* one for the page added to the bio */
        atomic_inc(&sblock->outstanding_pages);
        sbio->page_count++;
        if (sbio->page_count == sctx->pages_per_rd_bio)
                scrub_submit(sctx);

        return 0;
}

static void scrub_missing_raid56_end_io(struct bio *bio)
{
        struct scrub_block *sblock = bio->bi_private;
        struct btrfs_fs_info *fs_info = sblock->sctx->dev_root->fs_info;

        if (bio->bi_error)
                sblock->no_io_error_seen = 0;

        btrfs_queue_work(fs_info->scrub_workers, &sblock->work);
}

static void scrub_missing_raid56_worker(struct btrfs_work *work)
{
        struct scrub_block *sblock = container_of(work, struct scrub_block,
                                                  work);
        struct scrub_ctx *sctx = sblock->sctx;
        u64 logical;
        struct btrfs_device *dev;

        logical = sblock->pagev[0]->logical;
        dev = sblock->pagev[0]->dev;

        if (sblock->no_io_error_seen)
                scrub_recheck_block_checksum(sblock);

        if (!sblock->no_io_error_seen) {
                spin_lock(&sctx->stat_lock);
                sctx->stat.read_errors++;
                spin_unlock(&sctx->stat_lock);
                btrfs_err_rl_in_rcu(sctx->dev_root->fs_info,
                        "IO error rebuilding logical %llu for dev %s",
                        logical, rcu_str_deref(dev->name));
        } else if (sblock->header_error || sblock->checksum_error) {
                spin_lock(&sctx->stat_lock);
                sctx->stat.uncorrectable_errors++;
                spin_unlock(&sctx->stat_lock);
                btrfs_err_rl_in_rcu(sctx->dev_root->fs_info,
                        "failed to rebuild valid logical %llu for dev %s",
                        logical, rcu_str_deref(dev->name));
        } else {
                scrub_write_block_to_dev_replace(sblock);
        }

        scrub_block_put(sblock);

        if (sctx->is_dev_replace &&
            atomic_read(&sctx->wr_ctx.flush_all_writes)) {
                mutex_lock(&sctx->wr_ctx.wr_lock);
                scrub_wr_submit(sctx);
                mutex_unlock(&sctx->wr_ctx.wr_lock);
        }

        scrub_pending_bio_dec(sctx);
}

static void scrub_missing_raid56_pages(struct scrub_block *sblock)
{
        struct scrub_ctx *sctx = sblock->sctx;
        struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
        u64 length = sblock->page_count * PAGE_SIZE;
        u64 logical = sblock->pagev[0]->logical;
        struct btrfs_bio *bbio;
        struct bio *bio;
        struct btrfs_raid_bio *rbio;
        int ret;
        int i;

        ret = btrfs_map_sblock(fs_info, REQ_GET_READ_MIRRORS, logical, &length,
                               &bbio, 0, 1);
        if (ret || !bbio || !bbio->raid_map)
                goto bbio_out;

        if (WARN_ON(!sctx->is_dev_replace ||
                    !(bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK))) {
                /*
                 * We shouldn't be scrubbing a missing device. Even for dev
                 * replace, we should only get here for RAID 5/6. We either
                 * managed to mount something with no mirrors remaining or
                 * there's a bug in scrub_remap_extent()/btrfs_map_block().
                 */
                goto bbio_out;
        }

        bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
        if (!bio)
                goto bbio_out;

        bio->bi_iter.bi_sector = logical >> 9;
        bio->bi_private = sblock;
        bio->bi_end_io = scrub_missing_raid56_end_io;

        rbio = raid56_alloc_missing_rbio(sctx->dev_root, bio, bbio, length);
        if (!rbio)
                goto rbio_out;

        for (i = 0; i < sblock->page_count; i++) {
                struct scrub_page *spage = sblock->pagev[i];

                raid56_add_scrub_pages(rbio, spage->page, spage->logical);
        }

        btrfs_init_work(&sblock->work, btrfs_scrub_helper,
                        scrub_missing_raid56_worker, NULL, NULL);
        scrub_block_get(sblock);
        scrub_pending_bio_inc(sctx);
        raid56_submit_missing_rbio(rbio);
        return;

rbio_out:
        bio_put(bio);
bbio_out:
        btrfs_put_bbio(bbio);
        spin_lock(&sctx->stat_lock);
        sctx->stat.malloc_errors++;
        spin_unlock(&sctx->stat_lock);
}
2237
2238static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
2239 u64 physical, struct btrfs_device *dev, u64 flags,
2240 u64 gen, int mirror_num, u8 *csum, int force,
2241 u64 physical_for_dev_replace)
2242{
2243 struct scrub_block *sblock;
2244 int index;
2245
2246 sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
2247 if (!sblock) {
2248 spin_lock(&sctx->stat_lock);
2249 sctx->stat.malloc_errors++;
2250 spin_unlock(&sctx->stat_lock);
2251 return -ENOMEM;
2252 }
2253
2254
2255
2256 atomic_set(&sblock->refs, 1);
2257 sblock->sctx = sctx;
2258 sblock->no_io_error_seen = 1;
2259
2260 for (index = 0; len > 0; index++) {
2261 struct scrub_page *spage;
2262 u64 l = min_t(u64, len, PAGE_SIZE);
2263
2264 spage = kzalloc(sizeof(*spage), GFP_KERNEL);
2265 if (!spage) {
2266leave_nomem:
2267 spin_lock(&sctx->stat_lock);
2268 sctx->stat.malloc_errors++;
2269 spin_unlock(&sctx->stat_lock);
2270 scrub_block_put(sblock);
2271 return -ENOMEM;
2272 }
2273 BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
2274 scrub_page_get(spage);
2275 sblock->pagev[index] = spage;
2276 spage->sblock = sblock;
2277 spage->dev = dev;
2278 spage->flags = flags;
2279 spage->generation = gen;
2280 spage->logical = logical;
2281 spage->physical = physical;
2282 spage->physical_for_dev_replace = physical_for_dev_replace;
2283 spage->mirror_num = mirror_num;
2284 if (csum) {
2285 spage->have_csum = 1;
2286 memcpy(spage->csum, csum, sctx->csum_size);
2287 } else {
2288 spage->have_csum = 0;
2289 }
2290 sblock->page_count++;
2291 spage->page = alloc_page(GFP_KERNEL);
2292 if (!spage->page)
2293 goto leave_nomem;
2294 len -= l;
2295 logical += l;
2296 physical += l;
2297 physical_for_dev_replace += l;
2298 }
2299
2300 WARN_ON(sblock->page_count == 0);
2301 if (dev->missing) {
2302		/*
2303		 * This case should only be hit for RAID 5/6 device replace.
2304		 * See the comment in scrub_missing_raid56_pages() for details.
2305		 */
2306 scrub_missing_raid56_pages(sblock);
2307 } else {
2308 for (index = 0; index < sblock->page_count; index++) {
2309 struct scrub_page *spage = sblock->pagev[index];
2310 int ret;
2311
2312 ret = scrub_add_page_to_rd_bio(sctx, spage);
2313 if (ret) {
2314 scrub_block_put(sblock);
2315 return ret;
2316 }
2317 }
2318
2319 if (force)
2320 scrub_submit(sctx);
2321 }
2322
2323	/* last one frees, either here or in bio completion for last page */
2324 scrub_block_put(sblock);
2325 return 0;
2326}
2327
2328static void scrub_bio_end_io(struct bio *bio)
2329{
2330 struct scrub_bio *sbio = bio->bi_private;
2331 struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info;
2332
2333 sbio->err = bio->bi_error;
2334 sbio->bio = bio;
2335
2336 btrfs_queue_work(fs_info->scrub_workers, &sbio->work);
2337}
2338
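/*
 * Worker side of read-bio completion: on error, flag every page of the
 * bio so the repair code sees it; complete each scrub_block whose last
 * outstanding page just finished; finally recycle the scrub_bio onto
 * the free list.
 */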
2339static void scrub_bio_end_io_worker(struct btrfs_work *work)
2340{
2341 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
2342 struct scrub_ctx *sctx = sbio->sctx;
2343 int i;
2344
2345 BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
2346 if (sbio->err) {
2347 for (i = 0; i < sbio->page_count; i++) {
2348 struct scrub_page *spage = sbio->pagev[i];
2349
2350 spage->io_error = 1;
2351 spage->sblock->no_io_error_seen = 0;
2352 }
2353 }
2354
2355	/* now complete the scrub_block items that have all pages completed */
2356 for (i = 0; i < sbio->page_count; i++) {
2357 struct scrub_page *spage = sbio->pagev[i];
2358 struct scrub_block *sblock = spage->sblock;
2359
2360 if (atomic_dec_and_test(&sblock->outstanding_pages))
2361 scrub_block_complete(sblock);
2362 scrub_block_put(sblock);
2363 }
2364
2365 bio_put(sbio->bio);
2366 sbio->bio = NULL;
2367 spin_lock(&sctx->list_lock);
2368 sbio->next_free = sctx->first_free;
2369 sctx->first_free = sbio->index;
2370 spin_unlock(&sctx->list_lock);
2371
2372 if (sctx->is_dev_replace &&
2373 atomic_read(&sctx->wr_ctx.flush_all_writes)) {
2374 mutex_lock(&sctx->wr_ctx.wr_lock);
2375 scrub_wr_submit(sctx);
2376 mutex_unlock(&sctx->wr_ctx.wr_lock);
2377 }
2378
2379 scrub_pending_bio_dec(sctx);
2380}
2381
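/*
 * Mark the sectors of [start, start + len) in a per-stripe bitmap. The
 * bit offset is (start - logic_start) modulo stripe_len, in sectorsize
 * units, and a range crossing the stripe end wraps around to bit 0.
 *
 * Worked example (illustrative numbers only): stripe_len = 64K and
 * sectorsize = 4K give 16 bits per stripe; a 12K range starting 8K into
 * the stripe sets bits 2-4.
 */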
2382static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
2383 unsigned long *bitmap,
2384 u64 start, u64 len)
2385{
2386 u32 offset;
2387 int nsectors;
2388 int sectorsize = sparity->sctx->dev_root->sectorsize;
2389
2390 if (len >= sparity->stripe_len) {
2391 bitmap_set(bitmap, 0, sparity->nsectors);
2392 return;
2393 }
2394
2395 start -= sparity->logic_start;
2396 start = div_u64_rem(start, sparity->stripe_len, &offset);
2397 offset /= sectorsize;
2398 nsectors = (int)len / sectorsize;
2399
2400 if (offset + nsectors <= sparity->nsectors) {
2401 bitmap_set(bitmap, offset, nsectors);
2402 return;
2403 }
2404
2405 bitmap_set(bitmap, offset, sparity->nsectors - offset);
2406 bitmap_set(bitmap, 0, nsectors - (sparity->nsectors - offset));
2407}
2408
2409static inline void scrub_parity_mark_sectors_error(struct scrub_parity *sparity,
2410 u64 start, u64 len)
2411{
2412 __scrub_mark_bitmap(sparity, sparity->ebitmap, start, len);
2413}
2414
2415static inline void scrub_parity_mark_sectors_data(struct scrub_parity *sparity,
2416 u64 start, u64 len)
2417{
2418 __scrub_mark_bitmap(sparity, sparity->dbitmap, start, len);
2419}
2420
2421static void scrub_block_complete(struct scrub_block *sblock)
2422{
2423 int corrupted = 0;
2424
2425 if (!sblock->no_io_error_seen) {
2426 corrupted = 1;
2427 scrub_handle_errored_block(sblock);
2428 } else {
2429		/*
2430		 * If the block ran into a checksum error it is handed to the
2431		 * repair machinery; otherwise, in the dev-replace case, it is
2432		 * written out to the target device right here.
2433		 */
2434 corrupted = scrub_checksum(sblock);
2435 if (!corrupted && sblock->sctx->is_dev_replace)
2436 scrub_write_block_to_dev_replace(sblock);
2437 }
2438
2439 if (sblock->sparity && corrupted && !sblock->data_corrected) {
2440 u64 start = sblock->pagev[0]->logical;
2441 u64 end = sblock->pagev[sblock->page_count - 1]->logical +
2442 PAGE_SIZE;
2443
2444 scrub_parity_mark_sectors_error(sblock->sparity,
2445 start, end - start);
2446 }
2447}
2448
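/*
 * Find the checksum for the sector at @logical in sctx->csum_list, which
 * holds the sums in ascending bytenr order. Entries that end at or
 * before @logical are stale and freed (counted as csum_discards).
 * Returns 1 and copies the csum out if a sum covers @logical, 0
 * otherwise; a sum is freed once its last sector has been consumed.
 */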
2449static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum)
2450{
2451 struct btrfs_ordered_sum *sum = NULL;
2452 unsigned long index;
2453 unsigned long num_sectors;
2454
2455 while (!list_empty(&sctx->csum_list)) {
2456 sum = list_first_entry(&sctx->csum_list,
2457 struct btrfs_ordered_sum, list);
2458 if (sum->bytenr > logical)
2459 return 0;
2460 if (sum->bytenr + sum->len > logical)
2461 break;
2462
2463 ++sctx->stat.csum_discards;
2464 list_del(&sum->list);
2465 kfree(sum);
2466 sum = NULL;
2467 }
2468 if (!sum)
2469 return 0;
2470
2471 index = ((u32)(logical - sum->bytenr)) / sctx->sectorsize;
2472 num_sectors = sum->len / sctx->sectorsize;
2473 memcpy(csum, sum->sums + index, sctx->csum_size);
2474 if (index == num_sectors - 1) {
2475 list_del(&sum->list);
2476 kfree(sum);
2477 }
2478 return 1;
2479}
2480
2481/* scrub extent tries to collect up to 64 kB for each bio */
2482static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len,
2483 u64 physical, struct btrfs_device *dev, u64 flags,
2484 u64 gen, int mirror_num, u64 physical_for_dev_replace)
2485{
2486 int ret;
2487 u8 csum[BTRFS_CSUM_SIZE];
2488 u32 blocksize;
2489
2490 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2491 blocksize = sctx->sectorsize;
2492 spin_lock(&sctx->stat_lock);
2493 sctx->stat.data_extents_scrubbed++;
2494 sctx->stat.data_bytes_scrubbed += len;
2495 spin_unlock(&sctx->stat_lock);
2496 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
2497 blocksize = sctx->nodesize;
2498 spin_lock(&sctx->stat_lock);
2499 sctx->stat.tree_extents_scrubbed++;
2500 sctx->stat.tree_bytes_scrubbed += len;
2501 spin_unlock(&sctx->stat_lock);
2502 } else {
2503 blocksize = sctx->sectorsize;
2504 WARN_ON(1);
2505 }
2506
2507 while (len) {
2508 u64 l = min_t(u64, len, blocksize);
2509 int have_csum = 0;
2510
2511 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2512			/* push csums to sbio */
2513 have_csum = scrub_find_csum(sctx, logical, csum);
2514 if (have_csum == 0)
2515 ++sctx->stat.no_csum;
2516 if (sctx->is_dev_replace && !have_csum) {
2517 ret = copy_nocow_pages(sctx, logical, l,
2518 mirror_num,
2519 physical_for_dev_replace);
2520 goto behind_scrub_pages;
2521 }
2522 }
2523 ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen,
2524 mirror_num, have_csum ? csum : NULL, 0,
2525 physical_for_dev_replace);
2526behind_scrub_pages:
2527 if (ret)
2528 return ret;
2529 len -= l;
2530 logical += l;
2531 physical += l;
2532 physical_for_dev_replace += l;
2533 }
2534 return 0;
2535}
2536
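/*
 * Like scrub_pages(), but for blocks inside a RAID5/6 full stripe that
 * is being parity checked: every page takes a second reference and is
 * additionally linked into sparity->spages, so the parity check can
 * reuse the page contents after the read bios complete.
 */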
2537static int scrub_pages_for_parity(struct scrub_parity *sparity,
2538 u64 logical, u64 len,
2539 u64 physical, struct btrfs_device *dev,
2540 u64 flags, u64 gen, int mirror_num, u8 *csum)
2541{
2542 struct scrub_ctx *sctx = sparity->sctx;
2543 struct scrub_block *sblock;
2544 int index;
2545
2546 sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
2547 if (!sblock) {
2548 spin_lock(&sctx->stat_lock);
2549 sctx->stat.malloc_errors++;
2550 spin_unlock(&sctx->stat_lock);
2551 return -ENOMEM;
2552 }
2553
2554	/* one ref inside this function, plus one for each page added to
2555	 * a bio later on */
2556 atomic_set(&sblock->refs, 1);
2557 sblock->sctx = sctx;
2558 sblock->no_io_error_seen = 1;
2559 sblock->sparity = sparity;
2560 scrub_parity_get(sparity);
2561
2562 for (index = 0; len > 0; index++) {
2563 struct scrub_page *spage;
2564 u64 l = min_t(u64, len, PAGE_SIZE);
2565
2566 spage = kzalloc(sizeof(*spage), GFP_KERNEL);
2567 if (!spage) {
2568leave_nomem:
2569 spin_lock(&sctx->stat_lock);
2570 sctx->stat.malloc_errors++;
2571 spin_unlock(&sctx->stat_lock);
2572 scrub_block_put(sblock);
2573 return -ENOMEM;
2574 }
2575 BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
2576		/* For scrub block */
2577 scrub_page_get(spage);
2578 sblock->pagev[index] = spage;
2579		/* For scrub parity */
2580 scrub_page_get(spage);
2581 list_add_tail(&spage->list, &sparity->spages);
2582 spage->sblock = sblock;
2583 spage->dev = dev;
2584 spage->flags = flags;
2585 spage->generation = gen;
2586 spage->logical = logical;
2587 spage->physical = physical;
2588 spage->mirror_num = mirror_num;
2589 if (csum) {
2590 spage->have_csum = 1;
2591 memcpy(spage->csum, csum, sctx->csum_size);
2592 } else {
2593 spage->have_csum = 0;
2594 }
2595 sblock->page_count++;
2596 spage->page = alloc_page(GFP_KERNEL);
2597 if (!spage->page)
2598 goto leave_nomem;
2599 len -= l;
2600 logical += l;
2601 physical += l;
2602 }
2603
2604 WARN_ON(sblock->page_count == 0);
2605 for (index = 0; index < sblock->page_count; index++) {
2606 struct scrub_page *spage = sblock->pagev[index];
2607 int ret;
2608
2609 ret = scrub_add_page_to_rd_bio(sctx, spage);
2610 if (ret) {
2611 scrub_block_put(sblock);
2612 return ret;
2613 }
2614 }
2615
2616	/* last one frees, either here or in bio completion for last page */
2617 scrub_block_put(sblock);
2618 return 0;
2619}
2620
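/*
 * Scrub the portion of an extent that falls into the current RAID5/6
 * stripe. Extents on missing devices are only marked in the error
 * bitmap; data sectors without a checksum are skipped, since they
 * cannot be verified here.
 */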
2621static int scrub_extent_for_parity(struct scrub_parity *sparity,
2622 u64 logical, u64 len,
2623 u64 physical, struct btrfs_device *dev,
2624 u64 flags, u64 gen, int mirror_num)
2625{
2626 struct scrub_ctx *sctx = sparity->sctx;
2627 int ret;
2628 u8 csum[BTRFS_CSUM_SIZE];
2629 u32 blocksize;
2630
2631 if (dev->missing) {
2632 scrub_parity_mark_sectors_error(sparity, logical, len);
2633 return 0;
2634 }
2635
2636 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2637 blocksize = sctx->sectorsize;
2638 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
2639 blocksize = sctx->nodesize;
2640 } else {
2641 blocksize = sctx->sectorsize;
2642 WARN_ON(1);
2643 }
2644
2645 while (len) {
2646 u64 l = min_t(u64, len, blocksize);
2647 int have_csum = 0;
2648
2649 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2650			/* push csums to sbio */
2651 have_csum = scrub_find_csum(sctx, logical, csum);
2652 if (have_csum == 0)
2653 goto skip;
2654 }
2655 ret = scrub_pages_for_parity(sparity, logical, l, physical, dev,
2656 flags, gen, mirror_num,
2657 have_csum ? csum : NULL);
2658 if (ret)
2659 return ret;
2660skip:
2661 len -= l;
2662 logical += l;
2663 physical += l;
2664 }
2665 return 0;
2666}
2667
2668/*
2669 * Given a physical address, this will calculate its logical offset.
2670 * If this is a parity stripe, it will return the left-most data
2671 * stripe's logical offset.
2672 *
2673 * Return 0 if it is a data stripe, 1 if it is a parity stripe.
2674 */
2675static int get_raid56_logic_offset(u64 physical, int num,
2676 struct map_lookup *map, u64 *offset,
2677 u64 *stripe_start)
2678{
2679 int i;
2680 int j = 0;
2681 u64 stripe_nr;
2682 u64 last_offset;
2683 u32 stripe_index;
2684 u32 rot;
2685
2686 last_offset = (physical - map->stripes[num].physical) *
2687 nr_data_stripes(map);
2688 if (stripe_start)
2689 *stripe_start = last_offset;
2690
2691 *offset = last_offset;
2692 for (i = 0; i < nr_data_stripes(map); i++) {
2693 *offset = last_offset + i * map->stripe_len;
2694
2695 stripe_nr = div_u64(*offset, map->stripe_len);
2696 stripe_nr = div_u64(stripe_nr, nr_data_stripes(map));
2697
2698		/* Work out the disk rotation on this stripe-set */
2699 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &rot);
2700		/* calculate which stripe this data is located on */
2701 rot += i;
2702 stripe_index = rot % map->num_stripes;
2703 if (stripe_index == num)
2704 return 0;
2705 if (stripe_index < num)
2706 j++;
2707 }
2708 *offset = last_offset + j * map->stripe_len;
2709 return 1;
2710}
2711
2712static void scrub_free_parity(struct scrub_parity *sparity)
2713{
2714 struct scrub_ctx *sctx = sparity->sctx;
2715 struct scrub_page *curr, *next;
2716 int nbits;
2717
2718 nbits = bitmap_weight(sparity->ebitmap, sparity->nsectors);
2719 if (nbits) {
2720 spin_lock(&sctx->stat_lock);
2721 sctx->stat.read_errors += nbits;
2722 sctx->stat.uncorrectable_errors += nbits;
2723 spin_unlock(&sctx->stat_lock);
2724 }
2725
2726 list_for_each_entry_safe(curr, next, &sparity->spages, list) {
2727 list_del_init(&curr->list);
2728 scrub_page_put(curr);
2729 }
2730
2731 kfree(sparity);
2732}
2733
2734static void scrub_parity_bio_endio_worker(struct btrfs_work *work)
2735{
2736 struct scrub_parity *sparity = container_of(work, struct scrub_parity,
2737 work);
2738 struct scrub_ctx *sctx = sparity->sctx;
2739
2740 scrub_free_parity(sparity);
2741 scrub_pending_bio_dec(sctx);
2742}
2743
2744static void scrub_parity_bio_endio(struct bio *bio)
2745{
2746 struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
2747
2748 if (bio->bi_error)
2749 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
2750 sparity->nsectors);
2751
2752 bio_put(bio);
2753
2754 btrfs_init_work(&sparity->work, btrfs_scrubparity_helper,
2755 scrub_parity_bio_endio_worker, NULL, NULL);
2756 btrfs_queue_work(sparity->sctx->dev_root->fs_info->scrub_parity_workers,
2757 &sparity->work);
2758}
2759
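/*
 * Start the actual parity check for an assembled scrub_parity. Sectors
 * that already failed reading (ebitmap) are removed from the set of
 * sectors to check (dbitmap); if nothing remains, or if mapping/bio
 * allocation fails, the parity is torn down immediately and the failure
 * is accounted.
 */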
2760static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
2761{
2762 struct scrub_ctx *sctx = sparity->sctx;
2763 struct bio *bio;
2764 struct btrfs_raid_bio *rbio;
2765 struct scrub_page *spage;
2766 struct btrfs_bio *bbio = NULL;
2767 u64 length;
2768 int ret;
2769
2770 if (!bitmap_andnot(sparity->dbitmap, sparity->dbitmap, sparity->ebitmap,
2771 sparity->nsectors))
2772 goto out;
2773
2774 length = sparity->logic_end - sparity->logic_start;
2775 ret = btrfs_map_sblock(sctx->dev_root->fs_info, WRITE,
2776 sparity->logic_start,
2777 &length, &bbio, 0, 1);
2778 if (ret || !bbio || !bbio->raid_map)
2779 goto bbio_out;
2780
2781 bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
2782 if (!bio)
2783 goto bbio_out;
2784
2785 bio->bi_iter.bi_sector = sparity->logic_start >> 9;
2786 bio->bi_private = sparity;
2787 bio->bi_end_io = scrub_parity_bio_endio;
2788
2789 rbio = raid56_parity_alloc_scrub_rbio(sctx->dev_root, bio, bbio,
2790 length, sparity->scrub_dev,
2791 sparity->dbitmap,
2792 sparity->nsectors);
2793 if (!rbio)
2794 goto rbio_out;
2795
2796 list_for_each_entry(spage, &sparity->spages, list)
2797 raid56_add_scrub_pages(rbio, spage->page, spage->logical);
2798
2799 scrub_pending_bio_inc(sctx);
2800 raid56_parity_submit_scrub_rbio(rbio);
2801 return;
2802
2803rbio_out:
2804 bio_put(bio);
2805bbio_out:
2806 btrfs_put_bbio(bbio);
2807 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
2808 sparity->nsectors);
2809 spin_lock(&sctx->stat_lock);
2810 sctx->stat.malloc_errors++;
2811 spin_unlock(&sctx->stat_lock);
2812out:
2813 scrub_free_parity(sparity);
2814}
2815
2816static inline int scrub_calc_parity_bitmap_len(int nsectors)
2817{
2818 return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * sizeof(long);
2819}
2820
2821static void scrub_parity_get(struct scrub_parity *sparity)
2822{
2823 atomic_inc(&sparity->refs);
2824}
2825
2826static void scrub_parity_put(struct scrub_parity *sparity)
2827{
2828 if (!atomic_dec_and_test(&sparity->refs))
2829 return;
2830
2831 scrub_parity_check_and_repair(sparity);
2832}
2833
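/*
 * Scrub the parity of a RAID5/6 full stripe range: walk all extent
 * items in [logic_start, logic_end), record their sectors in the data
 * bitmap, read them in, and finally trigger the parity verification
 * through the last scrub_parity_put().
 */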
2834static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
2835 struct map_lookup *map,
2836 struct btrfs_device *sdev,
2837 struct btrfs_path *path,
2838 u64 logic_start,
2839 u64 logic_end)
2840{
2841 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
2842 struct btrfs_root *root = fs_info->extent_root;
2843 struct btrfs_root *csum_root = fs_info->csum_root;
2844 struct btrfs_extent_item *extent;
2845 struct btrfs_bio *bbio = NULL;
2846 u64 flags;
2847 int ret;
2848 int slot;
2849 struct extent_buffer *l;
2850 struct btrfs_key key;
2851 u64 generation;
2852 u64 extent_logical;
2853 u64 extent_physical;
2854 u64 extent_len;
2855 u64 mapped_length;
2856 struct btrfs_device *extent_dev;
2857 struct scrub_parity *sparity;
2858 int nsectors;
2859 int bitmap_len;
2860 int extent_mirror_num;
2861 int stop_loop = 0;
2862
2863 nsectors = map->stripe_len / root->sectorsize;
2864 bitmap_len = scrub_calc_parity_bitmap_len(nsectors);
2865 sparity = kzalloc(sizeof(struct scrub_parity) + 2 * bitmap_len,
2866 GFP_NOFS);
2867 if (!sparity) {
2868 spin_lock(&sctx->stat_lock);
2869 sctx->stat.malloc_errors++;
2870 spin_unlock(&sctx->stat_lock);
2871 return -ENOMEM;
2872 }
2873
2874 sparity->stripe_len = map->stripe_len;
2875 sparity->nsectors = nsectors;
2876 sparity->sctx = sctx;
2877 sparity->scrub_dev = sdev;
2878 sparity->logic_start = logic_start;
2879 sparity->logic_end = logic_end;
2880 atomic_set(&sparity->refs, 1);
2881 INIT_LIST_HEAD(&sparity->spages);
2882 sparity->dbitmap = sparity->bitmap;
2883 sparity->ebitmap = (void *)sparity->bitmap + bitmap_len;
2884
2885 ret = 0;
2886 while (logic_start < logic_end) {
2887 if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
2888 key.type = BTRFS_METADATA_ITEM_KEY;
2889 else
2890 key.type = BTRFS_EXTENT_ITEM_KEY;
2891 key.objectid = logic_start;
2892 key.offset = (u64)-1;
2893
2894 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2895 if (ret < 0)
2896 goto out;
2897
2898 if (ret > 0) {
2899 ret = btrfs_previous_extent_item(root, path, 0);
2900 if (ret < 0)
2901 goto out;
2902 if (ret > 0) {
2903 btrfs_release_path(path);
2904 ret = btrfs_search_slot(NULL, root, &key,
2905 path, 0, 0);
2906 if (ret < 0)
2907 goto out;
2908 }
2909 }
2910
2911 stop_loop = 0;
2912 while (1) {
2913 u64 bytes;
2914
2915 l = path->nodes[0];
2916 slot = path->slots[0];
2917 if (slot >= btrfs_header_nritems(l)) {
2918 ret = btrfs_next_leaf(root, path);
2919 if (ret == 0)
2920 continue;
2921 if (ret < 0)
2922 goto out;
2923
2924 stop_loop = 1;
2925 break;
2926 }
2927 btrfs_item_key_to_cpu(l, &key, slot);
2928
2929 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
2930 key.type != BTRFS_METADATA_ITEM_KEY)
2931 goto next;
2932
2933 if (key.type == BTRFS_METADATA_ITEM_KEY)
2934 bytes = root->nodesize;
2935 else
2936 bytes = key.offset;
2937
2938 if (key.objectid + bytes <= logic_start)
2939 goto next;
2940
2941 if (key.objectid >= logic_end) {
2942 stop_loop = 1;
2943 break;
2944 }
2945
2946 while (key.objectid >= logic_start + map->stripe_len)
2947 logic_start += map->stripe_len;
2948
2949 extent = btrfs_item_ptr(l, slot,
2950 struct btrfs_extent_item);
2951 flags = btrfs_extent_flags(l, extent);
2952 generation = btrfs_extent_generation(l, extent);
2953
2954 if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
2955 (key.objectid < logic_start ||
2956 key.objectid + bytes >
2957 logic_start + map->stripe_len)) {
2958 btrfs_err(fs_info, "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
2959 key.objectid, logic_start);
2960 spin_lock(&sctx->stat_lock);
2961 sctx->stat.uncorrectable_errors++;
2962 spin_unlock(&sctx->stat_lock);
2963 goto next;
2964 }
2965again:
2966 extent_logical = key.objectid;
2967 extent_len = bytes;
2968
2969 if (extent_logical < logic_start) {
2970 extent_len -= logic_start - extent_logical;
2971 extent_logical = logic_start;
2972 }
2973
2974 if (extent_logical + extent_len >
2975 logic_start + map->stripe_len)
2976 extent_len = logic_start + map->stripe_len -
2977 extent_logical;
2978
2979 scrub_parity_mark_sectors_data(sparity, extent_logical,
2980 extent_len);
2981
2982 mapped_length = extent_len;
2983 ret = btrfs_map_block(fs_info, READ, extent_logical,
2984 &mapped_length, &bbio, 0);
2985 if (!ret) {
2986 if (!bbio || mapped_length < extent_len)
2987 ret = -EIO;
2988 }
2989 if (ret) {
2990 btrfs_put_bbio(bbio);
2991 goto out;
2992 }
2993 extent_physical = bbio->stripes[0].physical;
2994 extent_mirror_num = bbio->mirror_num;
2995 extent_dev = bbio->stripes[0].dev;
2996 btrfs_put_bbio(bbio);
2997
2998 ret = btrfs_lookup_csums_range(csum_root,
2999 extent_logical,
3000 extent_logical + extent_len - 1,
3001 &sctx->csum_list, 1);
3002 if (ret)
3003 goto out;
3004
3005 ret = scrub_extent_for_parity(sparity, extent_logical,
3006 extent_len,
3007 extent_physical,
3008 extent_dev, flags,
3009 generation,
3010 extent_mirror_num);
3011
3012 scrub_free_csums(sctx);
3013
3014 if (ret)
3015 goto out;
3016
3017 if (extent_logical + extent_len <
3018 key.objectid + bytes) {
3019 logic_start += map->stripe_len;
3020
3021 if (logic_start >= logic_end) {
3022 stop_loop = 1;
3023 break;
3024 }
3025
3026 if (logic_start < key.objectid + bytes) {
3027 cond_resched();
3028 goto again;
3029 }
3030 }
3031next:
3032 path->slots[0]++;
3033 }
3034
3035 btrfs_release_path(path);
3036
3037 if (stop_loop)
3038 break;
3039
3040 logic_start += map->stripe_len;
3041 }
3042out:
3043 if (ret < 0)
3044 scrub_parity_mark_sectors_error(sparity, logic_start,
3045 logic_end - logic_start);
3046 scrub_parity_put(sparity);
3047 scrub_submit(sctx);
3048 mutex_lock(&sctx->wr_ctx.wr_lock);
3049 scrub_wr_submit(sctx);
3050 mutex_unlock(&sctx->wr_ctx.wr_lock);
3051
3052 btrfs_release_path(path);
3053 return ret < 0 ? ret : 0;
3054}
3055
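/*
 * Scrub one device stripe of a chunk. The logical advance per physical
 * stripe (@increment) and the mirror number depend on the RAID profile;
 * after readahead of the extent and csum trees, every extent item that
 * intersects a stripe is trimmed to it and scrubbed. On RAID5/6, parity
 * stripes are handed to scrub_raid56_parity() instead.
 */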
3056static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
3057 struct map_lookup *map,
3058 struct btrfs_device *scrub_dev,
3059 int num, u64 base, u64 length,
3060 int is_dev_replace)
3061{
3062 struct btrfs_path *path, *ppath;
3063 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
3064 struct btrfs_root *root = fs_info->extent_root;
3065 struct btrfs_root *csum_root = fs_info->csum_root;
3066 struct btrfs_extent_item *extent;
3067 struct blk_plug plug;
3068 u64 flags;
3069 int ret;
3070 int slot;
3071 u64 nstripes;
3072 struct extent_buffer *l;
3073 struct btrfs_key key;
3074 u64 physical;
3075 u64 logical;
3076 u64 logic_end;
3077 u64 physical_end;
3078 u64 generation;
3079 int mirror_num;
3080 struct reada_control *reada1;
3081 struct reada_control *reada2;
3082 struct btrfs_key key_start;
3083 struct btrfs_key key_end;
3084 u64 increment = map->stripe_len;
3085 u64 offset;
3086 u64 extent_logical;
3087 u64 extent_physical;
3088 u64 extent_len;
3089 u64 stripe_logical;
3090 u64 stripe_end;
3091 struct btrfs_device *extent_dev;
3092 int extent_mirror_num;
3093 int stop_loop = 0;
3094
3095 physical = map->stripes[num].physical;
3096 offset = 0;
3097 nstripes = div_u64(length, map->stripe_len);
3098 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
3099 offset = map->stripe_len * num;
3100 increment = map->stripe_len * map->num_stripes;
3101 mirror_num = 1;
3102 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
3103 int factor = map->num_stripes / map->sub_stripes;
3104 offset = map->stripe_len * (num / map->sub_stripes);
3105 increment = map->stripe_len * factor;
3106 mirror_num = num % map->sub_stripes + 1;
3107 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
3108 increment = map->stripe_len;
3109 mirror_num = num % map->num_stripes + 1;
3110 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
3111 increment = map->stripe_len;
3112 mirror_num = num % map->num_stripes + 1;
3113 } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3114 get_raid56_logic_offset(physical, num, map, &offset, NULL);
3115 increment = map->stripe_len * nr_data_stripes(map);
3116 mirror_num = 1;
3117 } else {
3118 increment = map->stripe_len;
3119 mirror_num = 1;
3120 }
3121
3122 path = btrfs_alloc_path();
3123 if (!path)
3124 return -ENOMEM;
3125
3126 ppath = btrfs_alloc_path();
3127 if (!ppath) {
3128 btrfs_free_path(path);
3129 return -ENOMEM;
3130 }
3131
3132	/*
3133	 * work on commit root. The related disk blocks are static as
3134	 * long as COW is applied. This means it is safe to rewrite
3135	 * them to repair disk errors without any race conditions.
3136	 */
3137 path->search_commit_root = 1;
3138 path->skip_locking = 1;
3139
3140 ppath->search_commit_root = 1;
3141 ppath->skip_locking = 1;
3142
3143	/* trigger the readahead for the extent and csum trees and wait
3144	 * for completion. During readahead, the scrub is officially
3145	 * paused to not hold off transaction commits.
3146	 */
3147 logical = base + offset;
3148 physical_end = physical + nstripes * map->stripe_len;
3149 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3150 get_raid56_logic_offset(physical_end, num,
3151 map, &logic_end, NULL);
3152 logic_end += base;
3153 } else {
3154 logic_end = logical + increment * nstripes;
3155 }
3156 wait_event(sctx->list_wait,
3157 atomic_read(&sctx->bios_in_flight) == 0);
3158 scrub_blocked_if_needed(fs_info);
3159
3160	/* FIXME it might be better to start readahead at commit root */
3161 key_start.objectid = logical;
3162 key_start.type = BTRFS_EXTENT_ITEM_KEY;
3163 key_start.offset = (u64)0;
3164 key_end.objectid = logic_end;
3165 key_end.type = BTRFS_METADATA_ITEM_KEY;
3166 key_end.offset = (u64)-1;
3167 reada1 = btrfs_reada_add(root, &key_start, &key_end);
3168
3169 key_start.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
3170 key_start.type = BTRFS_EXTENT_CSUM_KEY;
3171 key_start.offset = logical;
3172 key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
3173 key_end.type = BTRFS_EXTENT_CSUM_KEY;
3174 key_end.offset = logic_end;
3175 reada2 = btrfs_reada_add(csum_root, &key_start, &key_end);
3176
3177 if (!IS_ERR(reada1))
3178 btrfs_reada_wait(reada1);
3179 if (!IS_ERR(reada2))
3180 btrfs_reada_wait(reada2);
3181
3182
3183	/*
3184	 * collect all data csums for the stripe to avoid seeking during
3185	 * the scrub. This might currently (crc32) end up to be about 1MB
3186	 */
3187 blk_start_plug(&plug);
3188
3189	/*
3190	 * now find all extents for each stripe and scrub them
3191	 */
3192 ret = 0;
3193 while (physical < physical_end) {
3194		/*
3195		 * canceled?
3196		 */
3197 if (atomic_read(&fs_info->scrub_cancel_req) ||
3198 atomic_read(&sctx->cancel_req)) {
3199 ret = -ECANCELED;
3200 goto out;
3201 }
3202
3203
3204		/* check to see if we have to pause */
3205 if (atomic_read(&fs_info->scrub_pause_req)) {
3206			/* push queued extents */
3207 atomic_set(&sctx->wr_ctx.flush_all_writes, 1);
3208 scrub_submit(sctx);
3209 mutex_lock(&sctx->wr_ctx.wr_lock);
3210 scrub_wr_submit(sctx);
3211 mutex_unlock(&sctx->wr_ctx.wr_lock);
3212 wait_event(sctx->list_wait,
3213 atomic_read(&sctx->bios_in_flight) == 0);
3214 atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
3215 scrub_blocked_if_needed(fs_info);
3216 }
3217
3218 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3219 ret = get_raid56_logic_offset(physical, num, map,
3220 &logical,
3221 &stripe_logical);
3222 logical += base;
3223 if (ret) {
3224				/* it is a parity stripe */
3225 stripe_logical += base;
3226 stripe_end = stripe_logical + increment;
3227 ret = scrub_raid56_parity(sctx, map, scrub_dev,
3228 ppath, stripe_logical,
3229 stripe_end);
3230 if (ret)
3231 goto out;
3232 goto skip;
3233 }
3234 }
3235
3236 if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
3237 key.type = BTRFS_METADATA_ITEM_KEY;
3238 else
3239 key.type = BTRFS_EXTENT_ITEM_KEY;
3240 key.objectid = logical;
3241 key.offset = (u64)-1;
3242
3243 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3244 if (ret < 0)
3245 goto out;
3246
3247 if (ret > 0) {
3248 ret = btrfs_previous_extent_item(root, path, 0);
3249 if (ret < 0)
3250 goto out;
3251 if (ret > 0) {
3252				/* there's no smaller item, so stick with the
3253				 * larger one */
3254 btrfs_release_path(path);
3255 ret = btrfs_search_slot(NULL, root, &key,
3256 path, 0, 0);
3257 if (ret < 0)
3258 goto out;
3259 }
3260 }
3261
3262 stop_loop = 0;
3263 while (1) {
3264 u64 bytes;
3265
3266 l = path->nodes[0];
3267 slot = path->slots[0];
3268 if (slot >= btrfs_header_nritems(l)) {
3269 ret = btrfs_next_leaf(root, path);
3270 if (ret == 0)
3271 continue;
3272 if (ret < 0)
3273 goto out;
3274
3275 stop_loop = 1;
3276 break;
3277 }
3278 btrfs_item_key_to_cpu(l, &key, slot);
3279
3280 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
3281 key.type != BTRFS_METADATA_ITEM_KEY)
3282 goto next;
3283
3284 if (key.type == BTRFS_METADATA_ITEM_KEY)
3285 bytes = root->nodesize;
3286 else
3287 bytes = key.offset;
3288
3289 if (key.objectid + bytes <= logical)
3290 goto next;
3291
3292 if (key.objectid >= logical + map->stripe_len) {
3293				/* out of this device extent */
3294 if (key.objectid >= logic_end)
3295 stop_loop = 1;
3296 break;
3297 }
3298
3299 extent = btrfs_item_ptr(l, slot,
3300 struct btrfs_extent_item);
3301 flags = btrfs_extent_flags(l, extent);
3302 generation = btrfs_extent_generation(l, extent);
3303
3304 if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
3305 (key.objectid < logical ||
3306 key.objectid + bytes >
3307 logical + map->stripe_len)) {
3308				btrfs_err(fs_info,
3309					  "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
3310					  key.objectid, logical);
3312 spin_lock(&sctx->stat_lock);
3313 sctx->stat.uncorrectable_errors++;
3314 spin_unlock(&sctx->stat_lock);
3315 goto next;
3316 }
3317
3318again:
3319 extent_logical = key.objectid;
3320 extent_len = bytes;
3321
3322			/*
3323			 * trim extent to this stripe
3324			 */
3325 if (extent_logical < logical) {
3326 extent_len -= logical - extent_logical;
3327 extent_logical = logical;
3328 }
3329 if (extent_logical + extent_len >
3330 logical + map->stripe_len) {
3331 extent_len = logical + map->stripe_len -
3332 extent_logical;
3333 }
3334
3335 extent_physical = extent_logical - logical + physical;
3336 extent_dev = scrub_dev;
3337 extent_mirror_num = mirror_num;
3338 if (is_dev_replace)
3339 scrub_remap_extent(fs_info, extent_logical,
3340 extent_len, &extent_physical,
3341 &extent_dev,
3342 &extent_mirror_num);
3343
3344 ret = btrfs_lookup_csums_range(csum_root,
3345 extent_logical,
3346 extent_logical +
3347 extent_len - 1,
3348 &sctx->csum_list, 1);
3349 if (ret)
3350 goto out;
3351
3352 ret = scrub_extent(sctx, extent_logical, extent_len,
3353 extent_physical, extent_dev, flags,
3354 generation, extent_mirror_num,
3355 extent_logical - logical + physical);
3356
3357 scrub_free_csums(sctx);
3358
3359 if (ret)
3360 goto out;
3361
3362 if (extent_logical + extent_len <
3363 key.objectid + bytes) {
3364 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3365					/*
3366					 * loop until we find next data stripe
3367					 * or we have finished all stripes.
3368					 */
3369loop:
3370 physical += map->stripe_len;
3371 ret = get_raid56_logic_offset(physical,
3372 num, map, &logical,
3373 &stripe_logical);
3374 logical += base;
3375
3376 if (ret && physical < physical_end) {
3377 stripe_logical += base;
3378 stripe_end = stripe_logical +
3379 increment;
3380 ret = scrub_raid56_parity(sctx,
3381 map, scrub_dev, ppath,
3382 stripe_logical,
3383 stripe_end);
3384 if (ret)
3385 goto out;
3386 goto loop;
3387 }
3388 } else {
3389 physical += map->stripe_len;
3390 logical += increment;
3391 }
3392 if (logical < key.objectid + bytes) {
3393 cond_resched();
3394 goto again;
3395 }
3396
3397 if (physical >= physical_end) {
3398 stop_loop = 1;
3399 break;
3400 }
3401 }
3402next:
3403 path->slots[0]++;
3404 }
3405 btrfs_release_path(path);
3406skip:
3407 logical += increment;
3408 physical += map->stripe_len;
3409 spin_lock(&sctx->stat_lock);
3410 if (stop_loop)
3411 sctx->stat.last_physical = map->stripes[num].physical +
3412 length;
3413 else
3414 sctx->stat.last_physical = physical;
3415 spin_unlock(&sctx->stat_lock);
3416 if (stop_loop)
3417 break;
3418 }
3419out:
3420	/* push queued extents */
3421 scrub_submit(sctx);
3422 mutex_lock(&sctx->wr_ctx.wr_lock);
3423 scrub_wr_submit(sctx);
3424 mutex_unlock(&sctx->wr_ctx.wr_lock);
3425
3426 blk_finish_plug(&plug);
3427 btrfs_free_path(path);
3428 btrfs_free_path(ppath);
3429 return ret < 0 ? ret : 0;
3430}
3431
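/*
 * Resolve @chunk_offset to its map_lookup and scrub each stripe of the
 * chunk that lives on @scrub_dev at @dev_offset.
 */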
3432static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
3433 struct btrfs_device *scrub_dev,
3434 u64 chunk_offset, u64 length,
3435 u64 dev_offset,
3436 struct btrfs_block_group_cache *cache,
3437 int is_dev_replace)
3438{
3439 struct btrfs_mapping_tree *map_tree =
3440 &sctx->dev_root->fs_info->mapping_tree;
3441 struct map_lookup *map;
3442 struct extent_map *em;
3443 int i;
3444 int ret = 0;
3445
3446 read_lock(&map_tree->map_tree.lock);
3447 em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
3448 read_unlock(&map_tree->map_tree.lock);
3449
3450 if (!em) {
3451		/*
3452		 * Might have been an unused block group deleted by the cleaner
3453		 * kthread or relocation.
3454		 */
3455 spin_lock(&cache->lock);
3456 if (!cache->removed)
3457 ret = -EINVAL;
3458 spin_unlock(&cache->lock);
3459
3460 return ret;
3461 }
3462
3463 map = em->map_lookup;
3464 if (em->start != chunk_offset)
3465 goto out;
3466
3467 if (em->len < length)
3468 goto out;
3469
3470 for (i = 0; i < map->num_stripes; ++i) {
3471 if (map->stripes[i].dev->bdev == scrub_dev->bdev &&
3472 map->stripes[i].physical == dev_offset) {
3473 ret = scrub_stripe(sctx, map, scrub_dev, i,
3474 chunk_offset, length,
3475 is_dev_replace);
3476 if (ret)
3477 goto out;
3478 }
3479 }
3480out:
3481 free_extent_map(em);
3482
3483 return ret;
3484}
3485
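/*
 * Iterate over all dev extents of @scrub_dev in [start, end): set the
 * owning block group read-only where possible, scrub the chunk, then
 * flush and wait for all outstanding I/O before dropping the block
 * group again (re-queueing it for deletion if it became unused).
 */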
3486static noinline_for_stack
3487int scrub_enumerate_chunks(struct scrub_ctx *sctx,
3488 struct btrfs_device *scrub_dev, u64 start, u64 end,
3489 int is_dev_replace)
3490{
3491 struct btrfs_dev_extent *dev_extent = NULL;
3492 struct btrfs_path *path;
3493 struct btrfs_root *root = sctx->dev_root;
3494 struct btrfs_fs_info *fs_info = root->fs_info;
3495 u64 length;
3496 u64 chunk_offset;
3497 int ret = 0;
3498 int ro_set;
3499 int slot;
3500 struct extent_buffer *l;
3501 struct btrfs_key key;
3502 struct btrfs_key found_key;
3503 struct btrfs_block_group_cache *cache;
3504 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
3505
3506 path = btrfs_alloc_path();
3507 if (!path)
3508 return -ENOMEM;
3509
3510 path->reada = READA_FORWARD;
3511 path->search_commit_root = 1;
3512 path->skip_locking = 1;
3513
3514 key.objectid = scrub_dev->devid;
3515 key.offset = 0ull;
3516 key.type = BTRFS_DEV_EXTENT_KEY;
3517
3518 while (1) {
3519 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3520 if (ret < 0)
3521 break;
3522 if (ret > 0) {
3523 if (path->slots[0] >=
3524 btrfs_header_nritems(path->nodes[0])) {
3525 ret = btrfs_next_leaf(root, path);
3526 if (ret < 0)
3527 break;
3528 if (ret > 0) {
3529 ret = 0;
3530 break;
3531 }
3532 } else {
3533 ret = 0;
3534 }
3535 }
3536
3537 l = path->nodes[0];
3538 slot = path->slots[0];
3539
3540 btrfs_item_key_to_cpu(l, &found_key, slot);
3541
3542 if (found_key.objectid != scrub_dev->devid)
3543 break;
3544
3545 if (found_key.type != BTRFS_DEV_EXTENT_KEY)
3546 break;
3547
3548 if (found_key.offset >= end)
3549 break;
3550
3551 if (found_key.offset < key.offset)
3552 break;
3553
3554 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
3555 length = btrfs_dev_extent_length(l, dev_extent);
3556
3557 if (found_key.offset + length <= start)
3558 goto skip;
3559
3560 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
3561
3562		/*
3563		 * get a reference on the corresponding block group to prevent
3564		 * the chunk from going away while we scrub it
3565		 */
3566 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
3567
3568		/* some chunks are removed but not committed to disk yet,
3569		 * continue scrubbing */
3570 if (!cache)
3571 goto skip;
3572
3573		/*
3574		 * we need to call btrfs_inc_block_group_ro() with scrubs_paused,
3575		 * to avoid deadlock caused by:
3576		 * btrfs_inc_block_group_ro()
3577		 * -> btrfs_wait_for_commit()
3578		 * -> btrfs_commit_transaction()
3579		 * -> btrfs_scrub_pause()
3580		 */
3581 scrub_pause_on(fs_info);
3582 ret = btrfs_inc_block_group_ro(root, cache);
3583 scrub_pause_off(fs_info);
3584
3585 if (ret == 0) {
3586 ro_set = 1;
3587 } else if (ret == -ENOSPC) {
3588			/*
3589			 * btrfs_inc_block_group_ro() returns -ENOSPC when it
3590			 * failed to create a new chunk for metadata. This is
3591			 * not a problem for scrub/replace, because metadata is
3592			 * always COWed, and our scrub pauses transaction
3593			 * commits.
3594			 */
3595 ro_set = 0;
3596 } else {
3597			btrfs_warn(fs_info,
3598				   "failed setting block group ro, ret=%d", ret);
3599 btrfs_put_block_group(cache);
3600 break;
3601 }
3602
3603 dev_replace->cursor_right = found_key.offset + length;
3604 dev_replace->cursor_left = found_key.offset;
3605 dev_replace->item_needs_writeback = 1;
3606 ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
3607 found_key.offset, cache, is_dev_replace);
3608
3609		/*
3610		 * flush, submit all pending read and write bios, afterwards
3611		 * wait for them.
3612		 * Note that in the dev replace case, a read request causes
3613		 * write requests that are submitted in the read completion
3614		 * worker. Therefore in the current situation, it is required
3615		 * that all write requests are flushed, so that all read and
3616		 * write requests are really completed when bios_in_flight
3617		 * changes to 0.
3618		 */
3619 atomic_set(&sctx->wr_ctx.flush_all_writes, 1);
3620 scrub_submit(sctx);
3621 mutex_lock(&sctx->wr_ctx.wr_lock);
3622 scrub_wr_submit(sctx);
3623 mutex_unlock(&sctx->wr_ctx.wr_lock);
3624
3625 wait_event(sctx->list_wait,
3626 atomic_read(&sctx->bios_in_flight) == 0);
3627
3628 scrub_pause_on(fs_info);
3629
3630		/*
3631		 * must be called before we decrease @scrub_paused.
3632		 * make sure we don't block transaction commit while
3633		 * we are waiting for pending workers to finish.
3634		 */
3635 wait_event(sctx->list_wait,
3636 atomic_read(&sctx->workers_pending) == 0);
3637 atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
3638
3639 scrub_pause_off(fs_info);
3640
3641 if (ro_set)
3642 btrfs_dec_block_group_ro(root, cache);
3643
3644		/*
3645		 * We might have prevented the cleaner kthread from deleting
3646		 * this block group if it was already unused because we raced
3647		 * with relocation. In that case, just lock it and let the
3648		 * cleaner kthread do the delete, while we keep our reference
3649		 * on the block group, to avoid races with its deletion.
3650		 */
3651 spin_lock(&cache->lock);
3652 if (!cache->removed && !cache->ro && cache->reserved == 0 &&
3653 btrfs_block_group_used(&cache->item) == 0) {
3654 spin_unlock(&cache->lock);
3655 spin_lock(&fs_info->unused_bgs_lock);
3656 if (list_empty(&cache->bg_list)) {
3657 btrfs_get_block_group(cache);
3658 list_add_tail(&cache->bg_list,
3659 &fs_info->unused_bgs);
3660 }
3661 spin_unlock(&fs_info->unused_bgs_lock);
3662 } else {
3663 spin_unlock(&cache->lock);
3664 }
3665
3666 btrfs_put_block_group(cache);
3667 if (ret)
3668 break;
3669 if (is_dev_replace &&
3670 atomic64_read(&dev_replace->num_write_errors) > 0) {
3671 ret = -EIO;
3672 break;
3673 }
3674 if (sctx->stat.malloc_errors > 0) {
3675 ret = -ENOMEM;
3676 break;
3677 }
3678
3679 dev_replace->cursor_left = dev_replace->cursor_right;
3680 dev_replace->item_needs_writeback = 1;
3681skip:
3682 key.offset = found_key.offset + length;
3683 btrfs_release_path(path);
3684 }
3685
3686 btrfs_free_path(path);
3687
3688 return ret;
3689}
3690
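/*
 * Scrub all superblock copies that fit within the committed device
 * size. Supers are not referenced by the extent tree, so they are
 * pushed through scrub_pages() directly with BTRFS_EXTENT_FLAG_SUPER
 * and forced bio submission.
 */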
3691static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
3692 struct btrfs_device *scrub_dev)
3693{
3694 int i;
3695 u64 bytenr;
3696 u64 gen;
3697 int ret;
3698 struct btrfs_root *root = sctx->dev_root;
3699
3700 if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
3701 return -EIO;
3702
3703	/* Seed devices of a new filesystem have their own generation. */
3704 if (scrub_dev->fs_devices != root->fs_info->fs_devices)
3705 gen = scrub_dev->generation;
3706 else
3707 gen = root->fs_info->last_trans_committed;
3708
3709 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
3710 bytenr = btrfs_sb_offset(i);
3711 if (bytenr + BTRFS_SUPER_INFO_SIZE >
3712 scrub_dev->commit_total_bytes)
3713 break;
3714
3715 ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
3716 scrub_dev, BTRFS_EXTENT_FLAG_SUPER, gen, i,
3717 NULL, 1, bytenr);
3718 if (ret)
3719 return ret;
3720 }
3721 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
3722
3723 return 0;
3724}
3725
3726/*
3727 * get a reference count on fs_info->scrub_workers. start worker if necessary
3728 */
3729static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
3730 int is_dev_replace)
3731{
3732 unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
3733 int max_active = fs_info->thread_pool_size;
3734
3735 if (fs_info->scrub_workers_refcnt == 0) {
3736 if (is_dev_replace)
3737 fs_info->scrub_workers =
3738 btrfs_alloc_workqueue("scrub", flags,
3739 1, 4);
3740 else
3741 fs_info->scrub_workers =
3742 btrfs_alloc_workqueue("scrub", flags,
3743 max_active, 4);
3744 if (!fs_info->scrub_workers)
3745 goto fail_scrub_workers;
3746
3747 fs_info->scrub_wr_completion_workers =
3748 btrfs_alloc_workqueue("scrubwrc", flags,
3749 max_active, 2);
3750 if (!fs_info->scrub_wr_completion_workers)
3751 goto fail_scrub_wr_completion_workers;
3752
3753 fs_info->scrub_nocow_workers =
3754 btrfs_alloc_workqueue("scrubnc", flags, 1, 0);
3755 if (!fs_info->scrub_nocow_workers)
3756 goto fail_scrub_nocow_workers;
3757 fs_info->scrub_parity_workers =
3758 btrfs_alloc_workqueue("scrubparity", flags,
3759 max_active, 2);
3760 if (!fs_info->scrub_parity_workers)
3761 goto fail_scrub_parity_workers;
3762 }
3763 ++fs_info->scrub_workers_refcnt;
3764 return 0;
3765
3766fail_scrub_parity_workers:
3767 btrfs_destroy_workqueue(fs_info->scrub_nocow_workers);
3768fail_scrub_nocow_workers:
3769 btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
3770fail_scrub_wr_completion_workers:
3771 btrfs_destroy_workqueue(fs_info->scrub_workers);
3772fail_scrub_workers:
3773 return -ENOMEM;
3774}
3775
3776static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
3777{
3778 if (--fs_info->scrub_workers_refcnt == 0) {
3779 btrfs_destroy_workqueue(fs_info->scrub_workers);
3780 btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
3781 btrfs_destroy_workqueue(fs_info->scrub_nocow_workers);
3782 btrfs_destroy_workqueue(fs_info->scrub_parity_workers);
3783 }
3784 WARN_ON(fs_info->scrub_workers_refcnt < 0);
3785}
3786
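/*
 * Common entry point for scrub and device replace: verify the size
 * assumptions scrub is built on, look up and lock the device, refuse to
 * run concurrently with another scrub/replace on it, then scrub the
 * supers (regular scrub only) and enumerate all chunks.
 */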
3787int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
3788 u64 end, struct btrfs_scrub_progress *progress,
3789 int readonly, int is_dev_replace)
3790{
3791 struct scrub_ctx *sctx;
3792 int ret;
3793 struct btrfs_device *dev;
3794 struct rcu_string *name;
3795
3796 if (btrfs_fs_closing(fs_info))
3797 return -EINVAL;
3798
3799 if (fs_info->chunk_root->nodesize > BTRFS_STRIPE_LEN) {
3800		/*
3801		 * in this case scrub is unable to calculate the checksum
3802		 * the way scrub is implemented. Do not handle this
3803		 * situation at all because it won't ever change.
3804		 */
3805 btrfs_err(fs_info,
3806 "scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails",
3807 fs_info->chunk_root->nodesize, BTRFS_STRIPE_LEN);
3808 return -EINVAL;
3809 }
3810
3811 if (fs_info->chunk_root->sectorsize != PAGE_SIZE) {
3812		/* not supported for data w/o checksums */
3813		btrfs_err(fs_info,
3814			  "scrub: size assumption sectorsize != PAGE_SIZE (%d != %lu) fails",
3815			  fs_info->chunk_root->sectorsize, PAGE_SIZE);
3817 return -EINVAL;
3818 }
3819
3820 if (fs_info->chunk_root->nodesize >
3821 PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK ||
3822 fs_info->chunk_root->sectorsize >
3823 PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK) {
3824		/*
3825		 * would exhaust the array bounds of pagev member in
3826		 * struct scrub_block
3827		 */
3828		btrfs_err(fs_info,
3829			  "scrub: size assumption nodesize and sectorsize <= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails",
3830			  fs_info->chunk_root->nodesize, SCRUB_MAX_PAGES_PER_BLOCK,
3831			  fs_info->chunk_root->sectorsize, SCRUB_MAX_PAGES_PER_BLOCK);
3834 return -EINVAL;
3835 }
3836
3837
3838 mutex_lock(&fs_info->fs_devices->device_list_mutex);
3839 dev = btrfs_find_device(fs_info, devid, NULL, NULL);
3840 if (!dev || (dev->missing && !is_dev_replace)) {
3841 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3842 return -ENODEV;
3843 }
3844
3845 if (!is_dev_replace && !readonly && !dev->writeable) {
3846 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3847 rcu_read_lock();
3848 name = rcu_dereference(dev->name);
3849 btrfs_err(fs_info, "scrub: device %s is not writable",
3850 name->str);
3851 rcu_read_unlock();
3852 return -EROFS;
3853 }
3854
3855 mutex_lock(&fs_info->scrub_lock);
3856 if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) {
3857 mutex_unlock(&fs_info->scrub_lock);
3858 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3859 return -EIO;
3860 }
3861
3862 btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
3863 if (dev->scrub_device ||
3864 (!is_dev_replace &&
3865 btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
3866 btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
3867 mutex_unlock(&fs_info->scrub_lock);
3868 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3869 return -EINPROGRESS;
3870 }
3871 btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
3872
3873 ret = scrub_workers_get(fs_info, is_dev_replace);
3874 if (ret) {
3875 mutex_unlock(&fs_info->scrub_lock);
3876 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3877 return ret;
3878 }
3879
3880 sctx = scrub_setup_ctx(dev, is_dev_replace);
3881 if (IS_ERR(sctx)) {
3882 mutex_unlock(&fs_info->scrub_lock);
3883 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3884 scrub_workers_put(fs_info);
3885 return PTR_ERR(sctx);
3886 }
3887 sctx->readonly = readonly;
3888 dev->scrub_device = sctx;
3889 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3890
3891	/*
3892	 * checking @scrub_pause_req here, we can avoid
3893	 * race between committing transaction and scrubbing.
3894	 */
3895 __scrub_blocked_if_needed(fs_info);
3896 atomic_inc(&fs_info->scrubs_running);
3897 mutex_unlock(&fs_info->scrub_lock);
3898
3899 if (!is_dev_replace) {
3900		/*
3901		 * by holding device list mutex, we can
3902		 * kick off writing super in log tree sync.
3903		 */
3904 mutex_lock(&fs_info->fs_devices->device_list_mutex);
3905 ret = scrub_supers(sctx, dev);
3906 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3907 }
3908
3909 if (!ret)
3910 ret = scrub_enumerate_chunks(sctx, dev, start, end,
3911 is_dev_replace);
3912
3913 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
3914 atomic_dec(&fs_info->scrubs_running);
3915 wake_up(&fs_info->scrub_pause_wait);
3916
3917 wait_event(sctx->list_wait, atomic_read(&sctx->workers_pending) == 0);
3918
3919 if (progress)
3920 memcpy(progress, &sctx->stat, sizeof(*progress));
3921
3922 mutex_lock(&fs_info->scrub_lock);
3923 dev->scrub_device = NULL;
3924 scrub_workers_put(fs_info);
3925 mutex_unlock(&fs_info->scrub_lock);
3926
3927 scrub_put_ctx(sctx);
3928
3929 return ret;
3930}
3931
3932void btrfs_scrub_pause(struct btrfs_root *root)
3933{
3934 struct btrfs_fs_info *fs_info = root->fs_info;
3935
3936 mutex_lock(&fs_info->scrub_lock);
3937 atomic_inc(&fs_info->scrub_pause_req);
3938 while (atomic_read(&fs_info->scrubs_paused) !=
3939 atomic_read(&fs_info->scrubs_running)) {
3940 mutex_unlock(&fs_info->scrub_lock);
3941 wait_event(fs_info->scrub_pause_wait,
3942 atomic_read(&fs_info->scrubs_paused) ==
3943 atomic_read(&fs_info->scrubs_running));
3944 mutex_lock(&fs_info->scrub_lock);
3945 }
3946 mutex_unlock(&fs_info->scrub_lock);
3947}
3948
3949void btrfs_scrub_continue(struct btrfs_root *root)
3950{
3951 struct btrfs_fs_info *fs_info = root->fs_info;
3952
3953 atomic_dec(&fs_info->scrub_pause_req);
3954 wake_up(&fs_info->scrub_pause_wait);
3955}
3956
3957int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
3958{
3959 mutex_lock(&fs_info->scrub_lock);
3960 if (!atomic_read(&fs_info->scrubs_running)) {
3961 mutex_unlock(&fs_info->scrub_lock);
3962 return -ENOTCONN;
3963 }
3964
3965 atomic_inc(&fs_info->scrub_cancel_req);
3966 while (atomic_read(&fs_info->scrubs_running)) {
3967 mutex_unlock(&fs_info->scrub_lock);
3968 wait_event(fs_info->scrub_pause_wait,
3969 atomic_read(&fs_info->scrubs_running) == 0);
3970 mutex_lock(&fs_info->scrub_lock);
3971 }
3972 atomic_dec(&fs_info->scrub_cancel_req);
3973 mutex_unlock(&fs_info->scrub_lock);
3974
3975 return 0;
3976}
3977
3978int btrfs_scrub_cancel_dev(struct btrfs_fs_info *fs_info,
3979 struct btrfs_device *dev)
3980{
3981 struct scrub_ctx *sctx;
3982
3983 mutex_lock(&fs_info->scrub_lock);
3984 sctx = dev->scrub_device;
3985 if (!sctx) {
3986 mutex_unlock(&fs_info->scrub_lock);
3987 return -ENOTCONN;
3988 }
3989 atomic_inc(&sctx->cancel_req);
3990 while (dev->scrub_device) {
3991 mutex_unlock(&fs_info->scrub_lock);
3992 wait_event(fs_info->scrub_pause_wait,
3993 dev->scrub_device == NULL);
3994 mutex_lock(&fs_info->scrub_lock);
3995 }
3996 mutex_unlock(&fs_info->scrub_lock);
3997
3998 return 0;
3999}
4000
4001int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
4002 struct btrfs_scrub_progress *progress)
4003{
4004 struct btrfs_device *dev;
4005 struct scrub_ctx *sctx = NULL;
4006
4007 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
4008 dev = btrfs_find_device(root->fs_info, devid, NULL, NULL);
4009 if (dev)
4010 sctx = dev->scrub_device;
4011 if (sctx)
4012 memcpy(progress, &sctx->stat, sizeof(*progress));
4013 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
4014
4015 return dev ? (sctx ? 0 : -ENOTCONN) : -ENODEV;
4016}
4017
4018static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
4019 u64 extent_logical, u64 extent_len,
4020 u64 *extent_physical,
4021 struct btrfs_device **extent_dev,
4022 int *extent_mirror_num)
4023{
4024 u64 mapped_length;
4025 struct btrfs_bio *bbio = NULL;
4026 int ret;
4027
4028 mapped_length = extent_len;
4029 ret = btrfs_map_block(fs_info, READ, extent_logical,
4030 &mapped_length, &bbio, 0);
4031 if (ret || !bbio || mapped_length < extent_len ||
4032 !bbio->stripes[0].dev->bdev) {
4033 btrfs_put_bbio(bbio);
4034 return;
4035 }
4036
4037 *extent_physical = bbio->stripes[0].physical;
4038 *extent_mirror_num = bbio->mirror_num;
4039 *extent_dev = bbio->stripes[0].dev;
4040 btrfs_put_bbio(bbio);
4041}
4042
4043static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
4044 struct scrub_wr_ctx *wr_ctx,
4045 struct btrfs_fs_info *fs_info,
4046 struct btrfs_device *dev,
4047 int is_dev_replace)
4048{
4049 WARN_ON(wr_ctx->wr_curr_bio != NULL);
4050
4051 mutex_init(&wr_ctx->wr_lock);
4052 wr_ctx->wr_curr_bio = NULL;
4053 if (!is_dev_replace)
4054 return 0;
4055
4056 WARN_ON(!dev->bdev);
4057 wr_ctx->pages_per_wr_bio = SCRUB_PAGES_PER_WR_BIO;
4058 wr_ctx->tgtdev = dev;
4059 atomic_set(&wr_ctx->flush_all_writes, 0);
4060 return 0;
4061}
4062
4063static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx)
4064{
4065 mutex_lock(&wr_ctx->wr_lock);
4066 kfree(wr_ctx->wr_curr_bio);
4067 wr_ctx->wr_curr_bio = NULL;
4068 mutex_unlock(&wr_ctx->wr_lock);
4069}
4070
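/*
 * Dev-replace path for nodatasum/nocow data, where no checksum exists
 * to verify a raw copy: queue a worker that re-reads the range through
 * the page cache of every inode referencing it and writes the pages to
 * the target device, so racing writes are detected and handled.
 */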
4071static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
4072 int mirror_num, u64 physical_for_dev_replace)
4073{
4074 struct scrub_copy_nocow_ctx *nocow_ctx;
4075 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
4076
4077 nocow_ctx = kzalloc(sizeof(*nocow_ctx), GFP_NOFS);
4078 if (!nocow_ctx) {
4079 spin_lock(&sctx->stat_lock);
4080 sctx->stat.malloc_errors++;
4081 spin_unlock(&sctx->stat_lock);
4082 return -ENOMEM;
4083 }
4084
4085 scrub_pending_trans_workers_inc(sctx);
4086
4087 nocow_ctx->sctx = sctx;
4088 nocow_ctx->logical = logical;
4089 nocow_ctx->len = len;
4090 nocow_ctx->mirror_num = mirror_num;
4091 nocow_ctx->physical_for_dev_replace = physical_for_dev_replace;
4092 btrfs_init_work(&nocow_ctx->work, btrfs_scrubnc_helper,
4093 copy_nocow_pages_worker, NULL, NULL);
4094 INIT_LIST_HEAD(&nocow_ctx->inodes);
4095 btrfs_queue_work(fs_info->scrub_nocow_workers,
4096 &nocow_ctx->work);
4097
4098 return 0;
4099}
4100
4101static int record_inode_for_nocow(u64 inum, u64 offset, u64 root, void *ctx)
4102{
4103 struct scrub_copy_nocow_ctx *nocow_ctx = ctx;
4104 struct scrub_nocow_inode *nocow_inode;
4105
4106 nocow_inode = kzalloc(sizeof(*nocow_inode), GFP_NOFS);
4107 if (!nocow_inode)
4108 return -ENOMEM;
4109 nocow_inode->inum = inum;
4110 nocow_inode->offset = offset;
4111 nocow_inode->root = root;
4112 list_add_tail(&nocow_inode->list, &nocow_ctx->inodes);
4113 return 0;
4114}
4115
4116#define COPY_COMPLETE 1
4117
4118static void copy_nocow_pages_worker(struct btrfs_work *work)
4119{
4120 struct scrub_copy_nocow_ctx *nocow_ctx =
4121 container_of(work, struct scrub_copy_nocow_ctx, work);
4122 struct scrub_ctx *sctx = nocow_ctx->sctx;
4123 u64 logical = nocow_ctx->logical;
4124 u64 len = nocow_ctx->len;
4125 int mirror_num = nocow_ctx->mirror_num;
4126 u64 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
4127 int ret;
4128 struct btrfs_trans_handle *trans = NULL;
4129 struct btrfs_fs_info *fs_info;
4130 struct btrfs_path *path;
4131 struct btrfs_root *root;
4132 int not_written = 0;
4133
4134 fs_info = sctx->dev_root->fs_info;
4135 root = fs_info->extent_root;
4136
4137 path = btrfs_alloc_path();
4138 if (!path) {
4139 spin_lock(&sctx->stat_lock);
4140 sctx->stat.malloc_errors++;
4141 spin_unlock(&sctx->stat_lock);
4142 not_written = 1;
4143 goto out;
4144 }
4145
4146 trans = btrfs_join_transaction(root);
4147 if (IS_ERR(trans)) {
4148 not_written = 1;
4149 goto out;
4150 }
4151
4152 ret = iterate_inodes_from_logical(logical, fs_info, path,
4153 record_inode_for_nocow, nocow_ctx);
4154 if (ret != 0 && ret != -ENOENT) {
4155		btrfs_warn(fs_info,
4156			   "iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d",
4157			   logical, physical_for_dev_replace, len, mirror_num, ret);
4159 not_written = 1;
4160 goto out;
4161 }
4162
4163 btrfs_end_transaction(trans, root);
4164 trans = NULL;
4165 while (!list_empty(&nocow_ctx->inodes)) {
4166 struct scrub_nocow_inode *entry;
4167 entry = list_first_entry(&nocow_ctx->inodes,
4168 struct scrub_nocow_inode,
4169 list);
4170 list_del_init(&entry->list);
4171 ret = copy_nocow_pages_for_inode(entry->inum, entry->offset,
4172 entry->root, nocow_ctx);
4173 kfree(entry);
4174 if (ret == COPY_COMPLETE) {
4175 ret = 0;
4176 break;
4177 } else if (ret) {
4178 break;
4179 }
4180 }
4181out:
4182 while (!list_empty(&nocow_ctx->inodes)) {
4183 struct scrub_nocow_inode *entry;
4184 entry = list_first_entry(&nocow_ctx->inodes,
4185 struct scrub_nocow_inode,
4186 list);
4187 list_del_init(&entry->list);
4188 kfree(entry);
4189 }
4190 if (trans && !IS_ERR(trans))
4191 btrfs_end_transaction(trans, root);
4192 if (not_written)
4193 btrfs_dev_replace_stats_inc(&fs_info->dev_replace.
4194 num_uncorrectable_read_errors);
4195
4196 btrfs_free_path(path);
4197 kfree(nocow_ctx);
4198
4199 scrub_pending_trans_workers_dec(sctx);
4200}
4201
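/*
 * Verify that the file range [start, start + len) still maps to the
 * block at @logical. Returns 1 if an ordered extent is pending or the
 * extent mapping has moved (caller must skip the copy), 0 if the copy
 * may proceed, or a negative error.
 */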
4202static int check_extent_to_block(struct inode *inode, u64 start, u64 len,
4203 u64 logical)
4204{
4205 struct extent_state *cached_state = NULL;
4206 struct btrfs_ordered_extent *ordered;
4207 struct extent_io_tree *io_tree;
4208 struct extent_map *em;
4209 u64 lockstart = start, lockend = start + len - 1;
4210 int ret = 0;
4211
4212 io_tree = &BTRFS_I(inode)->io_tree;
4213
4214 lock_extent_bits(io_tree, lockstart, lockend, &cached_state);
4215 ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
4216 if (ordered) {
4217 btrfs_put_ordered_extent(ordered);
4218 ret = 1;
4219 goto out_unlock;
4220 }
4221
4222 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
4223 if (IS_ERR(em)) {
4224 ret = PTR_ERR(em);
4225 goto out_unlock;
4226 }
4227
4228	/*
4229	 * This extent does not actually cover the logical extent anymore,
4230	 * move on to the next inode.
4231	 */
4232 if (em->block_start > logical ||
4233 em->block_start + em->block_len < logical + len) {
4234 free_extent_map(em);
4235 ret = 1;
4236 goto out_unlock;
4237 }
4238 free_extent_map(em);
4239
4240out_unlock:
4241 unlock_extent_cached(io_tree, lockstart, lockend, &cached_state,
4242 GFP_NOFS);
4243 return ret;
4244}
4245
4246static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
4247 struct scrub_copy_nocow_ctx *nocow_ctx)
4248{
4249 struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info;
4250 struct btrfs_key key;
4251 struct inode *inode;
4252 struct page *page;
4253 struct btrfs_root *local_root;
4254 struct extent_io_tree *io_tree;
4255 u64 physical_for_dev_replace;
4256 u64 nocow_ctx_logical;
4257 u64 len = nocow_ctx->len;
4258 unsigned long index;
4259 int srcu_index;
4260 int ret = 0;
4261 int err = 0;
4262
4263 key.objectid = root;
4264 key.type = BTRFS_ROOT_ITEM_KEY;
4265 key.offset = (u64)-1;
4266
4267 srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
4268
4269 local_root = btrfs_read_fs_root_no_name(fs_info, &key);
4270 if (IS_ERR(local_root)) {
4271 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
4272 return PTR_ERR(local_root);
4273 }
4274
4275 key.type = BTRFS_INODE_ITEM_KEY;
4276 key.objectid = inum;
4277 key.offset = 0;
4278 inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
4279 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
4280 if (IS_ERR(inode))
4281 return PTR_ERR(inode);
4282
4283	/* Avoid truncate/dio/punch hole.. */
4284 inode_lock(inode);
4285 inode_dio_wait(inode);
4286
4287 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
4288 io_tree = &BTRFS_I(inode)->io_tree;
4289 nocow_ctx_logical = nocow_ctx->logical;
4290
4291 ret = check_extent_to_block(inode, offset, len, nocow_ctx_logical);
4292 if (ret) {
4293 ret = ret > 0 ? 0 : ret;
4294 goto out;
4295 }
4296
4297 while (len >= PAGE_SIZE) {
4298 index = offset >> PAGE_SHIFT;
4299again:
4300 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
4301 if (!page) {
4302 btrfs_err(fs_info, "find_or_create_page() failed");
4303 ret = -ENOMEM;
4304 goto out;
4305 }
4306
4307 if (PageUptodate(page)) {
4308 if (PageDirty(page))
4309 goto next_page;
4310 } else {
4311 ClearPageError(page);
4312 err = extent_read_full_page(io_tree, page,
4313 btrfs_get_extent,
4314 nocow_ctx->mirror_num);
4315 if (err) {
4316 ret = err;
4317 goto next_page;
4318 }
4319
4320 lock_page(page);
4321			/*
4322			 * If the page has been removed from the page cache,
4323			 * the data on it is meaningless, because it may be
4324			 * stale: the new data may meanwhile have been written
4325			 * to a new page in the page cache.
4326			 */
4327 if (page->mapping != inode->i_mapping) {
4328 unlock_page(page);
4329 put_page(page);
4330 goto again;
4331 }
4332 if (!PageUptodate(page)) {
4333 ret = -EIO;
4334 goto next_page;
4335 }
4336 }
4337
4338 ret = check_extent_to_block(inode, offset, len,
4339 nocow_ctx_logical);
4340 if (ret) {
4341 ret = ret > 0 ? 0 : ret;
4342 goto next_page;
4343 }
4344
4345 err = write_page_nocow(nocow_ctx->sctx,
4346 physical_for_dev_replace, page);
4347 if (err)
4348 ret = err;
4349next_page:
4350 unlock_page(page);
4351 put_page(page);
4352
4353 if (ret)
4354 break;
4355
4356 offset += PAGE_SIZE;
4357 physical_for_dev_replace += PAGE_SIZE;
4358 nocow_ctx_logical += PAGE_SIZE;
4359 len -= PAGE_SIZE;
4360 }
4361 ret = COPY_COMPLETE;
4362out:
4363 inode_unlock(inode);
4364 iput(inode);
4365 return ret;
4366}
4367
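/*
 * Synchronously write one page to the dev-replace target device at
 * @physical_for_dev_replace using a single-page bio submitted with
 * btrfsic_submit_bio_wait(); write errors are accounted in the device
 * statistics.
 */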
4368static int write_page_nocow(struct scrub_ctx *sctx,
4369 u64 physical_for_dev_replace, struct page *page)
4370{
4371 struct bio *bio;
4372 struct btrfs_device *dev;
4373 int ret;
4374
4375 dev = sctx->wr_ctx.tgtdev;
4376 if (!dev)
4377 return -EIO;
4378 if (!dev->bdev) {
4379 btrfs_warn_rl(dev->dev_root->fs_info,
4380 "scrub write_page_nocow(bdev == NULL) is unexpected");
4381 return -EIO;
4382 }
4383 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
4384 if (!bio) {
4385 spin_lock(&sctx->stat_lock);
4386 sctx->stat.malloc_errors++;
4387 spin_unlock(&sctx->stat_lock);
4388 return -ENOMEM;
4389 }
4390 bio->bi_iter.bi_size = 0;
4391 bio->bi_iter.bi_sector = physical_for_dev_replace >> 9;
4392 bio->bi_bdev = dev->bdev;
4393 ret = bio_add_page(bio, page, PAGE_SIZE, 0);
4394 if (ret != PAGE_SIZE) {
4395leave_with_eio:
4396 bio_put(bio);
4397 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
4398 return -EIO;
4399 }
4400
4401 if (btrfsic_submit_bio_wait(WRITE_SYNC, bio))
4402 goto leave_with_eio;
4403
4404 bio_put(bio);
4405 return 0;
4406}
4407