#include <linux/blkdev.h>
#include <linux/ratelimit.h>
#include "ctree.h"
#include "volumes.h"
#include "disk-io.h"
#include "ordered-data.h"
#include "transaction.h"
#include "backref.h"
#include "extent_io.h"
#include "dev-replace.h"
#include "check-integrity.h"
#include "rcu-string.h"
#include "raid56.h"
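/*
 * Overview (summary restored in place of a stripped comment block; the
 * wording here is ours, not the original): scrub reads all extents and
 * super blocks of a device and verifies their checksums, rewriting good
 * data over bad copies where a healthy mirror exists. In dev-replace mode
 * the same machinery is used to copy everything to a new target device.
 */
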
struct scrub_block;
struct scrub_ctx;
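/*
 * The following three values only influence the performance, not the
 * result. The last one configures the number of parallel and outstanding
 * I/O operations; the first two configure an upper limit for the number
 * of (dynamically allocated) pages that are added to a bio.
 */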
#define SCRUB_PAGES_PER_RD_BIO	32	/* 128k per bio */
#define SCRUB_PAGES_PER_WR_BIO	32	/* 128k per bio */
#define SCRUB_BIOS_PER_SCTX	64	/* 8MB per device in flight */

/*
 * This value times PAGE_SIZE needs to be large enough to match the
 * largest node/leaf/sector size that shall be supported.
 */
#define SCRUB_MAX_PAGES_PER_BLOCK	16	/* 64k per node/leaf/sector */

struct scrub_recover {
	atomic_t		refs;
	struct btrfs_bio	*bbio;
	u64			map_length;
};

struct scrub_page {
	struct scrub_block	*sblock;
	struct page		*page;
	struct btrfs_device	*dev;
	struct list_head	list;
	u64			flags;  /* extent flags */
	u64			generation;
	u64			logical;
	u64			physical;
	u64			physical_for_dev_replace;
	atomic_t		refs;
	struct {
		unsigned int	mirror_num:8;
		unsigned int	have_csum:1;
		unsigned int	io_error:1;
	};
	u8			csum[BTRFS_CSUM_SIZE];

	struct scrub_recover	*recover;
};

struct scrub_bio {
	int			index;
	struct scrub_ctx	*sctx;
	struct btrfs_device	*dev;
	struct bio		*bio;
	int			err;
	u64			logical;
	u64			physical;
#if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
	struct scrub_page	*pagev[SCRUB_PAGES_PER_WR_BIO];
#else
	struct scrub_page	*pagev[SCRUB_PAGES_PER_RD_BIO];
#endif
	int			page_count;
	int			next_free;
	struct btrfs_work	work;
};

struct scrub_block {
	struct scrub_page	*pagev[SCRUB_MAX_PAGES_PER_BLOCK];
	int			page_count;
	atomic_t		outstanding_pages;
	atomic_t		refs; /* free mem on transition to zero */
	struct scrub_ctx	*sctx;
	struct scrub_parity	*sparity;
	struct {
		unsigned int	header_error:1;
		unsigned int	checksum_error:1;
		unsigned int	no_io_error_seen:1;
		unsigned int	generation_error:1; /* also sets header_error */

		/* set when the data of this block was successfully repaired */
		unsigned int	data_corrected:1;
	};
};

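/* Used for the chunks with parity stripe such RAID5/6 */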
struct scrub_parity {
	struct scrub_ctx	*sctx;

	struct btrfs_device	*scrub_dev;

	u64			logic_start;

	u64			logic_end;

	int			nsectors;

	int			stripe_len;

	atomic_t		refs;

	struct list_head	spages;

	/* Work of parity check and repair */
	struct btrfs_work	work;

	/* Mark the parity blocks which have data */
	unsigned long		*dbitmap;

	/*
	 * Mark the parity blocks which have data, but errors happened when
	 * reading or checking that data
	 */
	unsigned long		*ebitmap;

	unsigned long		bitmap[0];
};

struct scrub_wr_ctx {
	struct scrub_bio	*wr_curr_bio;
	struct btrfs_device	*tgtdev;
	int			pages_per_wr_bio; /* <= SCRUB_PAGES_PER_WR_BIO */
	atomic_t		flush_all_writes;
	struct mutex		wr_lock;
};

struct scrub_ctx {
	struct scrub_bio	*bios[SCRUB_BIOS_PER_SCTX];
	struct btrfs_root	*dev_root;
	int			first_free;
	int			curr;
	atomic_t		bios_in_flight;
	atomic_t		workers_pending;
	spinlock_t		list_lock;
	wait_queue_head_t	list_wait;
	u16			csum_size;
	struct list_head	csum_list;
	atomic_t		cancel_req;
	int			readonly;
	int			pages_per_rd_bio;
	u32			sectorsize;
	u32			nodesize;

	int			is_dev_replace;
	struct scrub_wr_ctx	wr_ctx;

	/*
	 * statistics
	 */
	struct btrfs_scrub_progress stat;
	spinlock_t		stat_lock;

	/*
	 * Use a reference counter to avoid use-after-free issues: workers
	 * may still hold the ctx after the main scrub thread has finished,
	 * so the ctx is only freed once the last reference is dropped via
	 * scrub_put_ctx().
	 */
	atomic_t		refs;
};

struct scrub_fixup_nodatasum {
	struct scrub_ctx	*sctx;
	struct btrfs_device	*dev;
	u64			logical;
	struct btrfs_root	*root;
	struct btrfs_work	work;
	int			mirror_num;
};

struct scrub_nocow_inode {
	u64			inum;
	u64			offset;
	u64			root;
	struct list_head	list;
};

struct scrub_copy_nocow_ctx {
	struct scrub_ctx	*sctx;
	u64			logical;
	u64			len;
	int			mirror_num;
	u64			physical_for_dev_replace;
	struct list_head	inodes;
	struct btrfs_work	work;
};

struct scrub_warning {
	struct btrfs_path	*path;
	u64			extent_item_size;
	const char		*errstr;
	sector_t		sector;
	u64			logical;
	struct btrfs_device	*dev;
};

static void scrub_pending_bio_inc(struct scrub_ctx *sctx);
static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx);
static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx);
static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
				     struct scrub_block *sblocks_for_recheck);
static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
				struct scrub_block *sblock, int is_metadata,
				int have_csum, u8 *csum, u64 generation,
				u16 csum_size, int retry_failed_mirror);
static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
					 struct scrub_block *sblock,
					 int is_metadata, int have_csum,
					 const u8 *csum, u64 generation,
					 u16 csum_size);
static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
					     struct scrub_block *sblock_good);
static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
					    struct scrub_block *sblock_good,
					    int page_num, int force_write);
static void scrub_write_block_to_dev_replace(struct scrub_block *sblock);
static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
					   int page_num);
static int scrub_checksum_data(struct scrub_block *sblock);
static int scrub_checksum_tree_block(struct scrub_block *sblock);
static int scrub_checksum_super(struct scrub_block *sblock);
static void scrub_block_get(struct scrub_block *sblock);
static void scrub_block_put(struct scrub_block *sblock);
static void scrub_page_get(struct scrub_page *spage);
static void scrub_page_put(struct scrub_page *spage);
static void scrub_parity_get(struct scrub_parity *sparity);
static void scrub_parity_put(struct scrub_parity *sparity);
static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage);
static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
		       u64 physical, struct btrfs_device *dev, u64 flags,
		       u64 gen, int mirror_num, u8 *csum, int force,
		       u64 physical_for_dev_replace);
static void scrub_bio_end_io(struct bio *bio, int err);
static void scrub_bio_end_io_worker(struct btrfs_work *work);
static void scrub_block_complete(struct scrub_block *sblock);
static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
			       u64 extent_logical, u64 extent_len,
			       u64 *extent_physical,
			       struct btrfs_device **extent_dev,
			       int *extent_mirror_num);
static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
			      struct scrub_wr_ctx *wr_ctx,
			      struct btrfs_fs_info *fs_info,
			      struct btrfs_device *dev,
			      int is_dev_replace);
static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx);
static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage);
static void scrub_wr_submit(struct scrub_ctx *sctx);
static void scrub_wr_bio_end_io(struct bio *bio, int err);
static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
static int write_page_nocow(struct scrub_ctx *sctx,
			    u64 physical_for_dev_replace, struct page *page);
static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
				      struct scrub_copy_nocow_ctx *ctx);
static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
			    int mirror_num, u64 physical_for_dev_replace);
static void copy_nocow_pages_worker(struct btrfs_work *work);
static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_put_ctx(struct scrub_ctx *sctx);

static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
{
	atomic_inc(&sctx->refs);
	atomic_inc(&sctx->bios_in_flight);
}

static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
{
	atomic_dec(&sctx->bios_in_flight);
	wake_up(&sctx->list_wait);
	scrub_put_ctx(sctx);
}

static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
{
	while (atomic_read(&fs_info->scrub_pause_req)) {
		mutex_unlock(&fs_info->scrub_lock);
		wait_event(fs_info->scrub_pause_wait,
			   atomic_read(&fs_info->scrub_pause_req) == 0);
		mutex_lock(&fs_info->scrub_lock);
	}
}

static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
{
	atomic_inc(&fs_info->scrubs_paused);
	wake_up(&fs_info->scrub_pause_wait);

	mutex_lock(&fs_info->scrub_lock);
	__scrub_blocked_if_needed(fs_info);
	atomic_dec(&fs_info->scrubs_paused);
	mutex_unlock(&fs_info->scrub_lock);

	wake_up(&fs_info->scrub_pause_wait);
}
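/*
 * used for workers that require transaction commits (i.e., for the
 * NOCOW case)
 */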
static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx)
{
	struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;

	atomic_inc(&sctx->refs);
	/*
	 * Pretend to be paused: the worker holds a transaction context,
	 * so a transaction commit must not wait for it. Incrementing both
	 * scrubs_running and scrubs_paused keeps the pause accounting
	 * balanced while still preventing cancel requests from completing
	 * while the worker runs.
	 */
	mutex_lock(&fs_info->scrub_lock);
	atomic_inc(&fs_info->scrubs_running);
	atomic_inc(&fs_info->scrubs_paused);
	mutex_unlock(&fs_info->scrub_lock);

	/*
	 * The check of scrubs_running == scrubs_paused inside wait_event()
	 * is not atomic, so wake up scrub_pause_wait as soon as the
	 * counters above have been incremented.
	 */
	wake_up(&fs_info->scrub_pause_wait);

	atomic_inc(&sctx->workers_pending);
}

/* used for workers that require transaction commits */
static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx)
{
	struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;

	/*
	 * see scrub_pending_trans_workers_inc() why we're pretending
	 * to be paused in the scrub counters
	 */
	mutex_lock(&fs_info->scrub_lock);
	atomic_dec(&fs_info->scrubs_running);
	atomic_dec(&fs_info->scrubs_paused);
	mutex_unlock(&fs_info->scrub_lock);
	atomic_dec(&sctx->workers_pending);
	wake_up(&fs_info->scrub_pause_wait);
	wake_up(&sctx->list_wait);
	scrub_put_ctx(sctx);
}

static void scrub_free_csums(struct scrub_ctx *sctx)
{
	while (!list_empty(&sctx->csum_list)) {
		struct btrfs_ordered_sum *sum;
		sum = list_first_entry(&sctx->csum_list,
				       struct btrfs_ordered_sum, list);
		list_del(&sum->list);
		kfree(sum);
	}
}

static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
{
	int i;

	if (!sctx)
		return;

	scrub_free_wr_ctx(&sctx->wr_ctx);

	/* this can happen when scrub is cancelled */
	if (sctx->curr != -1) {
		struct scrub_bio *sbio = sctx->bios[sctx->curr];

		for (i = 0; i < sbio->page_count; i++) {
			WARN_ON(!sbio->pagev[i]->page);
			scrub_block_put(sbio->pagev[i]->sblock);
		}
		bio_put(sbio->bio);
	}

	for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
		struct scrub_bio *sbio = sctx->bios[i];

		if (!sbio)
			break;
		kfree(sbio);
	}

	scrub_free_csums(sctx);
	kfree(sctx);
}

static void scrub_put_ctx(struct scrub_ctx *sctx)
{
	if (atomic_dec_and_test(&sctx->refs))
		scrub_free_ctx(sctx);
}

static noinline_for_stack
struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
{
	struct scrub_ctx *sctx;
	int i;
	struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
	int pages_per_rd_bio;
	int ret;

	/*
	 * The value of pages_per_rd_bio is right for scrub, but may be too
	 * small for dev-replace, which can read from several devices in one
	 * huge bio; scrub_add_page_to_rd_bio() copes with that by submitting
	 * early when a page does not fit into the current bio.
	 */
	if (dev->bdev)
		pages_per_rd_bio = min_t(int, SCRUB_PAGES_PER_RD_BIO,
					 bio_get_nr_vecs(dev->bdev));
	else
		pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
	sctx = kzalloc(sizeof(*sctx), GFP_NOFS);
	if (!sctx)
		goto nomem;
	atomic_set(&sctx->refs, 1);
	sctx->is_dev_replace = is_dev_replace;
	sctx->pages_per_rd_bio = pages_per_rd_bio;
	sctx->curr = -1;
	sctx->dev_root = dev->dev_root;
	for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
		struct scrub_bio *sbio;

		sbio = kzalloc(sizeof(*sbio), GFP_NOFS);
		if (!sbio)
			goto nomem;
		sctx->bios[i] = sbio;

		sbio->index = i;
		sbio->sctx = sctx;
		sbio->page_count = 0;
		btrfs_init_work(&sbio->work, btrfs_scrub_helper,
				scrub_bio_end_io_worker, NULL, NULL);

		if (i != SCRUB_BIOS_PER_SCTX - 1)
			sctx->bios[i]->next_free = i + 1;
		else
			sctx->bios[i]->next_free = -1;
	}
	sctx->first_free = 0;
	sctx->nodesize = dev->dev_root->nodesize;
	sctx->sectorsize = dev->dev_root->sectorsize;
	atomic_set(&sctx->bios_in_flight, 0);
	atomic_set(&sctx->workers_pending, 0);
	atomic_set(&sctx->cancel_req, 0);
	sctx->csum_size = btrfs_super_csum_size(fs_info->super_copy);
	INIT_LIST_HEAD(&sctx->csum_list);

	spin_lock_init(&sctx->list_lock);
	spin_lock_init(&sctx->stat_lock);
	init_waitqueue_head(&sctx->list_wait);

	ret = scrub_setup_wr_ctx(sctx, &sctx->wr_ctx, fs_info,
				 fs_info->dev_replace.tgtdev, is_dev_replace);
	if (ret) {
		scrub_free_ctx(sctx);
		return ERR_PTR(ret);
	}
	return sctx;

nomem:
	scrub_free_ctx(sctx);
	return ERR_PTR(-ENOMEM);
}

static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
				     void *warn_ctx)
{
	u64 isize;
	u32 nlink;
	int ret;
	int i;
	struct extent_buffer *eb;
	struct btrfs_inode_item *inode_item;
	struct scrub_warning *swarn = warn_ctx;
	struct btrfs_fs_info *fs_info = swarn->dev->dev_root->fs_info;
	struct inode_fs_paths *ipath = NULL;
	struct btrfs_root *local_root;
	struct btrfs_key root_key;
	struct btrfs_key key;

	root_key.objectid = root;
	root_key.type = BTRFS_ROOT_ITEM_KEY;
	root_key.offset = (u64)-1;
	local_root = btrfs_read_fs_root_no_name(fs_info, &root_key);
	if (IS_ERR(local_root)) {
		ret = PTR_ERR(local_root);
		goto err;
	}

	/*
	 * this makes the path point to (inum INODE_ITEM ioff)
	 */
	key.objectid = inum;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, local_root, &key, swarn->path, 0, 0);
	if (ret) {
		btrfs_release_path(swarn->path);
		goto err;
	}

	eb = swarn->path->nodes[0];
	inode_item = btrfs_item_ptr(eb, swarn->path->slots[0],
				    struct btrfs_inode_item);
	isize = btrfs_inode_size(eb, inode_item);
	nlink = btrfs_inode_nlink(eb, inode_item);
	btrfs_release_path(swarn->path);

	ipath = init_ipath(4096, local_root, swarn->path);
	if (IS_ERR(ipath)) {
		ret = PTR_ERR(ipath);
		ipath = NULL;
		goto err;
	}
	ret = paths_from_inode(inum, ipath);

	if (ret < 0)
		goto err;

	/*
	 * we deliberately ignore the bit ipath might have been too small to
	 * hold all of the paths here
	 */
	for (i = 0; i < ipath->fspath->elem_cnt; ++i)
		printk_in_rcu(KERN_WARNING "BTRFS: %s at logical %llu on dev "
			"%s, sector %llu, root %llu, inode %llu, offset %llu, "
			"length %llu, links %u (path: %s)\n", swarn->errstr,
			swarn->logical, rcu_str_deref(swarn->dev->name),
			(unsigned long long)swarn->sector, root, inum, offset,
			min(isize - offset, (u64)PAGE_SIZE), nlink,
			(char *)(unsigned long)ipath->fspath->val[i]);

	free_ipath(ipath);
	return 0;

err:
	printk_in_rcu(KERN_WARNING "BTRFS: %s at logical %llu on dev "
		"%s, sector %llu, root %llu, inode %llu, offset %llu: path "
		"resolving failed with ret=%d\n", swarn->errstr,
		swarn->logical, rcu_str_deref(swarn->dev->name),
		(unsigned long long)swarn->sector, root, inum, offset, ret);

	free_ipath(ipath);
	return 0;
}

static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
{
	struct btrfs_device *dev;
	struct btrfs_fs_info *fs_info;
	struct btrfs_path *path;
	struct btrfs_key found_key;
	struct extent_buffer *eb;
	struct btrfs_extent_item *ei;
	struct scrub_warning swarn;
	unsigned long ptr = 0;
	u64 extent_item_pos;
	u64 flags = 0;
	u64 ref_root;
	u32 item_size;
	u8 ref_level;
	int ret;

	WARN_ON(sblock->page_count < 1);
	dev = sblock->pagev[0]->dev;
	fs_info = sblock->sctx->dev_root->fs_info;

	path = btrfs_alloc_path();
	if (!path)
		return;

	swarn.sector = (sblock->pagev[0]->physical) >> 9;
	swarn.logical = sblock->pagev[0]->logical;
	swarn.errstr = errstr;
	swarn.dev = NULL;

	ret = extent_from_logical(fs_info, swarn.logical, path, &found_key,
				  &flags);
	if (ret < 0)
		goto out;

	extent_item_pos = swarn.logical - found_key.objectid;
	swarn.extent_item_size = found_key.offset;

	eb = path->nodes[0];
	ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
	item_size = btrfs_item_size_nr(eb, path->slots[0]);

	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		do {
			ret = tree_backref_for_extent(&ptr, eb, &found_key, ei,
						      item_size, &ref_root,
						      &ref_level);
			printk_in_rcu(KERN_WARNING
				"BTRFS: %s at logical %llu on dev %s, "
				"sector %llu: metadata %s (level %d) in tree "
				"%llu\n", errstr, swarn.logical,
				rcu_str_deref(dev->name),
				(unsigned long long)swarn.sector,
				ref_level ? "node" : "leaf",
				ret < 0 ? -1 : ref_level,
				ret < 0 ? -1 : ref_root);
		} while (ret != 1);
		btrfs_release_path(path);
	} else {
		btrfs_release_path(path);
		swarn.path = path;
		swarn.dev = dev;
		iterate_extent_inodes(fs_info, found_key.objectid,
				      extent_item_pos, 1,
				      scrub_print_warning_inode, &swarn);
	}

out:
	btrfs_free_path(path);
}
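/*
 * Callback for iterate_inodes_from_logical(): re-read one page of an
 * inode that references the bad sector and let the generic read path
 * repair it. Returns 1 if the error was corrected, a negative errno
 * otherwise.
 */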
static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
{
	struct page *page = NULL;
	unsigned long index;
	struct scrub_fixup_nodatasum *fixup = fixup_ctx;
	int ret;
	int corrected = 0;
	struct btrfs_key key;
	struct inode *inode = NULL;
	struct btrfs_fs_info *fs_info;
	u64 end = offset + PAGE_SIZE - 1;
	struct btrfs_root *local_root;
	int srcu_index;

	key.objectid = root;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	fs_info = fixup->root->fs_info;
	srcu_index = srcu_read_lock(&fs_info->subvol_srcu);

	local_root = btrfs_read_fs_root_no_name(fs_info, &key);
	if (IS_ERR(local_root)) {
		srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
		return PTR_ERR(local_root);
	}

	key.type = BTRFS_INODE_ITEM_KEY;
	key.objectid = inum;
	key.offset = 0;
	inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
	srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	index = offset >> PAGE_CACHE_SHIFT;

	page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
	if (!page) {
		ret = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page)) {
		if (PageDirty(page)) {
			/*
			 * The page is dirty: the data that was on the defect
			 * sector is no longer what is in memory, so the page
			 * must not be written back over it. Treat the error
			 * as uncorrectable for now; a later scrub may find
			 * the bad sector again when no dirty page is in
			 * memory and repair it then.
			 */
			ret = -EIO;
			goto out;
		}
		ret = repair_io_failure(inode, offset, PAGE_SIZE,
					fixup->logical, page,
					offset - page_offset(page),
					fixup->mirror_num);
		unlock_page(page);
		corrected = !ret;
	} else {
		/*
		 * we need to get good data first. the general readpage path
		 * will call repair_io_failure for us, we just have to make
		 * sure we read the bad mirror.
		 */
		ret = set_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
				      EXTENT_DAMAGED, GFP_NOFS);
		if (ret) {
			/* set_extent_bits should give proper error */
			WARN_ON(ret > 0);
			if (ret > 0)
				ret = -EFAULT;
			goto out;
		}

		ret = extent_read_full_page(&BTRFS_I(inode)->io_tree, page,
					    btrfs_get_extent,
					    fixup->mirror_num);
		wait_on_page_locked(page);

		corrected = !test_range_bit(&BTRFS_I(inode)->io_tree, offset,
					    end, EXTENT_DAMAGED, 0, NULL);
		if (!corrected)
			clear_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
					  EXTENT_DAMAGED, GFP_NOFS);
	}

out:
	if (page)
		put_page(page);

	iput(inode);

	if (ret < 0)
		return ret;

	if (ret == 0 && corrected) {
		/*
		 * we only need to call readpage for one of the inodes that
		 * pointed to the same block
		 */
		return 1;
	}

	return -EIO;
}
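/*
 * Fixup worker for data without checksum (nodatasum): trigger ordinary
 * page reads of the affected range through the failed mirror so that the
 * generic read-repair code rewrites the good copy in place.
 */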
static void scrub_fixup_nodatasum(struct btrfs_work *work)
{
	int ret;
	struct scrub_fixup_nodatasum *fixup;
	struct scrub_ctx *sctx;
	struct btrfs_trans_handle *trans = NULL;
	struct btrfs_path *path;
	int uncorrectable = 0;

	fixup = container_of(work, struct scrub_fixup_nodatasum, work);
	sctx = fixup->sctx;

	path = btrfs_alloc_path();
	if (!path) {
		spin_lock(&sctx->stat_lock);
		++sctx->stat.malloc_errors;
		spin_unlock(&sctx->stat_lock);
		uncorrectable = 1;
		goto out;
	}

	trans = btrfs_join_transaction(fixup->root);
	if (IS_ERR(trans)) {
		uncorrectable = 1;
		goto out;
	}

	/*
	 * The idea is to trigger a regular read through the standard page
	 * cache path for each inode that references the bad extent. The
	 * read is directed at the failed mirror, so the generic repair
	 * code finds a good copy and rewrites the bad sector in place.
	 */
	ret = iterate_inodes_from_logical(fixup->logical, fixup->root->fs_info,
					  path, scrub_fixup_readpage,
					  fixup);
	if (ret < 0) {
		uncorrectable = 1;
		goto out;
	}
	WARN_ON(ret != 1);

	spin_lock(&sctx->stat_lock);
	++sctx->stat.corrected_errors;
	spin_unlock(&sctx->stat_lock);

out:
	if (trans && !IS_ERR(trans))
		btrfs_end_transaction(trans, fixup->root);
	if (uncorrectable) {
		spin_lock(&sctx->stat_lock);
		++sctx->stat.uncorrectable_errors;
		spin_unlock(&sctx->stat_lock);
		btrfs_dev_replace_stats_inc(
			&sctx->dev_root->fs_info->dev_replace.
			num_uncorrectable_read_errors);
		printk_ratelimited_in_rcu(KERN_ERR "BTRFS: "
		    "unable to fixup (nodatasum) error at logical %llu on dev %s\n",
			fixup->logical, rcu_str_deref(fixup->dev->name));
	}

	btrfs_free_path(path);
	kfree(fixup);

	scrub_pending_trans_workers_dec(sctx);
}

static inline void scrub_get_recover(struct scrub_recover *recover)
{
	atomic_inc(&recover->refs);
}

static inline void scrub_put_recover(struct scrub_recover *recover)
{
	if (atomic_dec_and_test(&recover->refs)) {
		btrfs_put_bbio(recover->bbio);
		kfree(recover);
	}
}
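/*
 * scrub_handle_errored_block gets called when either verification of the
 * pages failed or the bio failed to read, e.g. with EIO. In the latter
 * case, this function handles all pages in the bio, even though only one
 * may be bad. The goal of this function is to repair the errored block
 * by using the contents of one of the mirrors.
 */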
static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
{
	struct scrub_ctx *sctx = sblock_to_check->sctx;
	struct btrfs_device *dev;
	struct btrfs_fs_info *fs_info;
	u64 length;
	u64 logical;
	u64 generation;
	unsigned int failed_mirror_index;
	unsigned int is_metadata;
	unsigned int have_csum;
	u8 *csum;
	struct scrub_block *sblocks_for_recheck; /* holds one for each mirror */
	struct scrub_block *sblock_bad;
	int ret;
	int mirror_index;
	int page_num;
	int success;
	static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	BUG_ON(sblock_to_check->page_count < 1);
	fs_info = sctx->dev_root->fs_info;
	if (sblock_to_check->pagev[0]->flags & BTRFS_EXTENT_FLAG_SUPER) {
		/*
		 * if we find an error in a super block, we just report it.
		 * They will get written with the next transaction commit
		 * anyway
		 */
		spin_lock(&sctx->stat_lock);
		++sctx->stat.super_errors;
		spin_unlock(&sctx->stat_lock);
		return 0;
	}
	length = sblock_to_check->page_count * PAGE_SIZE;
	logical = sblock_to_check->pagev[0]->logical;
	generation = sblock_to_check->pagev[0]->generation;
	BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1);
	failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1;
	is_metadata = !(sblock_to_check->pagev[0]->flags &
			BTRFS_EXTENT_FLAG_DATA);
	have_csum = sblock_to_check->pagev[0]->have_csum;
	csum = sblock_to_check->pagev[0]->csum;
	dev = sblock_to_check->pagev[0]->dev;

	if (sctx->is_dev_replace && !is_metadata && !have_csum) {
		sblocks_for_recheck = NULL;
		goto nodatasum_case;
	}

	/*
	 * Read all mirrors one after the other, page by page this time, so
	 * that it is known exactly which pages of which mirror have I/O
	 * errors. That way the data can often be repaired even when the
	 * errors on the different mirrors do not line up: good pages are
	 * picked from whichever mirror has them.
	 */
	sblocks_for_recheck = kzalloc(BTRFS_MAX_MIRRORS *
				      sizeof(*sblocks_for_recheck),
				      GFP_NOFS);
	if (!sblocks_for_recheck) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.malloc_errors++;
		sctx->stat.read_errors++;
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
		goto out;
	}

	/* setup the context, map the logical blocks and alloc the pages */
	ret = scrub_setup_recheck_block(sblock_to_check, sblocks_for_recheck);
	if (ret) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.read_errors++;
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
		goto out;
	}
	BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
	sblock_bad = sblocks_for_recheck + failed_mirror_index;

	/* build and submit the bios for the failed mirror, check checksums */
	scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum,
			    csum, generation, sctx->csum_size, 1);

	if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
	    sblock_bad->no_io_error_seen) {
		/*
		 * the error disappeared after reading page by page, or
		 * the area was part of a huge bio and other parts of the
		 * bio caused I/O errors, or the block layer merged several
		 * read requests into one and the error is caused by a
		 * different bio part
		 */
		spin_lock(&sctx->stat_lock);
		sctx->stat.unverified_errors++;
		sblock_to_check->data_corrected = 1;
		spin_unlock(&sctx->stat_lock);

		if (sctx->is_dev_replace)
			scrub_write_block_to_dev_replace(sblock_bad);
		goto out;
	}

	if (!sblock_bad->no_io_error_seen) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.read_errors++;
		spin_unlock(&sctx->stat_lock);
		if (__ratelimit(&_rs))
			scrub_print_warning("i/o error", sblock_to_check);
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
	} else if (sblock_bad->checksum_error) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.csum_errors++;
		spin_unlock(&sctx->stat_lock);
		if (__ratelimit(&_rs))
			scrub_print_warning("checksum error", sblock_to_check);
		btrfs_dev_stat_inc_and_print(dev,
					     BTRFS_DEV_STAT_CORRUPTION_ERRS);
	} else if (sblock_bad->header_error) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.verify_errors++;
		spin_unlock(&sctx->stat_lock);
		if (__ratelimit(&_rs))
			scrub_print_warning("checksum/header error",
					    sblock_to_check);
		if (sblock_bad->generation_error)
			btrfs_dev_stat_inc_and_print(dev,
				BTRFS_DEV_STAT_GENERATION_ERRS);
		else
			btrfs_dev_stat_inc_and_print(dev,
				BTRFS_DEV_STAT_CORRUPTION_ERRS);
	}

	if (sctx->readonly) {
		ASSERT(!sctx->is_dev_replace);
		goto out;
	}

	if (!is_metadata && !have_csum) {
		struct scrub_fixup_nodatasum *fixup_nodatasum;

		WARN_ON(sctx->is_dev_replace);

nodatasum_case:

		/*
		 * !is_metadata and !have_csum: data without checksum cannot
		 * be verified here, so let the fixup worker trigger ordinary
		 * page reads of every inode that references the extent; the
		 * generic read-repair code then rewrites the bad copy.
		 */
		fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS);
		if (!fixup_nodatasum)
			goto did_not_correct_error;
		fixup_nodatasum->sctx = sctx;
		fixup_nodatasum->dev = dev;
		fixup_nodatasum->logical = logical;
		fixup_nodatasum->root = fs_info->extent_root;
		fixup_nodatasum->mirror_num = failed_mirror_index + 1;
		scrub_pending_trans_workers_inc(sctx);
		btrfs_init_work(&fixup_nodatasum->work, btrfs_scrub_helper,
				scrub_fixup_nodatasum, NULL, NULL);
		btrfs_queue_work(fs_info->scrub_workers,
				 &fixup_nodatasum->work);
		goto out;
	}

	/*
	 * now build and submit the bios for the other mirrors, check
	 * checksums. If a mirror is found that is completely free of I/O
	 * and checksum errors, the whole bad block is rewritten from it
	 * (or, in the dev-replace case, written to the target device).
	 */
	for (mirror_index = 0;
	     mirror_index < BTRFS_MAX_MIRRORS &&
	     sblocks_for_recheck[mirror_index].page_count > 0;
	     mirror_index++) {
		struct scrub_block *sblock_other;

		if (mirror_index == failed_mirror_index)
			continue;
		sblock_other = sblocks_for_recheck + mirror_index;

		/* build and submit the bios, check checksums */
		scrub_recheck_block(fs_info, sblock_other, is_metadata,
				    have_csum, csum, generation,
				    sctx->csum_size, 0);

		if (!sblock_other->header_error &&
		    !sblock_other->checksum_error &&
		    sblock_other->no_io_error_seen) {
			if (sctx->is_dev_replace) {
				scrub_write_block_to_dev_replace(sblock_other);
				goto corrected_error;
			} else {
				ret = scrub_repair_block_from_good_copy(
						sblock_bad, sblock_other);
				if (!ret)
					goto corrected_error;
			}
		}
	}

	if (sblock_bad->no_io_error_seen && !sctx->is_dev_replace)
		goto did_not_correct_error;

	/*
	 * No mirror is completely error free. Repair page by page instead:
	 * for each bad page, pick any mirror whose copy of that page read
	 * without I/O error. In the dev-replace case every page is written
	 * to the target device, falling back to the bad copy when no good
	 * one exists (it may still be readable by the regular read path,
	 * which selects mirrors per request).
	 */
	success = 1;
	for (page_num = 0; page_num < sblock_bad->page_count;
	     page_num++) {
		struct scrub_page *page_bad = sblock_bad->pagev[page_num];
		struct scrub_block *sblock_other = NULL;

		/* skip no-io-error page in scrub */
		if (!page_bad->io_error && !sctx->is_dev_replace)
			continue;

		/* try to find no-io-error page in mirrors */
		if (page_bad->io_error) {
			for (mirror_index = 0;
			     mirror_index < BTRFS_MAX_MIRRORS &&
			     sblocks_for_recheck[mirror_index].page_count > 0;
			     mirror_index++) {
				if (!sblocks_for_recheck[mirror_index].
				    pagev[page_num]->io_error) {
					sblock_other = sblocks_for_recheck +
						       mirror_index;
					break;
				}
			}
			if (!sblock_other)
				success = 0;
		}

		if (sctx->is_dev_replace) {
			/*
			 * did not find a mirror to fetch the page
			 * from. scrub_write_page_to_dev_replace()
			 * handles this case (page->io_error), by
			 * filling the block with zeros before
			 * submitting the write request
			 */
			if (!sblock_other)
				sblock_other = sblock_bad;

			if (scrub_write_page_to_dev_replace(sblock_other,
							    page_num) != 0) {
				btrfs_dev_replace_stats_inc(
					&sctx->dev_root->
					fs_info->dev_replace.
					num_write_errors);
				success = 0;
			}
		} else if (sblock_other) {
			ret = scrub_repair_page_from_good_copy(sblock_bad,
							       sblock_other,
							       page_num, 0);
			if (0 == ret)
				page_bad->io_error = 0;
			else
				success = 0;
		}
	}

	if (success && !sctx->is_dev_replace) {
		if (is_metadata || have_csum) {
			/*
			 * need to verify the checksum now that all
			 * sectors on disk are repaired (the write
			 * request for data to be repaired is on its way).
			 * Just be lazy and use scrub_recheck_block()
			 * which re-reads the data before the checksum
			 * is verified, but most likely the data comes out
			 * of the page cache.
			 */
			scrub_recheck_block(fs_info, sblock_bad,
					    is_metadata, have_csum, csum,
					    generation, sctx->csum_size, 1);
			if (!sblock_bad->header_error &&
			    !sblock_bad->checksum_error &&
			    sblock_bad->no_io_error_seen)
				goto corrected_error;
			else
				goto did_not_correct_error;
		} else {
corrected_error:
			spin_lock(&sctx->stat_lock);
			sctx->stat.corrected_errors++;
			sblock_to_check->data_corrected = 1;
			spin_unlock(&sctx->stat_lock);
			printk_ratelimited_in_rcu(KERN_ERR
				"BTRFS: fixed up error at logical %llu on dev %s\n",
				logical, rcu_str_deref(dev->name));
		}
	} else {
did_not_correct_error:
		spin_lock(&sctx->stat_lock);
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		printk_ratelimited_in_rcu(KERN_ERR
			"BTRFS: unable to fixup (regular) error at logical %llu on dev %s\n",
			logical, rcu_str_deref(dev->name));
	}

out:
	if (sblocks_for_recheck) {
		for (mirror_index = 0; mirror_index < BTRFS_MAX_MIRRORS;
		     mirror_index++) {
			struct scrub_block *sblock = sblocks_for_recheck +
						     mirror_index;
			struct scrub_recover *recover;
			int page_index;

			for (page_index = 0; page_index < sblock->page_count;
			     page_index++) {
				sblock->pagev[page_index]->sblock = NULL;
				recover = sblock->pagev[page_index]->recover;
				if (recover) {
					scrub_put_recover(recover);
					sblock->pagev[page_index]->recover =
									NULL;
				}
				scrub_page_put(sblock->pagev[page_index]);
			}
		}
		kfree(sblocks_for_recheck);
	}

	return 0;
}

static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio)
{
	if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
		return 2;
	else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
		return 3;
	else
		return (int)bbio->num_stripes;
}

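/*
 * Map a logical address back to a stripe index and the offset inside that
 * stripe. For RAID5/6 the bbio's raid_map is searched (skipping the P/Q
 * stripes); for all other profiles the mirror number directly selects the
 * stripe.
 */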
static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
						 u64 *raid_map,
						 u64 mapped_length,
						 int nstripes, int mirror,
						 int *stripe_index,
						 u64 *stripe_offset)
{
	int i;

	if (map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
		/* RAID5/6 */
		for (i = 0; i < nstripes; i++) {
			if (raid_map[i] == RAID6_Q_STRIPE ||
			    raid_map[i] == RAID5_P_STRIPE)
				continue;

			if (logical >= raid_map[i] &&
			    logical < raid_map[i] + mapped_length)
				break;
		}

		*stripe_index = i;
		*stripe_offset = logical - raid_map[i];
	} else {
		/* The other RAID type */
		*stripe_index = mirror;
		*stripe_offset = 0;
	}
}
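/*
 * Build one scrub_block per mirror for the broken block, page by page, so
 * that each page can later be read and checked individually. The pages
 * reference a shared scrub_recover that holds the bbio mapping.
 */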
static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
				     struct scrub_block *sblocks_for_recheck)
{
	struct scrub_ctx *sctx = original_sblock->sctx;
	struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
	u64 length = original_sblock->page_count * PAGE_SIZE;
	u64 logical = original_sblock->pagev[0]->logical;
	struct scrub_recover *recover;
	struct btrfs_bio *bbio;
	u64 sublen;
	u64 mapped_length;
	u64 stripe_offset;
	int stripe_index;
	int page_index = 0;
	int mirror_index;
	int nmirrors;
	int ret;

	/*
	 * note: the two members refs and outstanding_pages
	 * are not used (and not set) in the blocks that are used for
	 * the recheck procedure
	 */
	while (length > 0) {
		sublen = min_t(u64, length, PAGE_SIZE);
		mapped_length = sublen;
		bbio = NULL;

		/*
		 * with a length of PAGE_SIZE, each returned stripe
		 * represents one mirror
		 */
		ret = btrfs_map_sblock(fs_info, REQ_GET_READ_MIRRORS, logical,
				       &mapped_length, &bbio, 0, 1);
		if (ret || !bbio || mapped_length < sublen) {
			btrfs_put_bbio(bbio);
			return -EIO;
		}

		recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
		if (!recover) {
			btrfs_put_bbio(bbio);
			return -ENOMEM;
		}

		atomic_set(&recover->refs, 1);
		recover->bbio = bbio;
		recover->map_length = mapped_length;

		BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO);

		nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS);

		for (mirror_index = 0; mirror_index < nmirrors;
		     mirror_index++) {
			struct scrub_block *sblock;
			struct scrub_page *page;

			sblock = sblocks_for_recheck + mirror_index;
			sblock->sctx = sctx;
			page = kzalloc(sizeof(*page), GFP_NOFS);
			if (!page) {
leave_nomem:
				spin_lock(&sctx->stat_lock);
				sctx->stat.malloc_errors++;
				spin_unlock(&sctx->stat_lock);
				scrub_put_recover(recover);
				return -ENOMEM;
			}
			scrub_page_get(page);
			sblock->pagev[page_index] = page;
			page->logical = logical;

			scrub_stripe_index_and_offset(logical,
						      bbio->map_type,
						      bbio->raid_map,
						      mapped_length,
						      bbio->num_stripes -
						      bbio->num_tgtdevs,
						      mirror_index,
						      &stripe_index,
						      &stripe_offset);
			page->physical = bbio->stripes[stripe_index].physical +
					 stripe_offset;
			page->dev = bbio->stripes[stripe_index].dev;

			BUG_ON(page_index >= original_sblock->page_count);
			page->physical_for_dev_replace =
				original_sblock->pagev[page_index]->
				physical_for_dev_replace;
			/* for missing devices, dev->bdev is NULL */
			page->mirror_num = mirror_index + 1;
			sblock->page_count++;
			page->page = alloc_page(GFP_NOFS);
			if (!page->page)
				goto leave_nomem;

			scrub_get_recover(recover);
			page->recover = recover;
		}
		scrub_put_recover(recover);
		length -= sublen;
		logical += sublen;
		page_index++;
	}

	return 0;
}

struct scrub_bio_ret {
	struct completion event;
	int error;
};

static void scrub_bio_wait_endio(struct bio *bio, int error)
{
	struct scrub_bio_ret *ret = bio->bi_private;

	ret->error = error;
	complete(&ret->event);
}

static inline int scrub_is_page_on_raid56(struct scrub_page *page)
{
	return page->recover &&
	       (page->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
}

static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
					struct bio *bio,
					struct scrub_page *page)
{
	struct scrub_bio_ret done;
	int ret;

	init_completion(&done.event);
	done.error = 0;
	bio->bi_iter.bi_sector = page->logical >> 9;
	bio->bi_private = &done;
	bio->bi_end_io = scrub_bio_wait_endio;

	ret = raid56_parity_recover(fs_info->fs_root, bio, page->recover->bbio,
				    page->recover->map_length,
				    page->mirror_num, 0);
	if (ret)
		return ret;

	wait_for_completion(&done.event);
	if (done.error)
		return -EIO;

	return 0;
}
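/*
 * This function reads the block page by page (so that I/O errors can be
 * attributed to individual pages), then recomputes the checksum and
 * verifies the metadata header. Errors are recorded in the scrub_page and
 * scrub_block flags; no repair is attempted here.
 */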
static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
				struct scrub_block *sblock, int is_metadata,
				int have_csum, u8 *csum, u64 generation,
				u16 csum_size, int retry_failed_mirror)
{
	int page_num;

	sblock->no_io_error_seen = 1;
	sblock->header_error = 0;
	sblock->checksum_error = 0;

	for (page_num = 0; page_num < sblock->page_count; page_num++) {
		struct bio *bio;
		struct scrub_page *page = sblock->pagev[page_num];

		if (page->dev->bdev == NULL) {
			page->io_error = 1;
			sblock->no_io_error_seen = 0;
			continue;
		}

		WARN_ON(!page->page);
		bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
		if (!bio) {
			page->io_error = 1;
			sblock->no_io_error_seen = 0;
			continue;
		}
		bio->bi_bdev = page->dev->bdev;

		bio_add_page(bio, page->page, PAGE_SIZE, 0);
		if (!retry_failed_mirror && scrub_is_page_on_raid56(page)) {
			if (scrub_submit_raid56_bio_wait(fs_info, bio, page))
				sblock->no_io_error_seen = 0;
		} else {
			bio->bi_iter.bi_sector = page->physical >> 9;

			if (btrfsic_submit_bio_wait(READ, bio))
				sblock->no_io_error_seen = 0;
		}

		bio_put(bio);
	}

	if (sblock->no_io_error_seen)
		scrub_recheck_block_checksum(fs_info, sblock, is_metadata,
					     have_csum, csum, generation,
					     csum_size);
}

static inline int scrub_check_fsid(u8 fsid[],
				   struct scrub_page *spage)
{
	struct btrfs_fs_devices *fs_devices = spage->dev->fs_devices;
	int ret;

	ret = memcmp(fsid, fs_devices->fsid, BTRFS_UUID_SIZE);
	return !ret;
}

static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
					 struct scrub_block *sblock,
					 int is_metadata, int have_csum,
					 const u8 *csum, u64 generation,
					 u16 csum_size)
{
	int page_num;
	u8 calculated_csum[BTRFS_CSUM_SIZE];
	u32 crc = ~(u32)0;
	void *mapped_buffer;

	WARN_ON(!sblock->pagev[0]->page);
	if (is_metadata) {
		struct btrfs_header *h;

		mapped_buffer = kmap_atomic(sblock->pagev[0]->page);
		h = (struct btrfs_header *)mapped_buffer;

		if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h) ||
		    !scrub_check_fsid(h->fsid, sblock->pagev[0]) ||
		    memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
			   BTRFS_UUID_SIZE)) {
			sblock->header_error = 1;
		} else if (generation != btrfs_stack_header_generation(h)) {
			sblock->header_error = 1;
			sblock->generation_error = 1;
		}
		csum = h->csum;
	} else {
		if (!have_csum)
			return;

		mapped_buffer = kmap_atomic(sblock->pagev[0]->page);
	}

	for (page_num = 0;;) {
		if (page_num == 0 && is_metadata)
			crc = btrfs_csum_data(
				((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE,
				crc, PAGE_SIZE - BTRFS_CSUM_SIZE);
		else
			crc = btrfs_csum_data(mapped_buffer, crc, PAGE_SIZE);

		kunmap_atomic(mapped_buffer);
		page_num++;
		if (page_num >= sblock->page_count)
			break;
		WARN_ON(!sblock->pagev[page_num]->page);

		mapped_buffer = kmap_atomic(sblock->pagev[page_num]->page);
	}

	btrfs_csum_final(crc, calculated_csum);
	if (memcmp(calculated_csum, csum, csum_size))
		sblock->checksum_error = 1;
}

static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
					     struct scrub_block *sblock_good)
{
	int page_num;
	int ret = 0;

	for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
		int ret_sub;

		ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
							   sblock_good,
							   page_num, 1);
		if (ret_sub)
			ret = ret_sub;
	}

	return ret;
}

static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
					    struct scrub_block *sblock_good,
					    int page_num, int force_write)
{
	struct scrub_page *page_bad = sblock_bad->pagev[page_num];
	struct scrub_page *page_good = sblock_good->pagev[page_num];

	BUG_ON(page_bad->page == NULL);
	BUG_ON(page_good->page == NULL);
	if (force_write || sblock_bad->header_error ||
	    sblock_bad->checksum_error || page_bad->io_error) {
		struct bio *bio;
		int ret;

		if (!page_bad->dev->bdev) {
			printk_ratelimited(KERN_WARNING "BTRFS: "
				"scrub_repair_page_from_good_copy(bdev == NULL) "
				"is unexpected!\n");
			return -EIO;
		}

		bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
		if (!bio)
			return -EIO;
		bio->bi_bdev = page_bad->dev->bdev;
		bio->bi_iter.bi_sector = page_bad->physical >> 9;

		ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
		if (PAGE_SIZE != ret) {
			bio_put(bio);
			return -EIO;
		}

		if (btrfsic_submit_bio_wait(WRITE, bio)) {
			btrfs_dev_stat_inc_and_print(page_bad->dev,
				BTRFS_DEV_STAT_WRITE_ERRS);
			btrfs_dev_replace_stats_inc(
				&sblock_bad->sctx->dev_root->fs_info->
				dev_replace.num_write_errors);
			bio_put(bio);
			return -EIO;
		}
		bio_put(bio);
	}

	return 0;
}

static void scrub_write_block_to_dev_replace(struct scrub_block *sblock)
{
	int page_num;

	/*
	 * This block is used for the check of the parity on the source
	 * device, so the data needn't be written into the destination
	 * device.
	 */
	if (sblock->sparity)
		return;

	for (page_num = 0; page_num < sblock->page_count; page_num++) {
		int ret;

		ret = scrub_write_page_to_dev_replace(sblock, page_num);
		if (ret)
			btrfs_dev_replace_stats_inc(
				&sblock->sctx->dev_root->fs_info->dev_replace.
				num_write_errors);
	}
}

static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
					   int page_num)
{
	struct scrub_page *spage = sblock->pagev[page_num];

	BUG_ON(spage->page == NULL);
	if (spage->io_error) {
		void *mapped_buffer = kmap_atomic(spage->page);

		memset(mapped_buffer, 0, PAGE_CACHE_SIZE);
		flush_dcache_page(spage->page);
		kunmap_atomic(mapped_buffer);
	}
	return scrub_add_page_to_wr_bio(sblock->sctx, spage);
}

static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage)
{
	struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx;
	struct scrub_bio *sbio;
	int ret;

	mutex_lock(&wr_ctx->wr_lock);
again:
	if (!wr_ctx->wr_curr_bio) {
		wr_ctx->wr_curr_bio = kzalloc(sizeof(*wr_ctx->wr_curr_bio),
					      GFP_NOFS);
		if (!wr_ctx->wr_curr_bio) {
			mutex_unlock(&wr_ctx->wr_lock);
			return -ENOMEM;
		}
		wr_ctx->wr_curr_bio->sctx = sctx;
		wr_ctx->wr_curr_bio->page_count = 0;
	}
	sbio = wr_ctx->wr_curr_bio;
	if (sbio->page_count == 0) {
		struct bio *bio;

		sbio->physical = spage->physical_for_dev_replace;
		sbio->logical = spage->logical;
		sbio->dev = wr_ctx->tgtdev;
		bio = sbio->bio;
		if (!bio) {
			bio = btrfs_io_bio_alloc(GFP_NOFS,
					wr_ctx->pages_per_wr_bio);
			if (!bio) {
				mutex_unlock(&wr_ctx->wr_lock);
				return -ENOMEM;
			}
			sbio->bio = bio;
		}

		bio->bi_private = sbio;
		bio->bi_end_io = scrub_wr_bio_end_io;
		bio->bi_bdev = sbio->dev->bdev;
		bio->bi_iter.bi_sector = sbio->physical >> 9;
		sbio->err = 0;
	} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
		   spage->physical_for_dev_replace ||
		   sbio->logical + sbio->page_count * PAGE_SIZE !=
		   spage->logical) {
		scrub_wr_submit(sctx);
		goto again;
	}

	ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
	if (ret != PAGE_SIZE) {
		if (sbio->page_count < 1) {
			bio_put(sbio->bio);
			sbio->bio = NULL;
			mutex_unlock(&wr_ctx->wr_lock);
			return -EIO;
		}
		scrub_wr_submit(sctx);
		goto again;
	}

	sbio->pagev[sbio->page_count] = spage;
	scrub_page_get(spage);
	sbio->page_count++;
	if (sbio->page_count == wr_ctx->pages_per_wr_bio)
		scrub_wr_submit(sctx);
	mutex_unlock(&wr_ctx->wr_lock);

	return 0;
}

static void scrub_wr_submit(struct scrub_ctx *sctx)
{
	struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx;
	struct scrub_bio *sbio;

	if (!wr_ctx->wr_curr_bio)
		return;

	sbio = wr_ctx->wr_curr_bio;
	wr_ctx->wr_curr_bio = NULL;
	WARN_ON(!sbio->bio->bi_bdev);
	scrub_pending_bio_inc(sctx);
	/* process all writes in a single worker thread. Then the block layer
	 * orders the requests before sending them to the driver which
	 * doubles the write performance */
	btrfsic_submit_bio(WRITE, sbio->bio);
}

static void scrub_wr_bio_end_io(struct bio *bio, int err)
{
	struct scrub_bio *sbio = bio->bi_private;
	struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info;

	sbio->err = err;
	sbio->bio = bio;

	btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper,
			scrub_wr_bio_end_io_worker, NULL, NULL);
	btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work);
}

static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
{
	struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
	struct scrub_ctx *sctx = sbio->sctx;
	int i;

	WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
	if (sbio->err) {
		struct btrfs_dev_replace *dev_replace =
			&sbio->sctx->dev_root->fs_info->dev_replace;

		for (i = 0; i < sbio->page_count; i++) {
			struct scrub_page *spage = sbio->pagev[i];

			spage->io_error = 1;
			btrfs_dev_replace_stats_inc(&dev_replace->
						    num_write_errors);
		}
	}

	for (i = 0; i < sbio->page_count; i++)
		scrub_page_put(sbio->pagev[i]);

	bio_put(sbio->bio);
	kfree(sbio);
	scrub_pending_bio_dec(sctx);
}

static int scrub_checksum(struct scrub_block *sblock)
{
	u64 flags;
	int ret;

	WARN_ON(sblock->page_count < 1);
	flags = sblock->pagev[0]->flags;
	ret = 0;
	if (flags & BTRFS_EXTENT_FLAG_DATA)
		ret = scrub_checksum_data(sblock);
	else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
		ret = scrub_checksum_tree_block(sblock);
	else if (flags & BTRFS_EXTENT_FLAG_SUPER)
		(void)scrub_checksum_super(sblock);
	else
		WARN_ON(1);
	if (ret)
		scrub_handle_errored_block(sblock);

	return ret;
}

static int scrub_checksum_data(struct scrub_block *sblock)
{
	struct scrub_ctx *sctx = sblock->sctx;
	u8 csum[BTRFS_CSUM_SIZE];
	u8 *on_disk_csum;
	struct page *page;
	void *buffer;
	u32 crc = ~(u32)0;
	int fail = 0;
	u64 len;
	int index;

	BUG_ON(sblock->page_count < 1);
	if (!sblock->pagev[0]->have_csum)
		return 0;

	on_disk_csum = sblock->pagev[0]->csum;
	page = sblock->pagev[0]->page;
	buffer = kmap_atomic(page);

	len = sctx->sectorsize;
	index = 0;
	for (;;) {
		u64 l = min_t(u64, len, PAGE_SIZE);

		crc = btrfs_csum_data(buffer, crc, l);
		kunmap_atomic(buffer);
		len -= l;
		if (len == 0)
			break;
		index++;
		BUG_ON(index >= sblock->page_count);
		BUG_ON(!sblock->pagev[index]->page);
		page = sblock->pagev[index]->page;
		buffer = kmap_atomic(page);
	}

	btrfs_csum_final(crc, csum);
	if (memcmp(csum, on_disk_csum, sctx->csum_size))
		fail = 1;

	return fail;
}

static int scrub_checksum_tree_block(struct scrub_block *sblock)
{
	struct scrub_ctx *sctx = sblock->sctx;
	struct btrfs_header *h;
	struct btrfs_root *root = sctx->dev_root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u8 calculated_csum[BTRFS_CSUM_SIZE];
	u8 on_disk_csum[BTRFS_CSUM_SIZE];
	struct page *page;
	void *mapped_buffer;
	u64 mapped_size;
	void *p;
	u32 crc = ~(u32)0;
	int fail = 0;
	int crc_fail = 0;
	u64 len;
	int index;

	BUG_ON(sblock->page_count < 1);
	page = sblock->pagev[0]->page;
	mapped_buffer = kmap_atomic(page);
	h = (struct btrfs_header *)mapped_buffer;
	memcpy(on_disk_csum, h->csum, sctx->csum_size);

	/*
	 * we don't use the getter functions here, as we
	 * a) don't have an extent buffer and
	 * b) the page is already kmapped
	 */
	if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h))
		++fail;

	if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h))
		++fail;

	if (!scrub_check_fsid(h->fsid, sblock->pagev[0]))
		++fail;

	if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
		   BTRFS_UUID_SIZE))
		++fail;

	len = sctx->nodesize - BTRFS_CSUM_SIZE;
	mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
	p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
	index = 0;
	for (;;) {
		u64 l = min_t(u64, len, mapped_size);

		crc = btrfs_csum_data(p, crc, l);
		kunmap_atomic(mapped_buffer);
		len -= l;
		if (len == 0)
			break;
		index++;
		BUG_ON(index >= sblock->page_count);
		BUG_ON(!sblock->pagev[index]->page);
		page = sblock->pagev[index]->page;
		mapped_buffer = kmap_atomic(page);
		mapped_size = PAGE_SIZE;
		p = mapped_buffer;
	}

	btrfs_csum_final(crc, calculated_csum);
	if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
		++crc_fail;

	return fail || crc_fail;
}

static int scrub_checksum_super(struct scrub_block *sblock)
{
	struct btrfs_super_block *s;
	struct scrub_ctx *sctx = sblock->sctx;
	u8 calculated_csum[BTRFS_CSUM_SIZE];
	u8 on_disk_csum[BTRFS_CSUM_SIZE];
	struct page *page;
	void *mapped_buffer;
	u64 mapped_size;
	void *p;
	u32 crc = ~(u32)0;
	int fail_gen = 0;
	int fail_cor = 0;
	u64 len;
	int index;

	BUG_ON(sblock->page_count < 1);
	page = sblock->pagev[0]->page;
	mapped_buffer = kmap_atomic(page);
	s = (struct btrfs_super_block *)mapped_buffer;
	memcpy(on_disk_csum, s->csum, sctx->csum_size);

	if (sblock->pagev[0]->logical != btrfs_super_bytenr(s))
		++fail_cor;

	if (sblock->pagev[0]->generation != btrfs_super_generation(s))
		++fail_gen;

	if (!scrub_check_fsid(s->fsid, sblock->pagev[0]))
		++fail_cor;

	len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE;
	mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
	p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
	index = 0;
	for (;;) {
		u64 l = min_t(u64, len, mapped_size);

		crc = btrfs_csum_data(p, crc, l);
		kunmap_atomic(mapped_buffer);
		len -= l;
		if (len == 0)
			break;
		index++;
		BUG_ON(index >= sblock->page_count);
		BUG_ON(!sblock->pagev[index]->page);
		page = sblock->pagev[index]->page;
		mapped_buffer = kmap_atomic(page);
		mapped_size = PAGE_SIZE;
		p = mapped_buffer;
	}

	btrfs_csum_final(crc, calculated_csum);
	if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
		++fail_cor;

	if (fail_cor + fail_gen) {
		/*
		 * if we find an error in a super block, we just report it.
		 * They will get written with the next transaction commit
		 * anyway
		 */
		spin_lock(&sctx->stat_lock);
		++sctx->stat.super_errors;
		spin_unlock(&sctx->stat_lock);
		if (fail_cor)
			btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
				BTRFS_DEV_STAT_CORRUPTION_ERRS);
		else
			btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
				BTRFS_DEV_STAT_GENERATION_ERRS);
	}

	return fail_cor + fail_gen;
}

static void scrub_block_get(struct scrub_block *sblock)
{
	atomic_inc(&sblock->refs);
}

static void scrub_block_put(struct scrub_block *sblock)
{
	if (atomic_dec_and_test(&sblock->refs)) {
		int i;

		if (sblock->sparity)
			scrub_parity_put(sblock->sparity);

		for (i = 0; i < sblock->page_count; i++)
			scrub_page_put(sblock->pagev[i]);
		kfree(sblock);
	}
}

static void scrub_page_get(struct scrub_page *spage)
{
	atomic_inc(&spage->refs);
}

static void scrub_page_put(struct scrub_page *spage)
{
	if (atomic_dec_and_test(&spage->refs)) {
		if (spage->page)
			__free_page(spage->page);
		kfree(spage);
	}
}

static void scrub_submit(struct scrub_ctx *sctx)
{
	struct scrub_bio *sbio;

	if (sctx->curr == -1)
		return;

	sbio = sctx->bios[sctx->curr];
	sctx->curr = -1;
	scrub_pending_bio_inc(sctx);

	if (!sbio->bio->bi_bdev) {
		/*
		 * this case should not happen. If btrfs_map_block() is
		 * wrong, it could happen for dev-replace operations on
		 * missing devices when no mirrors are available, but in
		 * that case it should be safe to just fail this bio.
		 */
		printk_ratelimited(KERN_WARNING
			"BTRFS: scrub_submit(bio bdev == NULL) is unexpected!\n");
		bio_endio(sbio->bio, -EIO);
	} else {
		btrfsic_submit_bio(READ, sbio->bio);
	}
}

static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage)
{
	struct scrub_block *sblock = spage->sblock;
	struct scrub_bio *sbio;
	int ret;

again:
	/*
	 * grab a fresh bio or wait for one to become available
	 */
	while (sctx->curr == -1) {
		spin_lock(&sctx->list_lock);
		sctx->curr = sctx->first_free;
		if (sctx->curr != -1) {
			sctx->first_free = sctx->bios[sctx->curr]->next_free;
			sctx->bios[sctx->curr]->next_free = -1;
			sctx->bios[sctx->curr]->page_count = 0;
			spin_unlock(&sctx->list_lock);
		} else {
			spin_unlock(&sctx->list_lock);
			wait_event(sctx->list_wait, sctx->first_free != -1);
		}
	}
	sbio = sctx->bios[sctx->curr];
	if (sbio->page_count == 0) {
		struct bio *bio;

		sbio->physical = spage->physical;
		sbio->logical = spage->logical;
		sbio->dev = spage->dev;
		bio = sbio->bio;
		if (!bio) {
			bio = btrfs_io_bio_alloc(GFP_NOFS,
					sctx->pages_per_rd_bio);
			if (!bio)
				return -ENOMEM;
			sbio->bio = bio;
		}

		bio->bi_private = sbio;
		bio->bi_end_io = scrub_bio_end_io;
		bio->bi_bdev = sbio->dev->bdev;
		bio->bi_iter.bi_sector = sbio->physical >> 9;
		sbio->err = 0;
	} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
		   spage->physical ||
		   sbio->logical + sbio->page_count * PAGE_SIZE !=
		   spage->logical ||
		   sbio->dev != spage->dev) {
		scrub_submit(sctx);
		goto again;
	}

	sbio->pagev[sbio->page_count] = spage;
	ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
	if (ret != PAGE_SIZE) {
		if (sbio->page_count < 1) {
			bio_put(sbio->bio);
			sbio->bio = NULL;
			return -EIO;
		}
		scrub_submit(sctx);
		goto again;
	}

	scrub_block_get(sblock); /* one for the page added to the bio */
	atomic_inc(&sblock->outstanding_pages);
	sbio->page_count++;
	if (sbio->page_count == sctx->pages_per_rd_bio)
		scrub_submit(sctx);

	return 0;
}

static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
		       u64 physical, struct btrfs_device *dev, u64 flags,
		       u64 gen, int mirror_num, u8 *csum, int force,
		       u64 physical_for_dev_replace)
{
	struct scrub_block *sblock;
	int index;

	sblock = kzalloc(sizeof(*sblock), GFP_NOFS);
	if (!sblock) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.malloc_errors++;
		spin_unlock(&sctx->stat_lock);
		return -ENOMEM;
	}

	/* one ref inside this function, plus one for each page added to
	 * a bio later on */
	atomic_set(&sblock->refs, 1);
	sblock->sctx = sctx;
	sblock->no_io_error_seen = 1;

	for (index = 0; len > 0; index++) {
		struct scrub_page *spage;
		u64 l = min_t(u64, len, PAGE_SIZE);

		spage = kzalloc(sizeof(*spage), GFP_NOFS);
		if (!spage) {
leave_nomem:
			spin_lock(&sctx->stat_lock);
			sctx->stat.malloc_errors++;
			spin_unlock(&sctx->stat_lock);
			scrub_block_put(sblock);
			return -ENOMEM;
		}
		BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
		scrub_page_get(spage);
		sblock->pagev[index] = spage;
		spage->sblock = sblock;
		spage->dev = dev;
		spage->flags = flags;
		spage->generation = gen;
		spage->logical = logical;
		spage->physical = physical;
		spage->physical_for_dev_replace = physical_for_dev_replace;
		spage->mirror_num = mirror_num;
		if (csum) {
			spage->have_csum = 1;
			memcpy(spage->csum, csum, sctx->csum_size);
		} else {
			spage->have_csum = 0;
		}
		sblock->page_count++;
		spage->page = alloc_page(GFP_NOFS);
		if (!spage->page)
			goto leave_nomem;
		len -= l;
		logical += l;
		physical += l;
		physical_for_dev_replace += l;
	}

	WARN_ON(sblock->page_count == 0);
	for (index = 0; index < sblock->page_count; index++) {
		struct scrub_page *spage = sblock->pagev[index];
		int ret;

		ret = scrub_add_page_to_rd_bio(sctx, spage);
		if (ret) {
			scrub_block_put(sblock);
			return ret;
		}
	}

	if (force)
		scrub_submit(sctx);

	/* last one frees, either here or in bio completion for last page */
	scrub_block_put(sblock);
	return 0;
}

static void scrub_bio_end_io(struct bio *bio, int err)
{
	struct scrub_bio *sbio = bio->bi_private;
	struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info;

	sbio->err = err;
	sbio->bio = bio;

	btrfs_queue_work(fs_info->scrub_workers, &sbio->work);
}

static void scrub_bio_end_io_worker(struct btrfs_work *work)
{
	struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
	struct scrub_ctx *sctx = sbio->sctx;
	int i;

	BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
	if (sbio->err) {
		for (i = 0; i < sbio->page_count; i++) {
			struct scrub_page *spage = sbio->pagev[i];

			spage->io_error = 1;
			spage->sblock->no_io_error_seen = 0;
		}
	}

	/* now complete the scrub_block items that have all pages completed */
	for (i = 0; i < sbio->page_count; i++) {
		struct scrub_page *spage = sbio->pagev[i];
		struct scrub_block *sblock = spage->sblock;

		if (atomic_dec_and_test(&sblock->outstanding_pages))
			scrub_block_complete(sblock);
		scrub_block_put(sblock);
	}

	bio_put(sbio->bio);
	sbio->bio = NULL;
	spin_lock(&sctx->list_lock);
	sbio->next_free = sctx->first_free;
	sctx->first_free = sbio->index;
	spin_unlock(&sctx->list_lock);

	if (sctx->is_dev_replace &&
	    atomic_read(&sctx->wr_ctx.flush_all_writes)) {
		mutex_lock(&sctx->wr_ctx.wr_lock);
		scrub_wr_submit(sctx);
		mutex_unlock(&sctx->wr_ctx.wr_lock);
	}

	scrub_pending_bio_dec(sctx);
}
2317
2318static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
2319 unsigned long *bitmap,
2320 u64 start, u64 len)
2321{
2322 int offset;
2323 int nsectors;
2324 int sectorsize = sparity->sctx->dev_root->sectorsize;
2325
2326 if (len >= sparity->stripe_len) {
2327 bitmap_set(bitmap, 0, sparity->nsectors);
2328 return;
2329 }
2330
2331 start -= sparity->logic_start;
2332 offset = (int)do_div(start, sparity->stripe_len);
2333 offset /= sectorsize;
2334 nsectors = (int)len / sectorsize;
2335
2336 if (offset + nsectors <= sparity->nsectors) {
2337 bitmap_set(bitmap, offset, nsectors);
2338 return;
2339 }
2340
2341 bitmap_set(bitmap, offset, sparity->nsectors - offset);
2342 bitmap_set(bitmap, 0, nsectors - (sparity->nsectors - offset));
2343}
2344
2345static inline void scrub_parity_mark_sectors_error(struct scrub_parity *sparity,
2346 u64 start, u64 len)
2347{
2348 __scrub_mark_bitmap(sparity, sparity->ebitmap, start, len);
2349}
2350
2351static inline void scrub_parity_mark_sectors_data(struct scrub_parity *sparity,
2352 u64 start, u64 len)
2353{
2354 __scrub_mark_bitmap(sparity, sparity->dbitmap, start, len);
2355}
2356
2357static void scrub_block_complete(struct scrub_block *sblock)
2358{
2359 int corrupted = 0;
2360
2361 if (!sblock->no_io_error_seen) {
2362 corrupted = 1;
2363 scrub_handle_errored_block(sblock);
2364 } else {
2365
2366
2367
2368
2369
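		/*
		 * Blocks failing the checksum are resubmitted through the
		 * repair machinery; only blocks that verify clean are copied
		 * straight to the replacement device here.
		 */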
2370 corrupted = scrub_checksum(sblock);
2371 if (!corrupted && sblock->sctx->is_dev_replace)
2372 scrub_write_block_to_dev_replace(sblock);
2373 }
2374
2375 if (sblock->sparity && corrupted && !sblock->data_corrected) {
2376 u64 start = sblock->pagev[0]->logical;
2377 u64 end = sblock->pagev[sblock->page_count - 1]->logical +
2378 PAGE_SIZE;
2379
2380 scrub_parity_mark_sectors_error(sblock->sparity,
2381 start, end - start);
2382 }
2383}
2384
2385static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len,
2386 u8 *csum)
2387{
2388 struct btrfs_ordered_sum *sum = NULL;
2389 unsigned long index;
2390 unsigned long num_sectors;
2391
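	/*
	 * Drop csum entries that end at or before @logical and stop at
	 * the first entry that could still cover it.
	 */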
2392 while (!list_empty(&sctx->csum_list)) {
2393 sum = list_first_entry(&sctx->csum_list,
2394 struct btrfs_ordered_sum, list);
2395 if (sum->bytenr > logical)
2396 return 0;
2397 if (sum->bytenr + sum->len > logical)
2398 break;
2399
2400 ++sctx->stat.csum_discards;
2401 list_del(&sum->list);
2402 kfree(sum);
2403 sum = NULL;
2404 }
2405 if (!sum)
2406 return 0;
2407
2408 index = ((u32)(logical - sum->bytenr)) / sctx->sectorsize;
2409 num_sectors = sum->len / sctx->sectorsize;
2410 memcpy(csum, sum->sums + index, sctx->csum_size);
2411 if (index == num_sectors - 1) {
2412 list_del(&sum->list);
2413 kfree(sum);
2414 }
2415 return 1;
2416}
2417
2418
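/*
 * Split an extent into blocksize-sized pieces, look up the data checksum
 * for each piece where one exists and hand the pieces to scrub_pages().
 */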
2419static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len,
2420 u64 physical, struct btrfs_device *dev, u64 flags,
2421 u64 gen, int mirror_num, u64 physical_for_dev_replace)
2422{
2423 int ret;
2424 u8 csum[BTRFS_CSUM_SIZE];
2425 u32 blocksize;
2426
2427 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2428 blocksize = sctx->sectorsize;
2429 spin_lock(&sctx->stat_lock);
2430 sctx->stat.data_extents_scrubbed++;
2431 sctx->stat.data_bytes_scrubbed += len;
2432 spin_unlock(&sctx->stat_lock);
2433 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
2434 blocksize = sctx->nodesize;
2435 spin_lock(&sctx->stat_lock);
2436 sctx->stat.tree_extents_scrubbed++;
2437 sctx->stat.tree_bytes_scrubbed += len;
2438 spin_unlock(&sctx->stat_lock);
2439 } else {
2440 blocksize = sctx->sectorsize;
2441 WARN_ON(1);
2442 }
2443
2444 while (len) {
2445 u64 l = min_t(u64, len, blocksize);
2446 int have_csum = 0;
2447
2448 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2449
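			/* fetch the csum so it rides along in the bio */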
2450 have_csum = scrub_find_csum(sctx, logical, l, csum);
2451 if (have_csum == 0)
2452 ++sctx->stat.no_csum;
2453 if (sctx->is_dev_replace && !have_csum) {
2454 ret = copy_nocow_pages(sctx, logical, l,
2455 mirror_num,
2456 physical_for_dev_replace);
2457 goto behind_scrub_pages;
2458 }
2459 }
2460 ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen,
2461 mirror_num, have_csum ? csum : NULL, 0,
2462 physical_for_dev_replace);
2463behind_scrub_pages:
2464 if (ret)
2465 return ret;
2466 len -= l;
2467 logical += l;
2468 physical += l;
2469 physical_for_dev_replace += l;
2470 }
2471 return 0;
2472}
2473
2474static int scrub_pages_for_parity(struct scrub_parity *sparity,
2475 u64 logical, u64 len,
2476 u64 physical, struct btrfs_device *dev,
2477 u64 flags, u64 gen, int mirror_num, u8 *csum)
2478{
2479 struct scrub_ctx *sctx = sparity->sctx;
2480 struct scrub_block *sblock;
2481 int index;
2482
2483 sblock = kzalloc(sizeof(*sblock), GFP_NOFS);
2484 if (!sblock) {
2485 spin_lock(&sctx->stat_lock);
2486 sctx->stat.malloc_errors++;
2487 spin_unlock(&sctx->stat_lock);
2488 return -ENOMEM;
2489 }
2490
2491
2492
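	/*
	 * One ref inside this function, plus one for each page added to
	 * a bio later on.
	 */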
2493 atomic_set(&sblock->refs, 1);
2494 sblock->sctx = sctx;
2495 sblock->no_io_error_seen = 1;
2496 sblock->sparity = sparity;
2497 scrub_parity_get(sparity);
2498
2499 for (index = 0; len > 0; index++) {
2500 struct scrub_page *spage;
2501 u64 l = min_t(u64, len, PAGE_SIZE);
2502
2503 spage = kzalloc(sizeof(*spage), GFP_NOFS);
2504 if (!spage) {
2505leave_nomem:
2506 spin_lock(&sctx->stat_lock);
2507 sctx->stat.malloc_errors++;
2508 spin_unlock(&sctx->stat_lock);
2509 scrub_block_put(sblock);
2510 return -ENOMEM;
2511 }
2512 BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
2513
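		/* one page reference for the scrub block */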
2514 scrub_page_get(spage);
2515 sblock->pagev[index] = spage;
2516
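		/* and one for the parity page list */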
2517 scrub_page_get(spage);
2518 list_add_tail(&spage->list, &sparity->spages);
2519 spage->sblock = sblock;
2520 spage->dev = dev;
2521 spage->flags = flags;
2522 spage->generation = gen;
2523 spage->logical = logical;
2524 spage->physical = physical;
2525 spage->mirror_num = mirror_num;
2526 if (csum) {
2527 spage->have_csum = 1;
2528 memcpy(spage->csum, csum, sctx->csum_size);
2529 } else {
2530 spage->have_csum = 0;
2531 }
2532 sblock->page_count++;
2533 spage->page = alloc_page(GFP_NOFS);
2534 if (!spage->page)
2535 goto leave_nomem;
2536 len -= l;
2537 logical += l;
2538 physical += l;
2539 }
2540
2541 WARN_ON(sblock->page_count == 0);
2542 for (index = 0; index < sblock->page_count; index++) {
2543 struct scrub_page *spage = sblock->pagev[index];
2544 int ret;
2545
2546 ret = scrub_add_page_to_rd_bio(sctx, spage);
2547 if (ret) {
2548 scrub_block_put(sblock);
2549 return ret;
2550 }
2551 }
2552
2553
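	/* the last reference frees it, here or in the bio completion */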
2554 scrub_block_put(sblock);
2555 return 0;
2556}
2557
2558static int scrub_extent_for_parity(struct scrub_parity *sparity,
2559 u64 logical, u64 len,
2560 u64 physical, struct btrfs_device *dev,
2561 u64 flags, u64 gen, int mirror_num)
2562{
2563 struct scrub_ctx *sctx = sparity->sctx;
2564 int ret;
2565 u8 csum[BTRFS_CSUM_SIZE];
2566 u32 blocksize;
2567
2568 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2569 blocksize = sctx->sectorsize;
2570 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
2571 blocksize = sctx->nodesize;
2572 } else {
2573 blocksize = sctx->sectorsize;
2574 WARN_ON(1);
2575 }
2576
2577 while (len) {
2578 u64 l = min_t(u64, len, blocksize);
2579 int have_csum = 0;
2580
2581 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2582
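			/* blocks without a csum cannot be verified, skip */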
2583 have_csum = scrub_find_csum(sctx, logical, l, csum);
2584 if (have_csum == 0)
2585 goto skip;
2586 }
2587 ret = scrub_pages_for_parity(sparity, logical, l, physical, dev,
2588 flags, gen, mirror_num,
2589 have_csum ? csum : NULL);
2590 if (ret)
2591 return ret;
2592skip:
2593 len -= l;
2594 logical += l;
2595 physical += l;
2596 }
2597 return 0;
2598}
2599
2600
2601
2602
2603
2604
2605
2606
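/*
 * Map a physical offset on one device to the corresponding logical
 * offset.  For a parity stripe, the logical offset of the leftmost
 * data stripe of the same rotation is returned instead.
 *
 * Returns 0 for a data stripe, 1 for a parity stripe.
 */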
2607static int get_raid56_logic_offset(u64 physical, int num,
2608 struct map_lookup *map, u64 *offset,
2609 u64 *stripe_start)
2610{
2611 int i;
2612 int j = 0;
2613 u64 stripe_nr;
2614 u64 last_offset;
2615 int stripe_index;
2616 int rot;
2617
2618 last_offset = (physical - map->stripes[num].physical) *
2619 nr_data_stripes(map);
2620 if (stripe_start)
2621 *stripe_start = last_offset;
2622
2623 *offset = last_offset;
2624 for (i = 0; i < nr_data_stripes(map); i++) {
2625 *offset = last_offset + i * map->stripe_len;
2626
2627 stripe_nr = *offset;
2628 do_div(stripe_nr, map->stripe_len);
2629 do_div(stripe_nr, nr_data_stripes(map));
2630
2631
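		/*
		 * Work out the disk rotation of this stripe set; rot + i
		 * then gives the device index this data stripe lands on.
		 */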
2632 rot = do_div(stripe_nr, map->num_stripes);
2633
2634 rot += i;
2635 stripe_index = rot % map->num_stripes;
2636 if (stripe_index == num)
2637 return 0;
2638 if (stripe_index < num)
2639 j++;
2640 }
2641 *offset = last_offset + j * map->stripe_len;
2642 return 1;
2643}
2644
2645static void scrub_free_parity(struct scrub_parity *sparity)
2646{
2647 struct scrub_ctx *sctx = sparity->sctx;
2648 struct scrub_page *curr, *next;
2649 int nbits;
2650
2651 nbits = bitmap_weight(sparity->ebitmap, sparity->nsectors);
2652 if (nbits) {
2653 spin_lock(&sctx->stat_lock);
2654 sctx->stat.read_errors += nbits;
2655 sctx->stat.uncorrectable_errors += nbits;
2656 spin_unlock(&sctx->stat_lock);
2657 }
2658
2659 list_for_each_entry_safe(curr, next, &sparity->spages, list) {
2660 list_del_init(&curr->list);
2661 scrub_page_put(curr);
2662 }
2663
2664 kfree(sparity);
2665}
2666
2667static void scrub_parity_bio_endio(struct bio *bio, int error)
2668{
2669 struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
2670 struct scrub_ctx *sctx = sparity->sctx;
2671
2672 if (error)
2673 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
2674 sparity->nsectors);
2675
2676 scrub_free_parity(sparity);
2677 scrub_pending_bio_dec(sctx);
2678 bio_put(bio);
2679}
2680
2681static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
2682{
2683 struct scrub_ctx *sctx = sparity->sctx;
2684 struct bio *bio;
2685 struct btrfs_raid_bio *rbio;
2686 struct scrub_page *spage;
2687 struct btrfs_bio *bbio = NULL;
2688 u64 length;
2689 int ret;
2690
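	/*
	 * Remove the sectors that already failed (ebitmap) from the data
	 * bitmap; if nothing is left there is nothing to check.
	 */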
2691 if (!bitmap_andnot(sparity->dbitmap, sparity->dbitmap, sparity->ebitmap,
2692 sparity->nsectors))
2693 goto out;
2694
2695 length = sparity->logic_end - sparity->logic_start + 1;
2696 ret = btrfs_map_sblock(sctx->dev_root->fs_info, WRITE,
2697 sparity->logic_start,
2698 &length, &bbio, 0, 1);
2699 if (ret || !bbio || !bbio->raid_map)
2700 goto bbio_out;
2701
2702 bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
2703 if (!bio)
2704 goto bbio_out;
2705
2706 bio->bi_iter.bi_sector = sparity->logic_start >> 9;
2707 bio->bi_private = sparity;
2708 bio->bi_end_io = scrub_parity_bio_endio;
2709
2710 rbio = raid56_parity_alloc_scrub_rbio(sctx->dev_root, bio, bbio,
2711 length, sparity->scrub_dev,
2712 sparity->dbitmap,
2713 sparity->nsectors);
2714 if (!rbio)
2715 goto rbio_out;
2716
2717 list_for_each_entry(spage, &sparity->spages, list)
2718 raid56_parity_add_scrub_pages(rbio, spage->page,
2719 spage->logical);
2720
2721 scrub_pending_bio_inc(sctx);
2722 raid56_parity_submit_scrub_rbio(rbio);
2723 return;
2724
2725rbio_out:
2726 bio_put(bio);
2727bbio_out:
2728 btrfs_put_bbio(bbio);
2729 bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
2730 sparity->nsectors);
2731 spin_lock(&sctx->stat_lock);
2732 sctx->stat.malloc_errors++;
2733 spin_unlock(&sctx->stat_lock);
2734out:
2735 scrub_free_parity(sparity);
2736}
2737
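/*
 * Bytes needed for one bitmap of nsectors bits, rounded up to whole
 * longs; e.g. a 64K stripe of 4K sectors needs 16 bits, which fits in
 * a single 8-byte long on 64-bit.
 */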
2738static inline int scrub_calc_parity_bitmap_len(int nsectors)
2739{
2740 return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * (BITS_PER_LONG / 8);
2741}
2742
2743static void scrub_parity_get(struct scrub_parity *sparity)
2744{
2745 atomic_inc(&sparity->refs);
2746}
2747
2748static void scrub_parity_put(struct scrub_parity *sparity)
2749{
2750 if (!atomic_dec_and_test(&sparity->refs))
2751 return;
2752
2753 scrub_parity_check_and_repair(sparity);
2754}
2755
2756static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
2757 struct map_lookup *map,
2758 struct btrfs_device *sdev,
2759 struct btrfs_path *path,
2760 u64 logic_start,
2761 u64 logic_end)
2762{
2763 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
2764 struct btrfs_root *root = fs_info->extent_root;
2765 struct btrfs_root *csum_root = fs_info->csum_root;
2766 struct btrfs_extent_item *extent;
2767 u64 flags;
2768 int ret;
2769 int slot;
2770 struct extent_buffer *l;
2771 struct btrfs_key key;
2772 u64 generation;
2773 u64 extent_logical;
2774 u64 extent_physical;
2775 u64 extent_len;
2776 struct btrfs_device *extent_dev;
2777 struct scrub_parity *sparity;
2778 int nsectors;
2779 int bitmap_len;
2780 int extent_mirror_num;
2781 int stop_loop = 0;
2782
2783 nsectors = map->stripe_len / root->sectorsize;
2784 bitmap_len = scrub_calc_parity_bitmap_len(nsectors);
2785 sparity = kzalloc(sizeof(struct scrub_parity) + 2 * bitmap_len,
2786 GFP_NOFS);
2787 if (!sparity) {
2788 spin_lock(&sctx->stat_lock);
2789 sctx->stat.malloc_errors++;
2790 spin_unlock(&sctx->stat_lock);
2791 return -ENOMEM;
2792 }
2793
2794 sparity->stripe_len = map->stripe_len;
2795 sparity->nsectors = nsectors;
2796 sparity->sctx = sctx;
2797 sparity->scrub_dev = sdev;
2798 sparity->logic_start = logic_start;
2799 sparity->logic_end = logic_end;
2800 atomic_set(&sparity->refs, 1);
2801 INIT_LIST_HEAD(&sparity->spages);
2802 sparity->dbitmap = sparity->bitmap;
2803 sparity->ebitmap = (void *)sparity->bitmap + bitmap_len;
2804
2805 ret = 0;
2806 while (logic_start < logic_end) {
2807 if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
2808 key.type = BTRFS_METADATA_ITEM_KEY;
2809 else
2810 key.type = BTRFS_EXTENT_ITEM_KEY;
2811 key.objectid = logic_start;
2812 key.offset = (u64)-1;
2813
2814 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2815 if (ret < 0)
2816 goto out;
2817
2818 if (ret > 0) {
2819 ret = btrfs_previous_extent_item(root, path, 0);
2820 if (ret < 0)
2821 goto out;
2822 if (ret > 0) {
2823 btrfs_release_path(path);
2824 ret = btrfs_search_slot(NULL, root, &key,
2825 path, 0, 0);
2826 if (ret < 0)
2827 goto out;
2828 }
2829 }
2830
2831 stop_loop = 0;
2832 while (1) {
2833 u64 bytes;
2834
2835 l = path->nodes[0];
2836 slot = path->slots[0];
2837 if (slot >= btrfs_header_nritems(l)) {
2838 ret = btrfs_next_leaf(root, path);
2839 if (ret == 0)
2840 continue;
2841 if (ret < 0)
2842 goto out;
2843
2844 stop_loop = 1;
2845 break;
2846 }
2847 btrfs_item_key_to_cpu(l, &key, slot);
2848
2849 if (key.type == BTRFS_METADATA_ITEM_KEY)
2850 bytes = root->nodesize;
2851 else
2852 bytes = key.offset;
2853
2854 if (key.objectid + bytes <= logic_start)
2855 goto next;
2856
2857 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
2858 key.type != BTRFS_METADATA_ITEM_KEY)
2859 goto next;
2860
2861 if (key.objectid > logic_end) {
2862 stop_loop = 1;
2863 break;
2864 }
2865
2866 while (key.objectid >= logic_start + map->stripe_len)
2867 logic_start += map->stripe_len;
2868
2869 extent = btrfs_item_ptr(l, slot,
2870 struct btrfs_extent_item);
2871 flags = btrfs_extent_flags(l, extent);
2872 generation = btrfs_extent_generation(l, extent);
2873
2874 if (key.objectid < logic_start &&
2875 (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
2876 btrfs_err(fs_info,
2877 "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
2878 key.objectid, logic_start);
2879 goto next;
2880 }
2881again:
2882 extent_logical = key.objectid;
2883 extent_len = bytes;
2884
2885 if (extent_logical < logic_start) {
2886 extent_len -= logic_start - extent_logical;
2887 extent_logical = logic_start;
2888 }
2889
2890 if (extent_logical + extent_len >
2891 logic_start + map->stripe_len)
2892 extent_len = logic_start + map->stripe_len -
2893 extent_logical;
2894
2895 scrub_parity_mark_sectors_data(sparity, extent_logical,
2896 extent_len);
2897
2898 scrub_remap_extent(fs_info, extent_logical,
2899 extent_len, &extent_physical,
2900 &extent_dev,
2901 &extent_mirror_num);
2902
2903 ret = btrfs_lookup_csums_range(csum_root,
2904 extent_logical,
2905 extent_logical + extent_len - 1,
2906 &sctx->csum_list, 1);
2907 if (ret)
2908 goto out;
2909
2910 ret = scrub_extent_for_parity(sparity, extent_logical,
2911 extent_len,
2912 extent_physical,
2913 extent_dev, flags,
2914 generation,
2915 extent_mirror_num);
2916 if (ret)
2917 goto out;
2918
2919 scrub_free_csums(sctx);
2920 if (extent_logical + extent_len <
2921 key.objectid + bytes) {
2922 logic_start += map->stripe_len;
2923
2924 if (logic_start >= logic_end) {
2925 stop_loop = 1;
2926 break;
2927 }
2928
2929 if (logic_start < key.objectid + bytes) {
2930 cond_resched();
2931 goto again;
2932 }
2933 }
2934next:
2935 path->slots[0]++;
2936 }
2937
2938 btrfs_release_path(path);
2939
2940 if (stop_loop)
2941 break;
2942
2943 logic_start += map->stripe_len;
2944 }
2945out:
2946 if (ret < 0)
2947 scrub_parity_mark_sectors_error(sparity, logic_start,
2948 logic_end - logic_start + 1);
2949 scrub_parity_put(sparity);
2950 scrub_submit(sctx);
2951 mutex_lock(&sctx->wr_ctx.wr_lock);
2952 scrub_wr_submit(sctx);
2953 mutex_unlock(&sctx->wr_ctx.wr_lock);
2954
2955 btrfs_release_path(path);
2956 return ret < 0 ? ret : 0;
2957}
2958
2959static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2960 struct map_lookup *map,
2961 struct btrfs_device *scrub_dev,
2962 int num, u64 base, u64 length,
2963 int is_dev_replace)
2964{
2965 struct btrfs_path *path, *ppath;
2966 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
2967 struct btrfs_root *root = fs_info->extent_root;
2968 struct btrfs_root *csum_root = fs_info->csum_root;
2969 struct btrfs_extent_item *extent;
2970 struct blk_plug plug;
2971 u64 flags;
2972 int ret;
2973 int slot;
2974 u64 nstripes;
2975 struct extent_buffer *l;
2976 struct btrfs_key key;
2977 u64 physical;
2978 u64 logical;
2979 u64 logic_end;
2980 u64 physical_end;
2981 u64 generation;
2982 int mirror_num;
2983 struct reada_control *reada1;
2984 struct reada_control *reada2;
2985 struct btrfs_key key_start;
2986 struct btrfs_key key_end;
2987 u64 increment = map->stripe_len;
2988 u64 offset;
2989 u64 extent_logical;
2990 u64 extent_physical;
2991 u64 extent_len;
2992 u64 stripe_logical;
2993 u64 stripe_end;
2994 struct btrfs_device *extent_dev;
2995 int extent_mirror_num;
2996 int stop_loop = 0;
2997
2998 nstripes = length;
2999 physical = map->stripes[num].physical;
3000 offset = 0;
3001 do_div(nstripes, map->stripe_len);
3002 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
3003 offset = map->stripe_len * num;
3004 increment = map->stripe_len * map->num_stripes;
3005 mirror_num = 1;
3006 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
3007 int factor = map->num_stripes / map->sub_stripes;
3008 offset = map->stripe_len * (num / map->sub_stripes);
3009 increment = map->stripe_len * factor;
3010 mirror_num = num % map->sub_stripes + 1;
3011 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
3012 increment = map->stripe_len;
3013 mirror_num = num % map->num_stripes + 1;
3014 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
3015 increment = map->stripe_len;
3016 mirror_num = num % map->num_stripes + 1;
3017 } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3018 get_raid56_logic_offset(physical, num, map, &offset, NULL);
3019 increment = map->stripe_len * nr_data_stripes(map);
3020 mirror_num = 1;
3021 } else {
3022 increment = map->stripe_len;
3023 mirror_num = 1;
3024 }
3025
3026 path = btrfs_alloc_path();
3027 if (!path)
3028 return -ENOMEM;
3029
3030 ppath = btrfs_alloc_path();
3031 if (!ppath) {
3032 btrfs_free_path(path);
3033 return -ENOMEM;
3034 }
3035
3036
3037
3038
3039
3040
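	/*
	 * Work on the commit root: the referenced disk blocks are static
	 * as long as COW is applied, so it is safe to read them without
	 * locking.
	 */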
3041 path->search_commit_root = 1;
3042 path->skip_locking = 1;
3043
3044 ppath->search_commit_root = 1;
3045 ppath->skip_locking = 1;
3046
3047
3048
3049
3050
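	/*
	 * Compute the logical range covered by this device stripe; for
	 * RAID5/6 the end is derived from the last data stripe offset.
	 */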
3051 logical = base + offset;
3052 physical_end = physical + nstripes * map->stripe_len;
3053 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3054 get_raid56_logic_offset(physical_end, num,
3055 map, &logic_end, NULL);
3056 logic_end += base;
3057 } else {
3058 logic_end = logical + increment * nstripes;
3059 }
3060 wait_event(sctx->list_wait,
3061 atomic_read(&sctx->bios_in_flight) == 0);
3062 scrub_blocked_if_needed(fs_info);
3063
3064
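	/*
	 * Kick off readahead of the extent tree and the csum tree for the
	 * range and wait for it, so the lookups below hit cached pages.
	 */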
3065 key_start.objectid = logical;
3066 key_start.type = BTRFS_EXTENT_ITEM_KEY;
3067 key_start.offset = (u64)0;
3068 key_end.objectid = logic_end;
3069 key_end.type = BTRFS_METADATA_ITEM_KEY;
3070 key_end.offset = (u64)-1;
3071 reada1 = btrfs_reada_add(root, &key_start, &key_end);
3072
3073 key_start.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
3074 key_start.type = BTRFS_EXTENT_CSUM_KEY;
3075 key_start.offset = logical;
3076 key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
3077 key_end.type = BTRFS_EXTENT_CSUM_KEY;
3078 key_end.offset = logic_end;
3079 reada2 = btrfs_reada_add(csum_root, &key_start, &key_end);
3080
3081 if (!IS_ERR(reada1))
3082 btrfs_reada_wait(reada1);
3083 if (!IS_ERR(reada2))
3084 btrfs_reada_wait(reada2);
3085
3086
3087
3088
3089
3090
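	/*
	 * Plug the queue so the many small reads issued per stripe can be
	 * merged before they are submitted.
	 */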
3091 blk_start_plug(&plug);
3092
3093
3094
3095
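	/* now find all extents of each stripe and scrub them */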
3096 ret = 0;
3097 while (physical < physical_end) {
3098
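		/*
		 * On RAID5/6 a parity stripe is handled by
		 * scrub_raid56_parity() and the data loop skips ahead.
		 */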
3099 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3100 ret = get_raid56_logic_offset(physical, num,
3101 map, &logical, &stripe_logical);
3102 logical += base;
3103 if (ret) {
3104 stripe_logical += base;
3105 stripe_end = stripe_logical + increment - 1;
3106 ret = scrub_raid56_parity(sctx, map, scrub_dev,
3107 ppath, stripe_logical,
3108 stripe_end);
3109 if (ret)
3110 goto out;
3111 goto skip;
3112 }
3113 }
3114
3115
3116
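		/* canceled, either per device or for the whole fs? */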
3117 if (atomic_read(&fs_info->scrub_cancel_req) ||
3118 atomic_read(&sctx->cancel_req)) {
3119 ret = -ECANCELED;
3120 goto out;
3121 }
3122
3123
3124
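		/* do we have to pause, e.g. for a transaction commit? */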
3125 if (atomic_read(&fs_info->scrub_pause_req)) {
3126
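			/* push queued extents so the pause is not held off */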
3127 atomic_set(&sctx->wr_ctx.flush_all_writes, 1);
3128 scrub_submit(sctx);
3129 mutex_lock(&sctx->wr_ctx.wr_lock);
3130 scrub_wr_submit(sctx);
3131 mutex_unlock(&sctx->wr_ctx.wr_lock);
3132 wait_event(sctx->list_wait,
3133 atomic_read(&sctx->bios_in_flight) == 0);
3134 atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
3135 scrub_blocked_if_needed(fs_info);
3136 }
3137
3138 if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
3139 key.type = BTRFS_METADATA_ITEM_KEY;
3140 else
3141 key.type = BTRFS_EXTENT_ITEM_KEY;
3142 key.objectid = logical;
3143 key.offset = (u64)-1;
3144
3145 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3146 if (ret < 0)
3147 goto out;
3148
3149 if (ret > 0) {
3150 ret = btrfs_previous_extent_item(root, path, 0);
3151 if (ret < 0)
3152 goto out;
3153 if (ret > 0) {
3154
3155
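				/*
				 * There is no smaller item, so stick with
				 * the larger one.
				 */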
3156 btrfs_release_path(path);
3157 ret = btrfs_search_slot(NULL, root, &key,
3158 path, 0, 0);
3159 if (ret < 0)
3160 goto out;
3161 }
3162 }
3163
3164 stop_loop = 0;
3165 while (1) {
3166 u64 bytes;
3167
3168 l = path->nodes[0];
3169 slot = path->slots[0];
3170 if (slot >= btrfs_header_nritems(l)) {
3171 ret = btrfs_next_leaf(root, path);
3172 if (ret == 0)
3173 continue;
3174 if (ret < 0)
3175 goto out;
3176
3177 stop_loop = 1;
3178 break;
3179 }
3180 btrfs_item_key_to_cpu(l, &key, slot);
3181
3182 if (key.type == BTRFS_METADATA_ITEM_KEY)
3183 bytes = root->nodesize;
3184 else
3185 bytes = key.offset;
3186
3187 if (key.objectid + bytes <= logical)
3188 goto next;
3189
3190 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
3191 key.type != BTRFS_METADATA_ITEM_KEY)
3192 goto next;
3193
3194 if (key.objectid >= logical + map->stripe_len) {
3195
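				/* out of this stripe; stop at the range end */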
3196 if (key.objectid >= logic_end)
3197 stop_loop = 1;
3198 break;
3199 }
3200
3201 extent = btrfs_item_ptr(l, slot,
3202 struct btrfs_extent_item);
3203 flags = btrfs_extent_flags(l, extent);
3204 generation = btrfs_extent_generation(l, extent);
3205
3206 if (key.objectid < logical &&
3207 (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
			btrfs_err(fs_info,
				  "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
				  key.objectid, logical);
3212 goto next;
3213 }
3214
3215again:
3216 extent_logical = key.objectid;
3217 extent_len = bytes;
3218
3219
3220
3221
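			/* trim the extent to the part inside this stripe */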
3222 if (extent_logical < logical) {
3223 extent_len -= logical - extent_logical;
3224 extent_logical = logical;
3225 }
3226 if (extent_logical + extent_len >
3227 logical + map->stripe_len) {
3228 extent_len = logical + map->stripe_len -
3229 extent_logical;
3230 }
3231
3232 extent_physical = extent_logical - logical + physical;
3233 extent_dev = scrub_dev;
3234 extent_mirror_num = mirror_num;
3235 if (is_dev_replace)
3236 scrub_remap_extent(fs_info, extent_logical,
3237 extent_len, &extent_physical,
3238 &extent_dev,
3239 &extent_mirror_num);
3240
3241 ret = btrfs_lookup_csums_range(csum_root, logical,
3242 logical + map->stripe_len - 1,
3243 &sctx->csum_list, 1);
3244 if (ret)
3245 goto out;
3246
3247 ret = scrub_extent(sctx, extent_logical, extent_len,
3248 extent_physical, extent_dev, flags,
3249 generation, extent_mirror_num,
3250 extent_logical - logical + physical);
3251 if (ret)
3252 goto out;
3253
3254 scrub_free_csums(sctx);
3255 if (extent_logical + extent_len <
3256 key.objectid + bytes) {
3257 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3258
3259
3260
3261
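					/*
					 * Loop until we find the next data
					 * stripe or we have finished all
					 * the stripes.
					 */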
3262loop:
3263 physical += map->stripe_len;
3264 ret = get_raid56_logic_offset(physical,
3265 num, map, &logical,
3266 &stripe_logical);
3267 logical += base;
3268
3269 if (ret && physical < physical_end) {
3270 stripe_logical += base;
3271 stripe_end = stripe_logical +
3272 increment - 1;
3273 ret = scrub_raid56_parity(sctx,
3274 map, scrub_dev, ppath,
3275 stripe_logical,
3276 stripe_end);
3277 if (ret)
3278 goto out;
3279 goto loop;
3280 }
3281 } else {
3282 physical += map->stripe_len;
3283 logical += increment;
3284 }
3285 if (logical < key.objectid + bytes) {
3286 cond_resched();
3287 goto again;
3288 }
3289
3290 if (physical >= physical_end) {
3291 stop_loop = 1;
3292 break;
3293 }
3294 }
3295next:
3296 path->slots[0]++;
3297 }
3298 btrfs_release_path(path);
3299skip:
3300 logical += increment;
3301 physical += map->stripe_len;
3302 spin_lock(&sctx->stat_lock);
3303 if (stop_loop)
3304 sctx->stat.last_physical = map->stripes[num].physical +
3305 length;
3306 else
3307 sctx->stat.last_physical = physical;
3308 spin_unlock(&sctx->stat_lock);
3309 if (stop_loop)
3310 break;
3311 }
3312out:
3313
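	/* push anything still queued, also on the error paths */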
3314 scrub_submit(sctx);
3315 mutex_lock(&sctx->wr_ctx.wr_lock);
3316 scrub_wr_submit(sctx);
3317 mutex_unlock(&sctx->wr_ctx.wr_lock);
3318
3319 blk_finish_plug(&plug);
3320 btrfs_free_path(path);
3321 btrfs_free_path(ppath);
3322 return ret < 0 ? ret : 0;
3323}
3324
3325static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
3326 struct btrfs_device *scrub_dev,
3327 u64 chunk_tree, u64 chunk_objectid,
3328 u64 chunk_offset, u64 length,
3329 u64 dev_offset, int is_dev_replace)
3330{
3331 struct btrfs_mapping_tree *map_tree =
3332 &sctx->dev_root->fs_info->mapping_tree;
3333 struct map_lookup *map;
3334 struct extent_map *em;
3335 int i;
3336 int ret = 0;
3337
3338 read_lock(&map_tree->map_tree.lock);
3339 em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
3340 read_unlock(&map_tree->map_tree.lock);
3341
3342 if (!em)
3343 return -EINVAL;
3344
3345 map = (struct map_lookup *)em->bdev;
3346 if (em->start != chunk_offset)
3347 goto out;
3348
3349 if (em->len < length)
3350 goto out;
3351
3352 for (i = 0; i < map->num_stripes; ++i) {
3353 if (map->stripes[i].dev->bdev == scrub_dev->bdev &&
3354 map->stripes[i].physical == dev_offset) {
3355 ret = scrub_stripe(sctx, map, scrub_dev, i,
3356 chunk_offset, length,
3357 is_dev_replace);
3358 if (ret)
3359 goto out;
3360 }
3361 }
3362out:
3363 free_extent_map(em);
3364
3365 return ret;
3366}
3367
3368static noinline_for_stack
3369int scrub_enumerate_chunks(struct scrub_ctx *sctx,
3370 struct btrfs_device *scrub_dev, u64 start, u64 end,
3371 int is_dev_replace)
3372{
3373 struct btrfs_dev_extent *dev_extent = NULL;
3374 struct btrfs_path *path;
3375 struct btrfs_root *root = sctx->dev_root;
3376 struct btrfs_fs_info *fs_info = root->fs_info;
3377 u64 length;
3378 u64 chunk_tree;
3379 u64 chunk_objectid;
3380 u64 chunk_offset;
3381 int ret;
3382 int slot;
3383 struct extent_buffer *l;
3384 struct btrfs_key key;
3385 struct btrfs_key found_key;
3386 struct btrfs_block_group_cache *cache;
3387 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
3388
3389 path = btrfs_alloc_path();
3390 if (!path)
3391 return -ENOMEM;
3392
3393 path->reada = 2;
3394 path->search_commit_root = 1;
3395 path->skip_locking = 1;
3396
3397 key.objectid = scrub_dev->devid;
3398 key.offset = 0ull;
3399 key.type = BTRFS_DEV_EXTENT_KEY;
3400
3401 while (1) {
3402 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3403 if (ret < 0)
3404 break;
3405 if (ret > 0) {
3406 if (path->slots[0] >=
3407 btrfs_header_nritems(path->nodes[0])) {
3408 ret = btrfs_next_leaf(root, path);
3409 if (ret)
3410 break;
3411 }
3412 }
3413
3414 l = path->nodes[0];
3415 slot = path->slots[0];
3416
3417 btrfs_item_key_to_cpu(l, &found_key, slot);
3418
3419 if (found_key.objectid != scrub_dev->devid)
3420 break;
3421
3422 if (found_key.type != BTRFS_DEV_EXTENT_KEY)
3423 break;
3424
3425 if (found_key.offset >= end)
3426 break;
3427
3428 if (found_key.offset < key.offset)
3429 break;
3430
3431 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
3432 length = btrfs_dev_extent_length(l, dev_extent);
3433
3434 if (found_key.offset + length <= start)
3435 goto skip;
3436
3437 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
3438 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
3439 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
3440
3441
3442
3443
3444
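		/*
		 * Take a reference on the block group so the chunk cannot
		 * go away while we scrub it.
		 */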
3445 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
3446
3447
3448
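		/*
		 * Some chunks are removed but not yet committed to disk;
		 * just skip them.
		 */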
3449 if (!cache)
3450 goto skip;
3451
3452 dev_replace->cursor_right = found_key.offset + length;
3453 dev_replace->cursor_left = found_key.offset;
3454 dev_replace->item_needs_writeback = 1;
3455 ret = scrub_chunk(sctx, scrub_dev, chunk_tree, chunk_objectid,
3456 chunk_offset, length, found_key.offset,
3457 is_dev_replace);
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
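		/*
		 * Flush and submit all pending read and write bios, then
		 * wait for them.  In the dev-replace case a read spawns
		 * writes from the read-completion worker, so all writes
		 * must be flushed before bios_in_flight can reliably drop
		 * to zero.
		 */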
3469 atomic_set(&sctx->wr_ctx.flush_all_writes, 1);
3470 scrub_submit(sctx);
3471 mutex_lock(&sctx->wr_ctx.wr_lock);
3472 scrub_wr_submit(sctx);
3473 mutex_unlock(&sctx->wr_ctx.wr_lock);
3474
3475 wait_event(sctx->list_wait,
3476 atomic_read(&sctx->bios_in_flight) == 0);
3477 atomic_inc(&fs_info->scrubs_paused);
3478 wake_up(&fs_info->scrub_pause_wait);
3479
3480
3481
3482
3483
3484
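		/*
		 * Wait for the pending workers before scrubs_paused is
		 * dropped again, so a transaction commit is not blocked
		 * while they finish.
		 */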
3485 wait_event(sctx->list_wait,
3486 atomic_read(&sctx->workers_pending) == 0);
3487 atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
3488
3489 mutex_lock(&fs_info->scrub_lock);
3490 __scrub_blocked_if_needed(fs_info);
3491 atomic_dec(&fs_info->scrubs_paused);
3492 mutex_unlock(&fs_info->scrub_lock);
3493 wake_up(&fs_info->scrub_pause_wait);
3494
3495 btrfs_put_block_group(cache);
3496 if (ret)
3497 break;
3498 if (is_dev_replace &&
3499 atomic64_read(&dev_replace->num_write_errors) > 0) {
3500 ret = -EIO;
3501 break;
3502 }
3503 if (sctx->stat.malloc_errors > 0) {
3504 ret = -ENOMEM;
3505 break;
3506 }
3507
3508 dev_replace->cursor_left = dev_replace->cursor_right;
3509 dev_replace->item_needs_writeback = 1;
3510skip:
3511 key.offset = found_key.offset + length;
3512 btrfs_release_path(path);
3513 }
3514
3515 btrfs_free_path(path);
3516
3517
3518
3519
3520
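	/*
	 * ret can still be 1 from search_slot or next_leaf here, which is
	 * not an error.
	 */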
3521 return ret < 0 ? ret : 0;
3522}
3523
3524static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
3525 struct btrfs_device *scrub_dev)
3526{
3527 int i;
3528 u64 bytenr;
3529 u64 gen;
3530 int ret;
3531 struct btrfs_root *root = sctx->dev_root;
3532
3533 if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
3534 return -EIO;
3535
3536
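	/* a seed filesystem keeps its own generation */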
3537 if (scrub_dev->fs_devices != root->fs_info->fs_devices)
3538 gen = scrub_dev->generation;
3539 else
3540 gen = root->fs_info->last_trans_committed;
3541
3542 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
3543 bytenr = btrfs_sb_offset(i);
3544 if (bytenr + BTRFS_SUPER_INFO_SIZE >
3545 scrub_dev->commit_total_bytes)
3546 break;
3547
3548 ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
3549 scrub_dev, BTRFS_EXTENT_FLAG_SUPER, gen, i,
3550 NULL, 1, bytenr);
3551 if (ret)
3552 return ret;
3553 }
3554 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
3555
3556 return 0;
3557}
3558
3559
3560
3561
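/*
 * Get a reference on fs_info->scrub_workers; start the workers if
 * necessary.
 */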
3562static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
3563 int is_dev_replace)
3564{
3565 int ret = 0;
3566 int flags = WQ_FREEZABLE | WQ_UNBOUND;
3567 int max_active = fs_info->thread_pool_size;
3568
3569 if (fs_info->scrub_workers_refcnt == 0) {
3570 if (is_dev_replace)
3571 fs_info->scrub_workers =
3572 btrfs_alloc_workqueue("btrfs-scrub", flags,
3573 1, 4);
3574 else
3575 fs_info->scrub_workers =
3576 btrfs_alloc_workqueue("btrfs-scrub", flags,
3577 max_active, 4);
3578 if (!fs_info->scrub_workers) {
3579 ret = -ENOMEM;
3580 goto out;
3581 }
3582 fs_info->scrub_wr_completion_workers =
3583 btrfs_alloc_workqueue("btrfs-scrubwrc", flags,
3584 max_active, 2);
3585 if (!fs_info->scrub_wr_completion_workers) {
3586 ret = -ENOMEM;
3587 goto out;
3588 }
3589 fs_info->scrub_nocow_workers =
3590 btrfs_alloc_workqueue("btrfs-scrubnc", flags, 1, 0);
3591 if (!fs_info->scrub_nocow_workers) {
3592 ret = -ENOMEM;
3593 goto out;
3594 }
3595 }
3596 ++fs_info->scrub_workers_refcnt;
3597out:
3598 return ret;
3599}
3600
3601static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
3602{
3603 if (--fs_info->scrub_workers_refcnt == 0) {
3604 btrfs_destroy_workqueue(fs_info->scrub_workers);
3605 btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
3606 btrfs_destroy_workqueue(fs_info->scrub_nocow_workers);
3607 }
3608 WARN_ON(fs_info->scrub_workers_refcnt < 0);
3609}
3610
3611int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
3612 u64 end, struct btrfs_scrub_progress *progress,
3613 int readonly, int is_dev_replace)
3614{
3615 struct scrub_ctx *sctx;
3616 int ret;
3617 struct btrfs_device *dev;
3618 struct rcu_string *name;
3619
3620 if (btrfs_fs_closing(fs_info))
3621 return -EINVAL;
3622
3623 if (fs_info->chunk_root->nodesize > BTRFS_STRIPE_LEN) {
3624
3625
3626
3627
3628
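		/*
		 * Scrub works on BTRFS_STRIPE_LEN chunks; a larger tree
		 * block could span a stripe boundary and could not be
		 * verified as a unit (see the "tree block spanning
		 * stripes" warnings above).
		 */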
3629 btrfs_err(fs_info,
3630 "scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails",
3631 fs_info->chunk_root->nodesize, BTRFS_STRIPE_LEN);
3632 return -EINVAL;
3633 }
3634
3635 if (fs_info->chunk_root->sectorsize != PAGE_SIZE) {
3636
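		/*
		 * The scrub_page machinery, and the nocow copy path for
		 * data without checksums, assume one sector per page.
		 */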
		btrfs_err(fs_info,
			  "scrub: size assumption sectorsize != PAGE_SIZE (%d != %lu) fails",
			  fs_info->chunk_root->sectorsize, PAGE_SIZE);
3641 return -EINVAL;
3642 }
3643
3644 if (fs_info->chunk_root->nodesize >
3645 PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK ||
3646 fs_info->chunk_root->sectorsize >
3647 PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK) {
3648
3649
3650
3651
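		/*
		 * Larger blocks would overflow the pagev array in
		 * struct scrub_block.
		 */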
		btrfs_err(fs_info,
			  "scrub: size assumption nodesize and sectorsize <= PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK (%d <= %lu && %d <= %lu) fails",
			  fs_info->chunk_root->nodesize,
			  PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK,
			  fs_info->chunk_root->sectorsize,
			  PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK);
3658 return -EINVAL;
3659 }
3660
3661
3662 mutex_lock(&fs_info->fs_devices->device_list_mutex);
3663 dev = btrfs_find_device(fs_info, devid, NULL, NULL);
3664 if (!dev || (dev->missing && !is_dev_replace)) {
3665 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3666 return -ENODEV;
3667 }
3668
3669 if (!is_dev_replace && !readonly && !dev->writeable) {
3670 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3671 rcu_read_lock();
3672 name = rcu_dereference(dev->name);
3673 btrfs_err(fs_info, "scrub: device %s is not writable",
3674 name->str);
3675 rcu_read_unlock();
3676 return -EROFS;
3677 }
3678
3679 mutex_lock(&fs_info->scrub_lock);
3680 if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) {
3681 mutex_unlock(&fs_info->scrub_lock);
3682 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3683 return -EIO;
3684 }
3685
3686 btrfs_dev_replace_lock(&fs_info->dev_replace);
3687 if (dev->scrub_device ||
3688 (!is_dev_replace &&
3689 btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
3690 btrfs_dev_replace_unlock(&fs_info->dev_replace);
3691 mutex_unlock(&fs_info->scrub_lock);
3692 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3693 return -EINPROGRESS;
3694 }
3695 btrfs_dev_replace_unlock(&fs_info->dev_replace);
3696
3697 ret = scrub_workers_get(fs_info, is_dev_replace);
3698 if (ret) {
3699 mutex_unlock(&fs_info->scrub_lock);
3700 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3701 return ret;
3702 }
3703
3704 sctx = scrub_setup_ctx(dev, is_dev_replace);
3705 if (IS_ERR(sctx)) {
3706 mutex_unlock(&fs_info->scrub_lock);
3707 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3708 scrub_workers_put(fs_info);
3709 return PTR_ERR(sctx);
3710 }
3711 sctx->readonly = readonly;
3712 dev->scrub_device = sctx;
3713 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3714
3715
3716
3717
3718
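	/*
	 * Check scrub_pause_req while still holding the scrub_lock; this
	 * avoids racing against a transaction commit that pauses scrub.
	 */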
3719 __scrub_blocked_if_needed(fs_info);
3720 atomic_inc(&fs_info->scrubs_running);
3721 mutex_unlock(&fs_info->scrub_lock);
3722
3723 if (!is_dev_replace) {
3724
3725
3726
3727
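		/*
		 * Hold the device list mutex while the superblocks are
		 * read; super writes take the same mutex, so they cannot
		 * race with us.
		 */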
3728 mutex_lock(&fs_info->fs_devices->device_list_mutex);
3729 ret = scrub_supers(sctx, dev);
3730 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3731 }
3732
3733 if (!ret)
3734 ret = scrub_enumerate_chunks(sctx, dev, start, end,
3735 is_dev_replace);
3736
3737 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
3738 atomic_dec(&fs_info->scrubs_running);
3739 wake_up(&fs_info->scrub_pause_wait);
3740
3741 wait_event(sctx->list_wait, atomic_read(&sctx->workers_pending) == 0);
3742
3743 if (progress)
3744 memcpy(progress, &sctx->stat, sizeof(*progress));
3745
3746 mutex_lock(&fs_info->scrub_lock);
3747 dev->scrub_device = NULL;
3748 scrub_workers_put(fs_info);
3749 mutex_unlock(&fs_info->scrub_lock);
3750
3751 scrub_put_ctx(sctx);
3752
3753 return ret;
3754}
3755
3756void btrfs_scrub_pause(struct btrfs_root *root)
3757{
3758 struct btrfs_fs_info *fs_info = root->fs_info;
3759
3760 mutex_lock(&fs_info->scrub_lock);
3761 atomic_inc(&fs_info->scrub_pause_req);
3762 while (atomic_read(&fs_info->scrubs_paused) !=
3763 atomic_read(&fs_info->scrubs_running)) {
3764 mutex_unlock(&fs_info->scrub_lock);
3765 wait_event(fs_info->scrub_pause_wait,
3766 atomic_read(&fs_info->scrubs_paused) ==
3767 atomic_read(&fs_info->scrubs_running));
3768 mutex_lock(&fs_info->scrub_lock);
3769 }
3770 mutex_unlock(&fs_info->scrub_lock);
3771}
3772
3773void btrfs_scrub_continue(struct btrfs_root *root)
3774{
3775 struct btrfs_fs_info *fs_info = root->fs_info;
3776
3777 atomic_dec(&fs_info->scrub_pause_req);
3778 wake_up(&fs_info->scrub_pause_wait);
3779}
3780
3781int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
3782{
3783 mutex_lock(&fs_info->scrub_lock);
3784 if (!atomic_read(&fs_info->scrubs_running)) {
3785 mutex_unlock(&fs_info->scrub_lock);
3786 return -ENOTCONN;
3787 }
3788
3789 atomic_inc(&fs_info->scrub_cancel_req);
3790 while (atomic_read(&fs_info->scrubs_running)) {
3791 mutex_unlock(&fs_info->scrub_lock);
3792 wait_event(fs_info->scrub_pause_wait,
3793 atomic_read(&fs_info->scrubs_running) == 0);
3794 mutex_lock(&fs_info->scrub_lock);
3795 }
3796 atomic_dec(&fs_info->scrub_cancel_req);
3797 mutex_unlock(&fs_info->scrub_lock);
3798
3799 return 0;
3800}
3801
3802int btrfs_scrub_cancel_dev(struct btrfs_fs_info *fs_info,
3803 struct btrfs_device *dev)
3804{
3805 struct scrub_ctx *sctx;
3806
3807 mutex_lock(&fs_info->scrub_lock);
3808 sctx = dev->scrub_device;
3809 if (!sctx) {
3810 mutex_unlock(&fs_info->scrub_lock);
3811 return -ENOTCONN;
3812 }
3813 atomic_inc(&sctx->cancel_req);
3814 while (dev->scrub_device) {
3815 mutex_unlock(&fs_info->scrub_lock);
3816 wait_event(fs_info->scrub_pause_wait,
3817 dev->scrub_device == NULL);
3818 mutex_lock(&fs_info->scrub_lock);
3819 }
3820 mutex_unlock(&fs_info->scrub_lock);
3821
3822 return 0;
3823}
3824
3825int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
3826 struct btrfs_scrub_progress *progress)
3827{
3828 struct btrfs_device *dev;
3829 struct scrub_ctx *sctx = NULL;
3830
3831 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
3832 dev = btrfs_find_device(root->fs_info, devid, NULL, NULL);
3833 if (dev)
3834 sctx = dev->scrub_device;
3835 if (sctx)
3836 memcpy(progress, &sctx->stat, sizeof(*progress));
3837 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
3838
3839 return dev ? (sctx ? 0 : -ENOTCONN) : -ENODEV;
3840}
3841
3842static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
3843 u64 extent_logical, u64 extent_len,
3844 u64 *extent_physical,
3845 struct btrfs_device **extent_dev,
3846 int *extent_mirror_num)
3847{
3848 u64 mapped_length;
3849 struct btrfs_bio *bbio = NULL;
3850 int ret;
3851
3852 mapped_length = extent_len;
3853 ret = btrfs_map_block(fs_info, READ, extent_logical,
3854 &mapped_length, &bbio, 0);
3855 if (ret || !bbio || mapped_length < extent_len ||
3856 !bbio->stripes[0].dev->bdev) {
3857 btrfs_put_bbio(bbio);
3858 return;
3859 }
3860
3861 *extent_physical = bbio->stripes[0].physical;
3862 *extent_mirror_num = bbio->mirror_num;
3863 *extent_dev = bbio->stripes[0].dev;
3864 btrfs_put_bbio(bbio);
3865}
3866
3867static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
3868 struct scrub_wr_ctx *wr_ctx,
3869 struct btrfs_fs_info *fs_info,
3870 struct btrfs_device *dev,
3871 int is_dev_replace)
3872{
3873 WARN_ON(wr_ctx->wr_curr_bio != NULL);
3874
3875 mutex_init(&wr_ctx->wr_lock);
3876 wr_ctx->wr_curr_bio = NULL;
3877 if (!is_dev_replace)
3878 return 0;
3879
3880 WARN_ON(!dev->bdev);
3881 wr_ctx->pages_per_wr_bio = min_t(int, SCRUB_PAGES_PER_WR_BIO,
3882 bio_get_nr_vecs(dev->bdev));
3883 wr_ctx->tgtdev = dev;
3884 atomic_set(&wr_ctx->flush_all_writes, 0);
3885 return 0;
3886}
3887
3888static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx)
3889{
3890 mutex_lock(&wr_ctx->wr_lock);
3891 kfree(wr_ctx->wr_curr_bio);
3892 wr_ctx->wr_curr_bio = NULL;
3893 mutex_unlock(&wr_ctx->wr_lock);
3894}
3895
3896static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
3897 int mirror_num, u64 physical_for_dev_replace)
3898{
3899 struct scrub_copy_nocow_ctx *nocow_ctx;
3900 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
3901
3902 nocow_ctx = kzalloc(sizeof(*nocow_ctx), GFP_NOFS);
3903 if (!nocow_ctx) {
3904 spin_lock(&sctx->stat_lock);
3905 sctx->stat.malloc_errors++;
3906 spin_unlock(&sctx->stat_lock);
3907 return -ENOMEM;
3908 }
3909
3910 scrub_pending_trans_workers_inc(sctx);
3911
3912 nocow_ctx->sctx = sctx;
3913 nocow_ctx->logical = logical;
3914 nocow_ctx->len = len;
3915 nocow_ctx->mirror_num = mirror_num;
3916 nocow_ctx->physical_for_dev_replace = physical_for_dev_replace;
3917 btrfs_init_work(&nocow_ctx->work, btrfs_scrubnc_helper,
3918 copy_nocow_pages_worker, NULL, NULL);
3919 INIT_LIST_HEAD(&nocow_ctx->inodes);
3920 btrfs_queue_work(fs_info->scrub_nocow_workers,
3921 &nocow_ctx->work);
3922
3923 return 0;
3924}
3925
3926static int record_inode_for_nocow(u64 inum, u64 offset, u64 root, void *ctx)
3927{
3928 struct scrub_copy_nocow_ctx *nocow_ctx = ctx;
3929 struct scrub_nocow_inode *nocow_inode;
3930
3931 nocow_inode = kzalloc(sizeof(*nocow_inode), GFP_NOFS);
3932 if (!nocow_inode)
3933 return -ENOMEM;
3934 nocow_inode->inum = inum;
3935 nocow_inode->offset = offset;
3936 nocow_inode->root = root;
3937 list_add_tail(&nocow_inode->list, &nocow_ctx->inodes);
3938 return 0;
3939}
3940
3941#define COPY_COMPLETE 1
3942
3943static void copy_nocow_pages_worker(struct btrfs_work *work)
3944{
3945 struct scrub_copy_nocow_ctx *nocow_ctx =
3946 container_of(work, struct scrub_copy_nocow_ctx, work);
3947 struct scrub_ctx *sctx = nocow_ctx->sctx;
3948 u64 logical = nocow_ctx->logical;
3949 u64 len = nocow_ctx->len;
3950 int mirror_num = nocow_ctx->mirror_num;
3951 u64 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
3952 int ret;
3953 struct btrfs_trans_handle *trans = NULL;
3954 struct btrfs_fs_info *fs_info;
3955 struct btrfs_path *path;
3956 struct btrfs_root *root;
3957 int not_written = 0;
3958
3959 fs_info = sctx->dev_root->fs_info;
3960 root = fs_info->extent_root;
3961
3962 path = btrfs_alloc_path();
3963 if (!path) {
3964 spin_lock(&sctx->stat_lock);
3965 sctx->stat.malloc_errors++;
3966 spin_unlock(&sctx->stat_lock);
3967 not_written = 1;
3968 goto out;
3969 }
3970
3971 trans = btrfs_join_transaction(root);
3972 if (IS_ERR(trans)) {
3973 not_written = 1;
3974 goto out;
3975 }
3976
3977 ret = iterate_inodes_from_logical(logical, fs_info, path,
3978 record_inode_for_nocow, nocow_ctx);
3979 if (ret != 0 && ret != -ENOENT) {
		btrfs_warn(fs_info,
			   "iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d",
			   logical, physical_for_dev_replace, len, mirror_num,
			   ret);
3984 not_written = 1;
3985 goto out;
3986 }
3987
3988 btrfs_end_transaction(trans, root);
3989 trans = NULL;
3990 while (!list_empty(&nocow_ctx->inodes)) {
3991 struct scrub_nocow_inode *entry;
3992 entry = list_first_entry(&nocow_ctx->inodes,
3993 struct scrub_nocow_inode,
3994 list);
3995 list_del_init(&entry->list);
3996 ret = copy_nocow_pages_for_inode(entry->inum, entry->offset,
3997 entry->root, nocow_ctx);
3998 kfree(entry);
3999 if (ret == COPY_COMPLETE) {
4000 ret = 0;
4001 break;
4002 } else if (ret) {
4003 break;
4004 }
4005 }
4006out:
4007 while (!list_empty(&nocow_ctx->inodes)) {
4008 struct scrub_nocow_inode *entry;
4009 entry = list_first_entry(&nocow_ctx->inodes,
4010 struct scrub_nocow_inode,
4011 list);
4012 list_del_init(&entry->list);
4013 kfree(entry);
4014 }
4015 if (trans && !IS_ERR(trans))
4016 btrfs_end_transaction(trans, root);
4017 if (not_written)
4018 btrfs_dev_replace_stats_inc(&fs_info->dev_replace.
4019 num_uncorrectable_read_errors);
4020
4021 btrfs_free_path(path);
4022 kfree(nocow_ctx);
4023
4024 scrub_pending_trans_workers_dec(sctx);
4025}
4026
4027static int check_extent_to_block(struct inode *inode, u64 start, u64 len,
4028 u64 logical)
4029{
4030 struct extent_state *cached_state = NULL;
4031 struct btrfs_ordered_extent *ordered;
4032 struct extent_io_tree *io_tree;
4033 struct extent_map *em;
4034 u64 lockstart = start, lockend = start + len - 1;
4035 int ret = 0;
4036
4037 io_tree = &BTRFS_I(inode)->io_tree;
4038
4039 lock_extent_bits(io_tree, lockstart, lockend, 0, &cached_state);
4040 ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
4041 if (ordered) {
4042 btrfs_put_ordered_extent(ordered);
4043 ret = 1;
4044 goto out_unlock;
4045 }
4046
4047 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
4048 if (IS_ERR(em)) {
4049 ret = PTR_ERR(em);
4050 goto out_unlock;
4051 }
4052
4053
4054
4055
4056
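	/*
	 * The mapping no longer covers the logical range we were asked
	 * to copy, it changed underneath us; let the caller move on.
	 */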
4057 if (em->block_start > logical ||
4058 em->block_start + em->block_len < logical + len) {
4059 free_extent_map(em);
4060 ret = 1;
4061 goto out_unlock;
4062 }
4063 free_extent_map(em);
4064
4065out_unlock:
4066 unlock_extent_cached(io_tree, lockstart, lockend, &cached_state,
4067 GFP_NOFS);
4068 return ret;
4069}
4070
4071static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
4072 struct scrub_copy_nocow_ctx *nocow_ctx)
4073{
4074 struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info;
4075 struct btrfs_key key;
4076 struct inode *inode;
4077 struct page *page;
4078 struct btrfs_root *local_root;
4079 struct extent_io_tree *io_tree;
4080 u64 physical_for_dev_replace;
4081 u64 nocow_ctx_logical;
4082 u64 len = nocow_ctx->len;
4083 unsigned long index;
4084 int srcu_index;
4085 int ret = 0;
4086 int err = 0;
4087
4088 key.objectid = root;
4089 key.type = BTRFS_ROOT_ITEM_KEY;
4090 key.offset = (u64)-1;
4091
4092 srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
4093
4094 local_root = btrfs_read_fs_root_no_name(fs_info, &key);
4095 if (IS_ERR(local_root)) {
4096 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
4097 return PTR_ERR(local_root);
4098 }
4099
4100 key.type = BTRFS_INODE_ITEM_KEY;
4101 key.objectid = inum;
4102 key.offset = 0;
4103 inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
4104 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
4105 if (IS_ERR(inode))
4106 return PTR_ERR(inode);
4107
4108
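	/* keep truncate, dio and hole punching away while we copy pages */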
4109 mutex_lock(&inode->i_mutex);
4110 inode_dio_wait(inode);
4111
4112 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
4113 io_tree = &BTRFS_I(inode)->io_tree;
4114 nocow_ctx_logical = nocow_ctx->logical;
4115
4116 ret = check_extent_to_block(inode, offset, len, nocow_ctx_logical);
4117 if (ret) {
4118 ret = ret > 0 ? 0 : ret;
4119 goto out;
4120 }
4121
4122 while (len >= PAGE_CACHE_SIZE) {
4123 index = offset >> PAGE_CACHE_SHIFT;
4124again:
4125 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
4126 if (!page) {
4127 btrfs_err(fs_info, "find_or_create_page() failed");
4128 ret = -ENOMEM;
4129 goto out;
4130 }
4131
4132 if (PageUptodate(page)) {
4133 if (PageDirty(page))
4134 goto next_page;
4135 } else {
4136 ClearPageError(page);
4137 err = extent_read_full_page(io_tree, page,
4138 btrfs_get_extent,
4139 nocow_ctx->mirror_num);
4140 if (err) {
4141 ret = err;
4142 goto next_page;
4143 }
4144
4145 lock_page(page);
4146
4147
4148
4149
4150
4151
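			/*
			 * If the page was dropped from the page cache in the
			 * meantime, its contents are stale and newer data may
			 * live in a different page; retry the lookup.
			 */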
4152 if (page->mapping != inode->i_mapping) {
4153 unlock_page(page);
4154 page_cache_release(page);
4155 goto again;
4156 }
4157 if (!PageUptodate(page)) {
4158 ret = -EIO;
4159 goto next_page;
4160 }
4161 }
4162
4163 ret = check_extent_to_block(inode, offset, len,
4164 nocow_ctx_logical);
4165 if (ret) {
4166 ret = ret > 0 ? 0 : ret;
4167 goto next_page;
4168 }
4169
4170 err = write_page_nocow(nocow_ctx->sctx,
4171 physical_for_dev_replace, page);
4172 if (err)
4173 ret = err;
4174next_page:
4175 unlock_page(page);
4176 page_cache_release(page);
4177
4178 if (ret)
4179 break;
4180
4181 offset += PAGE_CACHE_SIZE;
4182 physical_for_dev_replace += PAGE_CACHE_SIZE;
4183 nocow_ctx_logical += PAGE_CACHE_SIZE;
4184 len -= PAGE_CACHE_SIZE;
4185 }
4186 ret = COPY_COMPLETE;
4187out:
4188 mutex_unlock(&inode->i_mutex);
4189 iput(inode);
4190 return ret;
4191}
4192
4193static int write_page_nocow(struct scrub_ctx *sctx,
4194 u64 physical_for_dev_replace, struct page *page)
4195{
4196 struct bio *bio;
4197 struct btrfs_device *dev;
4198 int ret;
4199
4200 dev = sctx->wr_ctx.tgtdev;
4201 if (!dev)
4202 return -EIO;
4203 if (!dev->bdev) {
4204 printk_ratelimited(KERN_WARNING
4205 "BTRFS: scrub write_page_nocow(bdev == NULL) is unexpected!\n");
4206 return -EIO;
4207 }
4208 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
4209 if (!bio) {
4210 spin_lock(&sctx->stat_lock);
4211 sctx->stat.malloc_errors++;
4212 spin_unlock(&sctx->stat_lock);
4213 return -ENOMEM;
4214 }
4215 bio->bi_iter.bi_size = 0;
4216 bio->bi_iter.bi_sector = physical_for_dev_replace >> 9;
4217 bio->bi_bdev = dev->bdev;
4218 ret = bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
4219 if (ret != PAGE_CACHE_SIZE) {
4220leave_with_eio:
4221 bio_put(bio);
4222 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
4223 return -EIO;
4224 }
4225
4226 if (btrfsic_submit_bio_wait(WRITE_SYNC, bio))
4227 goto leave_with_eio;
4228
4229 bio_put(bio);
4230 return 0;
4231}
4232