#include <linux/blkdev.h>
#include <linux/ratelimit.h>
#include "ctree.h"
#include "volumes.h"
#include "disk-io.h"
#include "ordered-data.h"
#include "transaction.h"
#include "backref.h"
#include "extent_io.h"
#include "dev-replace.h"
#include "check-integrity.h"
#include "rcu-string.h"

struct scrub_block;
struct scrub_ctx;

#define SCRUB_PAGES_PER_RD_BIO	32
#define SCRUB_PAGES_PER_WR_BIO	32
#define SCRUB_BIOS_PER_SCTX	64

#define SCRUB_MAX_PAGES_PER_BLOCK	16

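/*
 * One page of a scrub_block: which device and physical/logical offsets it
 * was read from, plus its per-page checksum and I/O error state.
 */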
struct scrub_page {
	struct scrub_block *sblock;
	struct page *page;
	struct btrfs_device *dev;
	u64 flags;
	u64 generation;
	u64 logical;
	u64 physical;
	u64 physical_for_dev_replace;
	atomic_t ref_count;
	struct {
		unsigned int mirror_num:8;
		unsigned int have_csum:1;
		unsigned int io_error:1;
	};
	u8 csum[BTRFS_CSUM_SIZE];
};

struct scrub_bio {
	int index;
	struct scrub_ctx *sctx;
	struct btrfs_device *dev;
	struct bio *bio;
	int err;
	u64 logical;
	u64 physical;
#if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
	struct scrub_page *pagev[SCRUB_PAGES_PER_WR_BIO];
#else
	struct scrub_page *pagev[SCRUB_PAGES_PER_RD_BIO];
#endif
	int page_count;
	int next_free;
	struct btrfs_work work;
};

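/*
 * A scrub_block groups the pages of one checksummed unit (data sector,
 * tree block or super block) together with the result of rechecking it.
 */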
struct scrub_block {
	struct scrub_page *pagev[SCRUB_MAX_PAGES_PER_BLOCK];
	int page_count;
	atomic_t outstanding_pages;
	atomic_t ref_count;
	struct scrub_ctx *sctx;
	struct {
		unsigned int header_error:1;
		unsigned int checksum_error:1;
		unsigned int no_io_error_seen:1;
		unsigned int generation_error:1;
	};
};

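/* State for writing repaired or copied pages to the dev-replace target device. */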
struct scrub_wr_ctx {
	struct scrub_bio *wr_curr_bio;
	struct btrfs_device *tgtdev;
	int pages_per_wr_bio;
	atomic_t flush_all_writes;
	struct mutex wr_lock;
};

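/* Per-device scrub state: bio pool, checksum list, statistics and progress. */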
struct scrub_ctx {
	struct scrub_bio *bios[SCRUB_BIOS_PER_SCTX];
	struct btrfs_root *dev_root;
	int first_free;
	int curr;
	atomic_t bios_in_flight;
	atomic_t workers_pending;
	spinlock_t list_lock;
	wait_queue_head_t list_wait;
	u16 csum_size;
	struct list_head csum_list;
	atomic_t cancel_req;
	int readonly;
	int pages_per_rd_bio;
	u32 sectorsize;
	u32 nodesize;
	u32 leafsize;

	int is_dev_replace;
	struct scrub_wr_ctx wr_ctx;

	struct btrfs_scrub_progress stat;
	spinlock_t stat_lock;
};

struct scrub_fixup_nodatasum {
	struct scrub_ctx *sctx;
	struct btrfs_device *dev;
	u64 logical;
	struct btrfs_root *root;
	struct btrfs_work work;
	int mirror_num;
};

struct scrub_copy_nocow_ctx {
	struct scrub_ctx *sctx;
	u64 logical;
	u64 len;
	int mirror_num;
	u64 physical_for_dev_replace;
	struct btrfs_work work;
};

struct scrub_warning {
	struct btrfs_path *path;
	u64 extent_item_size;
	char *scratch_buf;
	char *msg_buf;
	const char *errstr;
	sector_t sector;
	u64 logical;
	struct btrfs_device *dev;
	int msg_bufsize;
	int scratch_bufsize;
};

static void scrub_pending_bio_inc(struct scrub_ctx *sctx);
static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx);
static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx);
static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
				     struct btrfs_fs_info *fs_info,
				     struct scrub_block *original_sblock,
				     u64 length, u64 logical,
				     struct scrub_block *sblocks_for_recheck);
static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
				struct scrub_block *sblock, int is_metadata,
				int have_csum, u8 *csum, u64 generation,
				u16 csum_size);
static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
					 struct scrub_block *sblock,
					 int is_metadata, int have_csum,
					 const u8 *csum, u64 generation,
					 u16 csum_size);
static void scrub_complete_bio_end_io(struct bio *bio, int err);
static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
					     struct scrub_block *sblock_good,
					     int force_write);
static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
					    struct scrub_block *sblock_good,
					    int page_num, int force_write);
static void scrub_write_block_to_dev_replace(struct scrub_block *sblock);
static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
					   int page_num);
static int scrub_checksum_data(struct scrub_block *sblock);
static int scrub_checksum_tree_block(struct scrub_block *sblock);
static int scrub_checksum_super(struct scrub_block *sblock);
static void scrub_block_get(struct scrub_block *sblock);
static void scrub_block_put(struct scrub_block *sblock);
static void scrub_page_get(struct scrub_page *spage);
static void scrub_page_put(struct scrub_page *spage);
static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage);
static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
		       u64 physical, struct btrfs_device *dev, u64 flags,
		       u64 gen, int mirror_num, u8 *csum, int force,
		       u64 physical_for_dev_replace);
static void scrub_bio_end_io(struct bio *bio, int err);
static void scrub_bio_end_io_worker(struct btrfs_work *work);
static void scrub_block_complete(struct scrub_block *sblock);
static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
			       u64 extent_logical, u64 extent_len,
			       u64 *extent_physical,
			       struct btrfs_device **extent_dev,
			       int *extent_mirror_num);
static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
			      struct scrub_wr_ctx *wr_ctx,
			      struct btrfs_fs_info *fs_info,
			      struct btrfs_device *dev,
			      int is_dev_replace);
static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx);
static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage);
static void scrub_wr_submit(struct scrub_ctx *sctx);
static void scrub_wr_bio_end_io(struct bio *bio, int err);
static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
static int write_page_nocow(struct scrub_ctx *sctx,
			    u64 physical_for_dev_replace, struct page *page);
static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
				      void *ctx);
static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
			    int mirror_num, u64 physical_for_dev_replace);
static void copy_nocow_pages_worker(struct btrfs_work *work);

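/*
 * Track the number of scrub bios in flight so that teardown and pause points
 * can wait on list_wait for all outstanding I/O to complete.
 */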
static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
{
	atomic_inc(&sctx->bios_in_flight);
}

static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
{
	atomic_dec(&sctx->bios_in_flight);
	wake_up(&sctx->list_wait);
}

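/*
 * Used for workers that require transaction commits: while such a worker is
 * pending, the scrub is counted as running but also as paused, so that a
 * transaction commit waiting for scrubs to pause is not blocked by it.
 */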
static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx)
{
	struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;

	mutex_lock(&fs_info->scrub_lock);
	atomic_inc(&fs_info->scrubs_running);
	atomic_inc(&fs_info->scrubs_paused);
	mutex_unlock(&fs_info->scrub_lock);
	atomic_inc(&sctx->workers_pending);
}

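/* Undo the accounting of scrub_pending_trans_workers_inc() when a worker finishes. */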
static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx)
{
	struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;

	mutex_lock(&fs_info->scrub_lock);
	atomic_dec(&fs_info->scrubs_running);
	atomic_dec(&fs_info->scrubs_paused);
	mutex_unlock(&fs_info->scrub_lock);
	atomic_dec(&sctx->workers_pending);
	wake_up(&fs_info->scrub_pause_wait);
	wake_up(&sctx->list_wait);
}

static void scrub_free_csums(struct scrub_ctx *sctx)
{
	while (!list_empty(&sctx->csum_list)) {
		struct btrfs_ordered_sum *sum;
		sum = list_first_entry(&sctx->csum_list,
				       struct btrfs_ordered_sum, list);
		list_del(&sum->list);
		kfree(sum);
	}
}

static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
{
	int i;

	if (!sctx)
		return;

	scrub_free_wr_ctx(&sctx->wr_ctx);

	if (sctx->curr != -1) {
		struct scrub_bio *sbio = sctx->bios[sctx->curr];

		for (i = 0; i < sbio->page_count; i++) {
			WARN_ON(!sbio->pagev[i]->page);
			scrub_block_put(sbio->pagev[i]->sblock);
		}
		bio_put(sbio->bio);
	}

	for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
		struct scrub_bio *sbio = sctx->bios[i];

		if (!sbio)
			break;
		kfree(sbio);
	}

	scrub_free_csums(sctx);
	kfree(sctx);
}

static noinline_for_stack
struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
{
	struct scrub_ctx *sctx;
	int i;
	struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
	int pages_per_rd_bio;
	int ret;

	if (dev->bdev)
		pages_per_rd_bio = min_t(int, SCRUB_PAGES_PER_RD_BIO,
					 bio_get_nr_vecs(dev->bdev));
	else
		pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
	sctx = kzalloc(sizeof(*sctx), GFP_NOFS);
	if (!sctx)
		goto nomem;
	sctx->is_dev_replace = is_dev_replace;
	sctx->pages_per_rd_bio = pages_per_rd_bio;
	sctx->curr = -1;
	sctx->dev_root = dev->dev_root;
	for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
		struct scrub_bio *sbio;

		sbio = kzalloc(sizeof(*sbio), GFP_NOFS);
		if (!sbio)
			goto nomem;
		sctx->bios[i] = sbio;

		sbio->index = i;
		sbio->sctx = sctx;
		sbio->page_count = 0;
		sbio->work.func = scrub_bio_end_io_worker;

		if (i != SCRUB_BIOS_PER_SCTX - 1)
			sctx->bios[i]->next_free = i + 1;
		else
			sctx->bios[i]->next_free = -1;
	}
	sctx->first_free = 0;
	sctx->nodesize = dev->dev_root->nodesize;
	sctx->leafsize = dev->dev_root->leafsize;
	sctx->sectorsize = dev->dev_root->sectorsize;
	atomic_set(&sctx->bios_in_flight, 0);
	atomic_set(&sctx->workers_pending, 0);
	atomic_set(&sctx->cancel_req, 0);
	sctx->csum_size = btrfs_super_csum_size(fs_info->super_copy);
	INIT_LIST_HEAD(&sctx->csum_list);

	spin_lock_init(&sctx->list_lock);
	spin_lock_init(&sctx->stat_lock);
	init_waitqueue_head(&sctx->list_wait);

	ret = scrub_setup_wr_ctx(sctx, &sctx->wr_ctx, fs_info,
				 fs_info->dev_replace.tgtdev, is_dev_replace);
	if (ret) {
		scrub_free_ctx(sctx);
		return ERR_PTR(ret);
	}
	return sctx;

nomem:
	scrub_free_ctx(sctx);
	return ERR_PTR(-ENOMEM);
}

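/*
 * Callback for iterate_extent_inodes(): resolve every path of an inode that
 * references the corrupted extent and print a warning for each of them.
 */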
static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
				     void *warn_ctx)
{
	u64 isize;
	u32 nlink;
	int ret;
	int i;
	struct extent_buffer *eb;
	struct btrfs_inode_item *inode_item;
	struct scrub_warning *swarn = warn_ctx;
	struct btrfs_fs_info *fs_info = swarn->dev->dev_root->fs_info;
	struct inode_fs_paths *ipath = NULL;
	struct btrfs_root *local_root;
	struct btrfs_key root_key;

	root_key.objectid = root;
	root_key.type = BTRFS_ROOT_ITEM_KEY;
	root_key.offset = (u64)-1;
	local_root = btrfs_read_fs_root_no_name(fs_info, &root_key);
	if (IS_ERR(local_root)) {
		ret = PTR_ERR(local_root);
		goto err;
	}

	ret = inode_item_info(inum, 0, local_root, swarn->path);
	if (ret) {
		btrfs_release_path(swarn->path);
		goto err;
	}

	eb = swarn->path->nodes[0];
	inode_item = btrfs_item_ptr(eb, swarn->path->slots[0],
				    struct btrfs_inode_item);
	isize = btrfs_inode_size(eb, inode_item);
	nlink = btrfs_inode_nlink(eb, inode_item);
	btrfs_release_path(swarn->path);

	ipath = init_ipath(4096, local_root, swarn->path);
	if (IS_ERR(ipath)) {
		ret = PTR_ERR(ipath);
		ipath = NULL;
		goto err;
	}
	ret = paths_from_inode(inum, ipath);

	if (ret < 0)
		goto err;

	for (i = 0; i < ipath->fspath->elem_cnt; ++i)
		printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "
			"%s, sector %llu, root %llu, inode %llu, offset %llu, "
			"length %llu, links %u (path: %s)\n", swarn->errstr,
			swarn->logical, rcu_str_deref(swarn->dev->name),
			(unsigned long long)swarn->sector, root, inum, offset,
			min(isize - offset, (u64)PAGE_SIZE), nlink,
			(char *)(unsigned long)ipath->fspath->val[i]);

	free_ipath(ipath);
	return 0;

err:
	printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "
		"%s, sector %llu, root %llu, inode %llu, offset %llu: path "
		"resolving failed with ret=%d\n", swarn->errstr,
		swarn->logical, rcu_str_deref(swarn->dev->name),
		(unsigned long long)swarn->sector, root, inum, offset, ret);

	free_ipath(ipath);
	return 0;
}

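/*
 * Print a warning for a corrupted block: for tree blocks walk the backrefs,
 * for data extents resolve the referencing inodes and their paths.
 */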
static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
{
	struct btrfs_device *dev;
	struct btrfs_fs_info *fs_info;
	struct btrfs_path *path;
	struct btrfs_key found_key;
	struct extent_buffer *eb;
	struct btrfs_extent_item *ei;
	struct scrub_warning swarn;
	unsigned long ptr = 0;
	u64 extent_item_pos;
	u64 flags = 0;
	u64 ref_root;
	u32 item_size;
	u8 ref_level;
	const int bufsize = 4096;
	int ret;

	WARN_ON(sblock->page_count < 1);
	dev = sblock->pagev[0]->dev;
	fs_info = sblock->sctx->dev_root->fs_info;

	path = btrfs_alloc_path();

	swarn.scratch_buf = kmalloc(bufsize, GFP_NOFS);
	swarn.msg_buf = kmalloc(bufsize, GFP_NOFS);
	swarn.sector = (sblock->pagev[0]->physical) >> 9;
	swarn.logical = sblock->pagev[0]->logical;
	swarn.errstr = errstr;
	swarn.dev = NULL;
	swarn.msg_bufsize = bufsize;
	swarn.scratch_bufsize = bufsize;

	if (!path || !swarn.scratch_buf || !swarn.msg_buf)
		goto out;

	ret = extent_from_logical(fs_info, swarn.logical, path, &found_key,
				  &flags);
	if (ret < 0)
		goto out;

	extent_item_pos = swarn.logical - found_key.objectid;
	swarn.extent_item_size = found_key.offset;

	eb = path->nodes[0];
	ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
	item_size = btrfs_item_size_nr(eb, path->slots[0]);
	btrfs_release_path(path);

	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		do {
			ret = tree_backref_for_extent(&ptr, eb, ei, item_size,
						      &ref_root, &ref_level);
			printk_in_rcu(KERN_WARNING
				"btrfs: %s at logical %llu on dev %s, "
				"sector %llu: metadata %s (level %d) in tree "
				"%llu\n", errstr, swarn.logical,
				rcu_str_deref(dev->name),
				(unsigned long long)swarn.sector,
				ref_level ? "node" : "leaf",
				ret < 0 ? -1 : ref_level,
				ret < 0 ? -1 : ref_root);
		} while (ret != 1);
	} else {
		swarn.path = path;
		swarn.dev = dev;
		iterate_extent_inodes(fs_info, found_key.objectid,
				      extent_item_pos, 1,
				      scrub_print_warning_inode, &swarn);
	}

out:
	btrfs_free_path(path);
	kfree(swarn.scratch_buf);
	kfree(swarn.msg_buf);
}

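/*
 * Callback for iterate_inodes_from_logical(): try to repair a bad data page.
 * If the page is up to date in the page cache, rewrite the bad copy from it;
 * otherwise trigger a read and let the I/O failure repair code pick another
 * mirror.
 */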
static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
{
	struct page *page = NULL;
	unsigned long index;
	struct scrub_fixup_nodatasum *fixup = fixup_ctx;
	int ret;
	int corrected = 0;
	struct btrfs_key key;
	struct inode *inode = NULL;
	struct btrfs_fs_info *fs_info;
	u64 end = offset + PAGE_SIZE - 1;
	struct btrfs_root *local_root;
	int srcu_index;

	key.objectid = root;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	fs_info = fixup->root->fs_info;
	srcu_index = srcu_read_lock(&fs_info->subvol_srcu);

	local_root = btrfs_read_fs_root_no_name(fs_info, &key);
	if (IS_ERR(local_root)) {
		srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
		return PTR_ERR(local_root);
	}

	key.type = BTRFS_INODE_ITEM_KEY;
	key.objectid = inum;
	key.offset = 0;
	inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
	srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	index = offset >> PAGE_CACHE_SHIFT;

	page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
	if (!page) {
		ret = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page)) {
		if (PageDirty(page)) {
			ret = -EIO;
			goto out;
		}
		fs_info = BTRFS_I(inode)->root->fs_info;
		ret = repair_io_failure(fs_info, offset, PAGE_SIZE,
					fixup->logical, page,
					fixup->mirror_num);
		unlock_page(page);
		corrected = !ret;
	} else {
		ret = set_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
				      EXTENT_DAMAGED, GFP_NOFS);
		if (ret) {
			WARN_ON(ret > 0);
			if (ret > 0)
				ret = -EFAULT;
			goto out;
		}

		ret = extent_read_full_page(&BTRFS_I(inode)->io_tree, page,
					    btrfs_get_extent,
					    fixup->mirror_num);
		wait_on_page_locked(page);

		corrected = !test_range_bit(&BTRFS_I(inode)->io_tree, offset,
					    end, EXTENT_DAMAGED, 0, NULL);
		if (!corrected)
			clear_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
					  EXTENT_DAMAGED, GFP_NOFS);
	}

out:
	if (page)
		put_page(page);
	if (inode)
		iput(inode);

	if (ret < 0)
		return ret;

	if (ret == 0 && corrected) {
		return 1;
	}

	return -EIO;
}

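/*
 * Worker that tries to fix up a data extent without checksum by iterating all
 * inodes that reference the bad logical address and repairing their pages.
 */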
static void scrub_fixup_nodatasum(struct btrfs_work *work)
{
	int ret;
	struct scrub_fixup_nodatasum *fixup;
	struct scrub_ctx *sctx;
	struct btrfs_trans_handle *trans = NULL;
	struct btrfs_fs_info *fs_info;
	struct btrfs_path *path;
	int uncorrectable = 0;

	fixup = container_of(work, struct scrub_fixup_nodatasum, work);
	sctx = fixup->sctx;
	fs_info = fixup->root->fs_info;

	path = btrfs_alloc_path();
	if (!path) {
		spin_lock(&sctx->stat_lock);
		++sctx->stat.malloc_errors;
		spin_unlock(&sctx->stat_lock);
		uncorrectable = 1;
		goto out;
	}

	trans = btrfs_join_transaction(fixup->root);
	if (IS_ERR(trans)) {
		uncorrectable = 1;
		goto out;
	}

	ret = iterate_inodes_from_logical(fixup->logical, fixup->root->fs_info,
					  path, scrub_fixup_readpage,
					  fixup);
	if (ret < 0) {
		uncorrectable = 1;
		goto out;
	}
	WARN_ON(ret != 1);

	spin_lock(&sctx->stat_lock);
	++sctx->stat.corrected_errors;
	spin_unlock(&sctx->stat_lock);

out:
	if (trans && !IS_ERR(trans))
		btrfs_end_transaction(trans, fixup->root);
	if (uncorrectable) {
		spin_lock(&sctx->stat_lock);
		++sctx->stat.uncorrectable_errors;
		spin_unlock(&sctx->stat_lock);
		btrfs_dev_replace_stats_inc(
			&sctx->dev_root->fs_info->dev_replace.
			num_uncorrectable_read_errors);
		printk_ratelimited_in_rcu(KERN_ERR
			"btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n",
			(unsigned long long)fixup->logical,
			rcu_str_deref(fixup->dev->name));
	}

	btrfs_free_path(path);
	kfree(fixup);

	scrub_pending_trans_workers_dec(sctx);
}
764
765
766
767
768
769
770
771
772
773static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
774{
775 struct scrub_ctx *sctx = sblock_to_check->sctx;
776 struct btrfs_device *dev;
777 struct btrfs_fs_info *fs_info;
778 u64 length;
779 u64 logical;
780 u64 generation;
781 unsigned int failed_mirror_index;
782 unsigned int is_metadata;
783 unsigned int have_csum;
784 u8 *csum;
785 struct scrub_block *sblocks_for_recheck;
786 struct scrub_block *sblock_bad;
787 int ret;
788 int mirror_index;
789 int page_num;
790 int success;
791 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
792 DEFAULT_RATELIMIT_BURST);
793
794 BUG_ON(sblock_to_check->page_count < 1);
795 fs_info = sctx->dev_root->fs_info;
796 if (sblock_to_check->pagev[0]->flags & BTRFS_EXTENT_FLAG_SUPER) {
797
798
799
800
801
802 spin_lock(&sctx->stat_lock);
803 ++sctx->stat.super_errors;
804 spin_unlock(&sctx->stat_lock);
805 return 0;
806 }
807 length = sblock_to_check->page_count * PAGE_SIZE;
808 logical = sblock_to_check->pagev[0]->logical;
809 generation = sblock_to_check->pagev[0]->generation;
810 BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1);
811 failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1;
812 is_metadata = !(sblock_to_check->pagev[0]->flags &
813 BTRFS_EXTENT_FLAG_DATA);
814 have_csum = sblock_to_check->pagev[0]->have_csum;
815 csum = sblock_to_check->pagev[0]->csum;
816 dev = sblock_to_check->pagev[0]->dev;
817
818 if (sctx->is_dev_replace && !is_metadata && !have_csum) {
819 sblocks_for_recheck = NULL;
820 goto nodatasum_case;
821 }
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852 sblocks_for_recheck = kzalloc(BTRFS_MAX_MIRRORS *
853 sizeof(*sblocks_for_recheck),
854 GFP_NOFS);
855 if (!sblocks_for_recheck) {
856 spin_lock(&sctx->stat_lock);
857 sctx->stat.malloc_errors++;
858 sctx->stat.read_errors++;
859 sctx->stat.uncorrectable_errors++;
860 spin_unlock(&sctx->stat_lock);
861 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
862 goto out;
863 }
864
865
866 ret = scrub_setup_recheck_block(sctx, fs_info, sblock_to_check, length,
867 logical, sblocks_for_recheck);
868 if (ret) {
869 spin_lock(&sctx->stat_lock);
870 sctx->stat.read_errors++;
871 sctx->stat.uncorrectable_errors++;
872 spin_unlock(&sctx->stat_lock);
873 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
874 goto out;
875 }
876 BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
877 sblock_bad = sblocks_for_recheck + failed_mirror_index;
878
879
880 scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum,
881 csum, generation, sctx->csum_size);
882
883 if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
884 sblock_bad->no_io_error_seen) {
885
886
887
888
889
890
891
892
893 spin_lock(&sctx->stat_lock);
894 sctx->stat.unverified_errors++;
895 spin_unlock(&sctx->stat_lock);
896
897 if (sctx->is_dev_replace)
898 scrub_write_block_to_dev_replace(sblock_bad);
899 goto out;
900 }
901
902 if (!sblock_bad->no_io_error_seen) {
903 spin_lock(&sctx->stat_lock);
904 sctx->stat.read_errors++;
905 spin_unlock(&sctx->stat_lock);
906 if (__ratelimit(&_rs))
907 scrub_print_warning("i/o error", sblock_to_check);
908 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
909 } else if (sblock_bad->checksum_error) {
910 spin_lock(&sctx->stat_lock);
911 sctx->stat.csum_errors++;
912 spin_unlock(&sctx->stat_lock);
913 if (__ratelimit(&_rs))
914 scrub_print_warning("checksum error", sblock_to_check);
915 btrfs_dev_stat_inc_and_print(dev,
916 BTRFS_DEV_STAT_CORRUPTION_ERRS);
917 } else if (sblock_bad->header_error) {
918 spin_lock(&sctx->stat_lock);
919 sctx->stat.verify_errors++;
920 spin_unlock(&sctx->stat_lock);
921 if (__ratelimit(&_rs))
922 scrub_print_warning("checksum/header error",
923 sblock_to_check);
924 if (sblock_bad->generation_error)
925 btrfs_dev_stat_inc_and_print(dev,
926 BTRFS_DEV_STAT_GENERATION_ERRS);
927 else
928 btrfs_dev_stat_inc_and_print(dev,
929 BTRFS_DEV_STAT_CORRUPTION_ERRS);
930 }
931
932 if (sctx->readonly && !sctx->is_dev_replace)
933 goto did_not_correct_error;
934
935 if (!is_metadata && !have_csum) {
936 struct scrub_fixup_nodatasum *fixup_nodatasum;
937
938nodatasum_case:
939 WARN_ON(sctx->is_dev_replace);
940
941
942
943
944
945
946
947
948 fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS);
949 if (!fixup_nodatasum)
950 goto did_not_correct_error;
951 fixup_nodatasum->sctx = sctx;
952 fixup_nodatasum->dev = dev;
953 fixup_nodatasum->logical = logical;
954 fixup_nodatasum->root = fs_info->extent_root;
955 fixup_nodatasum->mirror_num = failed_mirror_index + 1;
956 scrub_pending_trans_workers_inc(sctx);
957 fixup_nodatasum->work.func = scrub_fixup_nodatasum;
958 btrfs_queue_worker(&fs_info->scrub_workers,
959 &fixup_nodatasum->work);
960 goto out;
961 }
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978 for (mirror_index = 0;
979 mirror_index < BTRFS_MAX_MIRRORS &&
980 sblocks_for_recheck[mirror_index].page_count > 0;
981 mirror_index++) {
982 struct scrub_block *sblock_other;
983
984 if (mirror_index == failed_mirror_index)
985 continue;
986 sblock_other = sblocks_for_recheck + mirror_index;
987
988
989 scrub_recheck_block(fs_info, sblock_other, is_metadata,
990 have_csum, csum, generation,
991 sctx->csum_size);
992
993 if (!sblock_other->header_error &&
994 !sblock_other->checksum_error &&
995 sblock_other->no_io_error_seen) {
996 if (sctx->is_dev_replace) {
997 scrub_write_block_to_dev_replace(sblock_other);
998 } else {
999 int force_write = is_metadata || have_csum;
1000
1001 ret = scrub_repair_block_from_good_copy(
1002 sblock_bad, sblock_other,
1003 force_write);
1004 }
1005 if (0 == ret)
1006 goto corrected_error;
1007 }
1008 }
1009
1010
1011
1012
1013 if (sctx->is_dev_replace) {
1014 success = 1;
1015 for (page_num = 0; page_num < sblock_bad->page_count;
1016 page_num++) {
1017 int sub_success;
1018
1019 sub_success = 0;
1020 for (mirror_index = 0;
1021 mirror_index < BTRFS_MAX_MIRRORS &&
1022 sblocks_for_recheck[mirror_index].page_count > 0;
1023 mirror_index++) {
1024 struct scrub_block *sblock_other =
1025 sblocks_for_recheck + mirror_index;
1026 struct scrub_page *page_other =
1027 sblock_other->pagev[page_num];
1028
1029 if (!page_other->io_error) {
1030 ret = scrub_write_page_to_dev_replace(
1031 sblock_other, page_num);
1032 if (ret == 0) {
1033
1034 sub_success = 1;
1035 break;
1036 } else {
1037 btrfs_dev_replace_stats_inc(
1038 &sctx->dev_root->
1039 fs_info->dev_replace.
1040 num_write_errors);
1041 }
1042 }
1043 }
1044
1045 if (!sub_success) {
1046
1047
1048
1049
1050
1051
1052
1053 success = 0;
1054 ret = scrub_write_page_to_dev_replace(
1055 sblock_bad, page_num);
1056 if (ret)
1057 btrfs_dev_replace_stats_inc(
1058 &sctx->dev_root->fs_info->
1059 dev_replace.num_write_errors);
1060 }
1061 }
1062
1063 goto out;
1064 }
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093 if (sblock_bad->no_io_error_seen)
1094 goto did_not_correct_error;
1095
1096 success = 1;
1097 for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
1098 struct scrub_page *page_bad = sblock_bad->pagev[page_num];
1099
1100 if (!page_bad->io_error)
1101 continue;
1102
1103 for (mirror_index = 0;
1104 mirror_index < BTRFS_MAX_MIRRORS &&
1105 sblocks_for_recheck[mirror_index].page_count > 0;
1106 mirror_index++) {
1107 struct scrub_block *sblock_other = sblocks_for_recheck +
1108 mirror_index;
1109 struct scrub_page *page_other = sblock_other->pagev[
1110 page_num];
1111
1112 if (!page_other->io_error) {
1113 ret = scrub_repair_page_from_good_copy(
1114 sblock_bad, sblock_other, page_num, 0);
1115 if (0 == ret) {
1116 page_bad->io_error = 0;
1117 break;
1118 }
1119 }
1120 }
1121
1122 if (page_bad->io_error) {
1123
1124 success = 0;
1125 }
1126 }
1127
1128 if (success) {
1129 if (is_metadata || have_csum) {
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139 scrub_recheck_block(fs_info, sblock_bad,
1140 is_metadata, have_csum, csum,
1141 generation, sctx->csum_size);
1142 if (!sblock_bad->header_error &&
1143 !sblock_bad->checksum_error &&
1144 sblock_bad->no_io_error_seen)
1145 goto corrected_error;
1146 else
1147 goto did_not_correct_error;
1148 } else {
1149corrected_error:
1150 spin_lock(&sctx->stat_lock);
1151 sctx->stat.corrected_errors++;
1152 spin_unlock(&sctx->stat_lock);
1153 printk_ratelimited_in_rcu(KERN_ERR
1154 "btrfs: fixed up error at logical %llu on dev %s\n",
1155 (unsigned long long)logical,
1156 rcu_str_deref(dev->name));
1157 }
1158 } else {
1159did_not_correct_error:
1160 spin_lock(&sctx->stat_lock);
1161 sctx->stat.uncorrectable_errors++;
1162 spin_unlock(&sctx->stat_lock);
1163 printk_ratelimited_in_rcu(KERN_ERR
1164 "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n",
1165 (unsigned long long)logical,
1166 rcu_str_deref(dev->name));
1167 }
1168
1169out:
1170 if (sblocks_for_recheck) {
1171 for (mirror_index = 0; mirror_index < BTRFS_MAX_MIRRORS;
1172 mirror_index++) {
1173 struct scrub_block *sblock = sblocks_for_recheck +
1174 mirror_index;
1175 int page_index;
1176
1177 for (page_index = 0; page_index < sblock->page_count;
1178 page_index++) {
1179 sblock->pagev[page_index]->sblock = NULL;
1180 scrub_page_put(sblock->pagev[page_index]);
1181 }
1182 }
1183 kfree(sblocks_for_recheck);
1184 }
1185
1186 return 0;
1187}
1188
1189static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
1190 struct btrfs_fs_info *fs_info,
1191 struct scrub_block *original_sblock,
1192 u64 length, u64 logical,
1193 struct scrub_block *sblocks_for_recheck)
1194{
1195 int page_index;
1196 int mirror_index;
1197 int ret;
1198
1199
1200
1201
1202
1203
1204
1205 page_index = 0;
1206 while (length > 0) {
1207 u64 sublen = min_t(u64, length, PAGE_SIZE);
1208 u64 mapped_length = sublen;
1209 struct btrfs_bio *bbio = NULL;
1210
1211
1212
1213
1214
1215 ret = btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, logical,
1216 &mapped_length, &bbio, 0);
1217 if (ret || !bbio || mapped_length < sublen) {
1218 kfree(bbio);
1219 return -EIO;
1220 }
1221
1222 BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO);
1223 for (mirror_index = 0; mirror_index < (int)bbio->num_stripes;
1224 mirror_index++) {
1225 struct scrub_block *sblock;
1226 struct scrub_page *page;
1227
1228 if (mirror_index >= BTRFS_MAX_MIRRORS)
1229 continue;
1230
1231 sblock = sblocks_for_recheck + mirror_index;
1232 sblock->sctx = sctx;
1233 page = kzalloc(sizeof(*page), GFP_NOFS);
1234 if (!page) {
1235leave_nomem:
1236 spin_lock(&sctx->stat_lock);
1237 sctx->stat.malloc_errors++;
1238 spin_unlock(&sctx->stat_lock);
1239 kfree(bbio);
1240 return -ENOMEM;
1241 }
1242 scrub_page_get(page);
1243 sblock->pagev[page_index] = page;
1244 page->logical = logical;
1245 page->physical = bbio->stripes[mirror_index].physical;
1246 BUG_ON(page_index >= original_sblock->page_count);
1247 page->physical_for_dev_replace =
1248 original_sblock->pagev[page_index]->
1249 physical_for_dev_replace;
1250
1251 page->dev = bbio->stripes[mirror_index].dev;
1252 page->mirror_num = mirror_index + 1;
1253 sblock->page_count++;
1254 page->page = alloc_page(GFP_NOFS);
1255 if (!page->page)
1256 goto leave_nomem;
1257 }
1258 kfree(bbio);
1259 length -= sublen;
1260 logical += sublen;
1261 page_index++;
1262 }
1263
1264 return 0;
1265}
1266
1267
1268
1269
1270
1271
1272
1273
1274static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
1275 struct scrub_block *sblock, int is_metadata,
1276 int have_csum, u8 *csum, u64 generation,
1277 u16 csum_size)
1278{
1279 int page_num;
1280
1281 sblock->no_io_error_seen = 1;
1282 sblock->header_error = 0;
1283 sblock->checksum_error = 0;
1284
1285 for (page_num = 0; page_num < sblock->page_count; page_num++) {
1286 struct bio *bio;
1287 struct scrub_page *page = sblock->pagev[page_num];
1288 DECLARE_COMPLETION_ONSTACK(complete);
1289
1290 if (page->dev->bdev == NULL) {
1291 page->io_error = 1;
1292 sblock->no_io_error_seen = 0;
1293 continue;
1294 }
1295
1296 WARN_ON(!page->page);
1297 bio = bio_alloc(GFP_NOFS, 1);
1298 if (!bio) {
1299 page->io_error = 1;
1300 sblock->no_io_error_seen = 0;
1301 continue;
1302 }
1303 bio->bi_bdev = page->dev->bdev;
1304 bio->bi_sector = page->physical >> 9;
1305 bio->bi_end_io = scrub_complete_bio_end_io;
1306 bio->bi_private = &complete;
1307
1308 bio_add_page(bio, page->page, PAGE_SIZE, 0);
1309 btrfsic_submit_bio(READ, bio);
1310
1311
1312 wait_for_completion(&complete);
1313
1314 page->io_error = !test_bit(BIO_UPTODATE, &bio->bi_flags);
1315 if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
1316 sblock->no_io_error_seen = 0;
1317 bio_put(bio);
1318 }
1319
1320 if (sblock->no_io_error_seen)
1321 scrub_recheck_block_checksum(fs_info, sblock, is_metadata,
1322 have_csum, csum, generation,
1323 csum_size);
1324
1325 return;
1326}
1327
1328static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
1329 struct scrub_block *sblock,
1330 int is_metadata, int have_csum,
1331 const u8 *csum, u64 generation,
1332 u16 csum_size)
1333{
1334 int page_num;
1335 u8 calculated_csum[BTRFS_CSUM_SIZE];
1336 u32 crc = ~(u32)0;
1337 struct btrfs_root *root = fs_info->extent_root;
1338 void *mapped_buffer;
1339
1340 WARN_ON(!sblock->pagev[0]->page);
1341 if (is_metadata) {
1342 struct btrfs_header *h;
1343
1344 mapped_buffer = kmap_atomic(sblock->pagev[0]->page);
1345 h = (struct btrfs_header *)mapped_buffer;
1346
1347 if (sblock->pagev[0]->logical != le64_to_cpu(h->bytenr) ||
1348 memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) ||
1349 memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
1350 BTRFS_UUID_SIZE)) {
1351 sblock->header_error = 1;
1352 } else if (generation != le64_to_cpu(h->generation)) {
1353 sblock->header_error = 1;
1354 sblock->generation_error = 1;
1355 }
1356 csum = h->csum;
1357 } else {
1358 if (!have_csum)
1359 return;
1360
1361 mapped_buffer = kmap_atomic(sblock->pagev[0]->page);
1362 }
1363
1364 for (page_num = 0;;) {
1365 if (page_num == 0 && is_metadata)
1366 crc = btrfs_csum_data(root,
1367 ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE,
1368 crc, PAGE_SIZE - BTRFS_CSUM_SIZE);
1369 else
1370 crc = btrfs_csum_data(root, mapped_buffer, crc,
1371 PAGE_SIZE);
1372
1373 kunmap_atomic(mapped_buffer);
1374 page_num++;
1375 if (page_num >= sblock->page_count)
1376 break;
1377 WARN_ON(!sblock->pagev[page_num]->page);
1378
1379 mapped_buffer = kmap_atomic(sblock->pagev[page_num]->page);
1380 }
1381
1382 btrfs_csum_final(crc, calculated_csum);
1383 if (memcmp(calculated_csum, csum, csum_size))
1384 sblock->checksum_error = 1;
1385}
1386
1387static void scrub_complete_bio_end_io(struct bio *bio, int err)
1388{
1389 complete((struct completion *)bio->bi_private);
1390}
1391
1392static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
1393 struct scrub_block *sblock_good,
1394 int force_write)
1395{
1396 int page_num;
1397 int ret = 0;
1398
1399 for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
1400 int ret_sub;
1401
1402 ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
1403 sblock_good,
1404 page_num,
1405 force_write);
1406 if (ret_sub)
1407 ret = ret_sub;
1408 }
1409
1410 return ret;
1411}
1412
1413static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
1414 struct scrub_block *sblock_good,
1415 int page_num, int force_write)
1416{
1417 struct scrub_page *page_bad = sblock_bad->pagev[page_num];
1418 struct scrub_page *page_good = sblock_good->pagev[page_num];
1419
1420 BUG_ON(page_bad->page == NULL);
1421 BUG_ON(page_good->page == NULL);
1422 if (force_write || sblock_bad->header_error ||
1423 sblock_bad->checksum_error || page_bad->io_error) {
1424 struct bio *bio;
1425 int ret;
1426 DECLARE_COMPLETION_ONSTACK(complete);
1427
1428 if (!page_bad->dev->bdev) {
1429 printk_ratelimited(KERN_WARNING
1430 "btrfs: scrub_repair_page_from_good_copy(bdev == NULL) is unexpected!\n");
1431 return -EIO;
1432 }
1433
1434 bio = bio_alloc(GFP_NOFS, 1);
1435 if (!bio)
1436 return -EIO;
1437 bio->bi_bdev = page_bad->dev->bdev;
1438 bio->bi_sector = page_bad->physical >> 9;
1439 bio->bi_end_io = scrub_complete_bio_end_io;
1440 bio->bi_private = &complete;
1441
1442 ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
1443 if (PAGE_SIZE != ret) {
1444 bio_put(bio);
1445 return -EIO;
1446 }
1447 btrfsic_submit_bio(WRITE, bio);
1448
1449
1450 wait_for_completion(&complete);
1451 if (!bio_flagged(bio, BIO_UPTODATE)) {
1452 btrfs_dev_stat_inc_and_print(page_bad->dev,
1453 BTRFS_DEV_STAT_WRITE_ERRS);
1454 btrfs_dev_replace_stats_inc(
1455 &sblock_bad->sctx->dev_root->fs_info->
1456 dev_replace.num_write_errors);
1457 bio_put(bio);
1458 return -EIO;
1459 }
1460 bio_put(bio);
1461 }
1462
1463 return 0;
1464}
1465
1466static void scrub_write_block_to_dev_replace(struct scrub_block *sblock)
1467{
1468 int page_num;
1469
1470 for (page_num = 0; page_num < sblock->page_count; page_num++) {
1471 int ret;
1472
1473 ret = scrub_write_page_to_dev_replace(sblock, page_num);
1474 if (ret)
1475 btrfs_dev_replace_stats_inc(
1476 &sblock->sctx->dev_root->fs_info->dev_replace.
1477 num_write_errors);
1478 }
1479}
1480
1481static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
1482 int page_num)
1483{
1484 struct scrub_page *spage = sblock->pagev[page_num];
1485
1486 BUG_ON(spage->page == NULL);
1487 if (spage->io_error) {
1488 void *mapped_buffer = kmap_atomic(spage->page);
1489
1490 memset(mapped_buffer, 0, PAGE_CACHE_SIZE);
1491 flush_dcache_page(spage->page);
1492 kunmap_atomic(mapped_buffer);
1493 }
1494 return scrub_add_page_to_wr_bio(sblock->sctx, spage);
1495}
1496
1497static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
1498 struct scrub_page *spage)
1499{
1500 struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx;
1501 struct scrub_bio *sbio;
1502 int ret;
1503
1504 mutex_lock(&wr_ctx->wr_lock);
1505again:
1506 if (!wr_ctx->wr_curr_bio) {
1507 wr_ctx->wr_curr_bio = kzalloc(sizeof(*wr_ctx->wr_curr_bio),
1508 GFP_NOFS);
1509 if (!wr_ctx->wr_curr_bio) {
1510 mutex_unlock(&wr_ctx->wr_lock);
1511 return -ENOMEM;
1512 }
1513 wr_ctx->wr_curr_bio->sctx = sctx;
1514 wr_ctx->wr_curr_bio->page_count = 0;
1515 }
1516 sbio = wr_ctx->wr_curr_bio;
1517 if (sbio->page_count == 0) {
1518 struct bio *bio;
1519
1520 sbio->physical = spage->physical_for_dev_replace;
1521 sbio->logical = spage->logical;
1522 sbio->dev = wr_ctx->tgtdev;
1523 bio = sbio->bio;
1524 if (!bio) {
1525 bio = bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio);
1526 if (!bio) {
1527 mutex_unlock(&wr_ctx->wr_lock);
1528 return -ENOMEM;
1529 }
1530 sbio->bio = bio;
1531 }
1532
1533 bio->bi_private = sbio;
1534 bio->bi_end_io = scrub_wr_bio_end_io;
1535 bio->bi_bdev = sbio->dev->bdev;
1536 bio->bi_sector = sbio->physical >> 9;
1537 sbio->err = 0;
1538 } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
1539 spage->physical_for_dev_replace ||
1540 sbio->logical + sbio->page_count * PAGE_SIZE !=
1541 spage->logical) {
1542 scrub_wr_submit(sctx);
1543 goto again;
1544 }
1545
1546 ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
1547 if (ret != PAGE_SIZE) {
1548 if (sbio->page_count < 1) {
1549 bio_put(sbio->bio);
1550 sbio->bio = NULL;
1551 mutex_unlock(&wr_ctx->wr_lock);
1552 return -EIO;
1553 }
1554 scrub_wr_submit(sctx);
1555 goto again;
1556 }
1557
1558 sbio->pagev[sbio->page_count] = spage;
1559 scrub_page_get(spage);
1560 sbio->page_count++;
1561 if (sbio->page_count == wr_ctx->pages_per_wr_bio)
1562 scrub_wr_submit(sctx);
1563 mutex_unlock(&wr_ctx->wr_lock);
1564
1565 return 0;
1566}
1567
1568static void scrub_wr_submit(struct scrub_ctx *sctx)
1569{
1570 struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx;
1571 struct scrub_bio *sbio;
1572
1573 if (!wr_ctx->wr_curr_bio)
1574 return;
1575
1576 sbio = wr_ctx->wr_curr_bio;
1577 wr_ctx->wr_curr_bio = NULL;
1578 WARN_ON(!sbio->bio->bi_bdev);
1579 scrub_pending_bio_inc(sctx);
1580
1581
1582
1583
1584 btrfsic_submit_bio(WRITE, sbio->bio);
1585}
1586
1587static void scrub_wr_bio_end_io(struct bio *bio, int err)
1588{
1589 struct scrub_bio *sbio = bio->bi_private;
1590 struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info;
1591
1592 sbio->err = err;
1593 sbio->bio = bio;
1594
1595 sbio->work.func = scrub_wr_bio_end_io_worker;
1596 btrfs_queue_worker(&fs_info->scrub_wr_completion_workers, &sbio->work);
1597}
1598
1599static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
1600{
1601 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
1602 struct scrub_ctx *sctx = sbio->sctx;
1603 int i;
1604
1605 WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
1606 if (sbio->err) {
1607 struct btrfs_dev_replace *dev_replace =
1608 &sbio->sctx->dev_root->fs_info->dev_replace;
1609
1610 for (i = 0; i < sbio->page_count; i++) {
1611 struct scrub_page *spage = sbio->pagev[i];
1612
1613 spage->io_error = 1;
1614 btrfs_dev_replace_stats_inc(&dev_replace->
1615 num_write_errors);
1616 }
1617 }
1618
1619 for (i = 0; i < sbio->page_count; i++)
1620 scrub_page_put(sbio->pagev[i]);
1621
1622 bio_put(sbio->bio);
1623 kfree(sbio);
1624 scrub_pending_bio_dec(sctx);
1625}
1626
1627static int scrub_checksum(struct scrub_block *sblock)
1628{
1629 u64 flags;
1630 int ret;
1631
1632 WARN_ON(sblock->page_count < 1);
1633 flags = sblock->pagev[0]->flags;
1634 ret = 0;
1635 if (flags & BTRFS_EXTENT_FLAG_DATA)
1636 ret = scrub_checksum_data(sblock);
1637 else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
1638 ret = scrub_checksum_tree_block(sblock);
1639 else if (flags & BTRFS_EXTENT_FLAG_SUPER)
1640 (void)scrub_checksum_super(sblock);
1641 else
1642 WARN_ON(1);
1643 if (ret)
1644 scrub_handle_errored_block(sblock);
1645
1646 return ret;
1647}
1648
1649static int scrub_checksum_data(struct scrub_block *sblock)
1650{
1651 struct scrub_ctx *sctx = sblock->sctx;
1652 u8 csum[BTRFS_CSUM_SIZE];
1653 u8 *on_disk_csum;
1654 struct page *page;
1655 void *buffer;
1656 u32 crc = ~(u32)0;
1657 int fail = 0;
1658 struct btrfs_root *root = sctx->dev_root;
1659 u64 len;
1660 int index;
1661
1662 BUG_ON(sblock->page_count < 1);
1663 if (!sblock->pagev[0]->have_csum)
1664 return 0;
1665
1666 on_disk_csum = sblock->pagev[0]->csum;
1667 page = sblock->pagev[0]->page;
1668 buffer = kmap_atomic(page);
1669
1670 len = sctx->sectorsize;
1671 index = 0;
1672 for (;;) {
1673 u64 l = min_t(u64, len, PAGE_SIZE);
1674
1675 crc = btrfs_csum_data(root, buffer, crc, l);
1676 kunmap_atomic(buffer);
1677 len -= l;
1678 if (len == 0)
1679 break;
1680 index++;
1681 BUG_ON(index >= sblock->page_count);
1682 BUG_ON(!sblock->pagev[index]->page);
1683 page = sblock->pagev[index]->page;
1684 buffer = kmap_atomic(page);
1685 }
1686
1687 btrfs_csum_final(crc, csum);
1688 if (memcmp(csum, on_disk_csum, sctx->csum_size))
1689 fail = 1;
1690
1691 return fail;
1692}
1693
1694static int scrub_checksum_tree_block(struct scrub_block *sblock)
1695{
1696 struct scrub_ctx *sctx = sblock->sctx;
1697 struct btrfs_header *h;
1698 struct btrfs_root *root = sctx->dev_root;
1699 struct btrfs_fs_info *fs_info = root->fs_info;
1700 u8 calculated_csum[BTRFS_CSUM_SIZE];
1701 u8 on_disk_csum[BTRFS_CSUM_SIZE];
1702 struct page *page;
1703 void *mapped_buffer;
1704 u64 mapped_size;
1705 void *p;
1706 u32 crc = ~(u32)0;
1707 int fail = 0;
1708 int crc_fail = 0;
1709 u64 len;
1710 int index;
1711
1712 BUG_ON(sblock->page_count < 1);
1713 page = sblock->pagev[0]->page;
1714 mapped_buffer = kmap_atomic(page);
1715 h = (struct btrfs_header *)mapped_buffer;
1716 memcpy(on_disk_csum, h->csum, sctx->csum_size);
1717
1718
1719
1720
1721
1722
1723
1724 if (sblock->pagev[0]->logical != le64_to_cpu(h->bytenr))
1725 ++fail;
1726
1727 if (sblock->pagev[0]->generation != le64_to_cpu(h->generation))
1728 ++fail;
1729
1730 if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
1731 ++fail;
1732
1733 if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
1734 BTRFS_UUID_SIZE))
1735 ++fail;
1736
1737 WARN_ON(sctx->nodesize != sctx->leafsize);
1738 len = sctx->nodesize - BTRFS_CSUM_SIZE;
1739 mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
1740 p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
1741 index = 0;
1742 for (;;) {
1743 u64 l = min_t(u64, len, mapped_size);
1744
1745 crc = btrfs_csum_data(root, p, crc, l);
1746 kunmap_atomic(mapped_buffer);
1747 len -= l;
1748 if (len == 0)
1749 break;
1750 index++;
1751 BUG_ON(index >= sblock->page_count);
1752 BUG_ON(!sblock->pagev[index]->page);
1753 page = sblock->pagev[index]->page;
1754 mapped_buffer = kmap_atomic(page);
1755 mapped_size = PAGE_SIZE;
1756 p = mapped_buffer;
1757 }
1758
1759 btrfs_csum_final(crc, calculated_csum);
1760 if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
1761 ++crc_fail;
1762
1763 return fail || crc_fail;
1764}
1765
1766static int scrub_checksum_super(struct scrub_block *sblock)
1767{
1768 struct btrfs_super_block *s;
1769 struct scrub_ctx *sctx = sblock->sctx;
1770 struct btrfs_root *root = sctx->dev_root;
1771 struct btrfs_fs_info *fs_info = root->fs_info;
1772 u8 calculated_csum[BTRFS_CSUM_SIZE];
1773 u8 on_disk_csum[BTRFS_CSUM_SIZE];
1774 struct page *page;
1775 void *mapped_buffer;
1776 u64 mapped_size;
1777 void *p;
1778 u32 crc = ~(u32)0;
1779 int fail_gen = 0;
1780 int fail_cor = 0;
1781 u64 len;
1782 int index;
1783
1784 BUG_ON(sblock->page_count < 1);
1785 page = sblock->pagev[0]->page;
1786 mapped_buffer = kmap_atomic(page);
1787 s = (struct btrfs_super_block *)mapped_buffer;
1788 memcpy(on_disk_csum, s->csum, sctx->csum_size);
1789
1790 if (sblock->pagev[0]->logical != le64_to_cpu(s->bytenr))
1791 ++fail_cor;
1792
1793 if (sblock->pagev[0]->generation != le64_to_cpu(s->generation))
1794 ++fail_gen;
1795
1796 if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
1797 ++fail_cor;
1798
1799 len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE;
1800 mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
1801 p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
1802 index = 0;
1803 for (;;) {
1804 u64 l = min_t(u64, len, mapped_size);
1805
1806 crc = btrfs_csum_data(root, p, crc, l);
1807 kunmap_atomic(mapped_buffer);
1808 len -= l;
1809 if (len == 0)
1810 break;
1811 index++;
1812 BUG_ON(index >= sblock->page_count);
1813 BUG_ON(!sblock->pagev[index]->page);
1814 page = sblock->pagev[index]->page;
1815 mapped_buffer = kmap_atomic(page);
1816 mapped_size = PAGE_SIZE;
1817 p = mapped_buffer;
1818 }
1819
1820 btrfs_csum_final(crc, calculated_csum);
1821 if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
1822 ++fail_cor;
1823
1824 if (fail_cor + fail_gen) {
1825
1826
1827
1828
1829
1830 spin_lock(&sctx->stat_lock);
1831 ++sctx->stat.super_errors;
1832 spin_unlock(&sctx->stat_lock);
1833 if (fail_cor)
1834 btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
1835 BTRFS_DEV_STAT_CORRUPTION_ERRS);
1836 else
1837 btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
1838 BTRFS_DEV_STAT_GENERATION_ERRS);
1839 }
1840
1841 return fail_cor + fail_gen;
1842}
1843
1844static void scrub_block_get(struct scrub_block *sblock)
1845{
1846 atomic_inc(&sblock->ref_count);
1847}
1848
1849static void scrub_block_put(struct scrub_block *sblock)
1850{
1851 if (atomic_dec_and_test(&sblock->ref_count)) {
1852 int i;
1853
1854 for (i = 0; i < sblock->page_count; i++)
1855 scrub_page_put(sblock->pagev[i]);
1856 kfree(sblock);
1857 }
1858}
1859
1860static void scrub_page_get(struct scrub_page *spage)
1861{
1862 atomic_inc(&spage->ref_count);
1863}
1864
1865static void scrub_page_put(struct scrub_page *spage)
1866{
1867 if (atomic_dec_and_test(&spage->ref_count)) {
1868 if (spage->page)
1869 __free_page(spage->page);
1870 kfree(spage);
1871 }
1872}
1873
1874static void scrub_submit(struct scrub_ctx *sctx)
1875{
1876 struct scrub_bio *sbio;
1877
1878 if (sctx->curr == -1)
1879 return;
1880
1881 sbio = sctx->bios[sctx->curr];
1882 sctx->curr = -1;
1883 scrub_pending_bio_inc(sctx);
1884
1885 if (!sbio->bio->bi_bdev) {
1886
1887
1888
1889
1890
1891
1892
1893 printk_ratelimited(KERN_WARNING
1894 "btrfs: scrub_submit(bio bdev == NULL) is unexpected!\n");
1895 bio_endio(sbio->bio, -EIO);
1896 } else {
1897 btrfsic_submit_bio(READ, sbio->bio);
1898 }
1899}
1900
1901static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
1902 struct scrub_page *spage)
1903{
1904 struct scrub_block *sblock = spage->sblock;
1905 struct scrub_bio *sbio;
1906 int ret;
1907
1908again:
1909
1910
1911
1912 while (sctx->curr == -1) {
1913 spin_lock(&sctx->list_lock);
1914 sctx->curr = sctx->first_free;
1915 if (sctx->curr != -1) {
1916 sctx->first_free = sctx->bios[sctx->curr]->next_free;
1917 sctx->bios[sctx->curr]->next_free = -1;
1918 sctx->bios[sctx->curr]->page_count = 0;
1919 spin_unlock(&sctx->list_lock);
1920 } else {
1921 spin_unlock(&sctx->list_lock);
1922 wait_event(sctx->list_wait, sctx->first_free != -1);
1923 }
1924 }
1925 sbio = sctx->bios[sctx->curr];
1926 if (sbio->page_count == 0) {
1927 struct bio *bio;
1928
1929 sbio->physical = spage->physical;
1930 sbio->logical = spage->logical;
1931 sbio->dev = spage->dev;
1932 bio = sbio->bio;
1933 if (!bio) {
1934 bio = bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio);
1935 if (!bio)
1936 return -ENOMEM;
1937 sbio->bio = bio;
1938 }
1939
1940 bio->bi_private = sbio;
1941 bio->bi_end_io = scrub_bio_end_io;
1942 bio->bi_bdev = sbio->dev->bdev;
1943 bio->bi_sector = sbio->physical >> 9;
1944 sbio->err = 0;
1945 } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
1946 spage->physical ||
1947 sbio->logical + sbio->page_count * PAGE_SIZE !=
1948 spage->logical ||
1949 sbio->dev != spage->dev) {
1950 scrub_submit(sctx);
1951 goto again;
1952 }
1953
1954 sbio->pagev[sbio->page_count] = spage;
1955 ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
1956 if (ret != PAGE_SIZE) {
1957 if (sbio->page_count < 1) {
1958 bio_put(sbio->bio);
1959 sbio->bio = NULL;
1960 return -EIO;
1961 }
1962 scrub_submit(sctx);
1963 goto again;
1964 }
1965
1966 scrub_block_get(sblock);
1967 atomic_inc(&sblock->outstanding_pages);
1968 sbio->page_count++;
1969 if (sbio->page_count == sctx->pages_per_rd_bio)
1970 scrub_submit(sctx);
1971
1972 return 0;
1973}
1974
1975static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
1976 u64 physical, struct btrfs_device *dev, u64 flags,
1977 u64 gen, int mirror_num, u8 *csum, int force,
1978 u64 physical_for_dev_replace)
1979{
1980 struct scrub_block *sblock;
1981 int index;
1982
1983 sblock = kzalloc(sizeof(*sblock), GFP_NOFS);
1984 if (!sblock) {
1985 spin_lock(&sctx->stat_lock);
1986 sctx->stat.malloc_errors++;
1987 spin_unlock(&sctx->stat_lock);
1988 return -ENOMEM;
1989 }
1990
1991
1992
1993 atomic_set(&sblock->ref_count, 1);
1994 sblock->sctx = sctx;
1995 sblock->no_io_error_seen = 1;
1996
1997 for (index = 0; len > 0; index++) {
1998 struct scrub_page *spage;
1999 u64 l = min_t(u64, len, PAGE_SIZE);
2000
2001 spage = kzalloc(sizeof(*spage), GFP_NOFS);
2002 if (!spage) {
2003leave_nomem:
2004 spin_lock(&sctx->stat_lock);
2005 sctx->stat.malloc_errors++;
2006 spin_unlock(&sctx->stat_lock);
2007 scrub_block_put(sblock);
2008 return -ENOMEM;
2009 }
2010 BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
2011 scrub_page_get(spage);
2012 sblock->pagev[index] = spage;
2013 spage->sblock = sblock;
2014 spage->dev = dev;
2015 spage->flags = flags;
2016 spage->generation = gen;
2017 spage->logical = logical;
2018 spage->physical = physical;
2019 spage->physical_for_dev_replace = physical_for_dev_replace;
2020 spage->mirror_num = mirror_num;
2021 if (csum) {
2022 spage->have_csum = 1;
2023 memcpy(spage->csum, csum, sctx->csum_size);
2024 } else {
2025 spage->have_csum = 0;
2026 }
2027 sblock->page_count++;
2028 spage->page = alloc_page(GFP_NOFS);
2029 if (!spage->page)
2030 goto leave_nomem;
2031 len -= l;
2032 logical += l;
2033 physical += l;
2034 physical_for_dev_replace += l;
2035 }
2036
2037 WARN_ON(sblock->page_count == 0);
2038 for (index = 0; index < sblock->page_count; index++) {
2039 struct scrub_page *spage = sblock->pagev[index];
2040 int ret;
2041
2042 ret = scrub_add_page_to_rd_bio(sctx, spage);
2043 if (ret) {
2044 scrub_block_put(sblock);
2045 return ret;
2046 }
2047 }
2048
2049 if (force)
2050 scrub_submit(sctx);
2051
2052
2053 scrub_block_put(sblock);
2054 return 0;
2055}
2056
2057static void scrub_bio_end_io(struct bio *bio, int err)
2058{
2059 struct scrub_bio *sbio = bio->bi_private;
2060 struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info;
2061
2062 sbio->err = err;
2063 sbio->bio = bio;
2064
2065 btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work);
2066}
2067
2068static void scrub_bio_end_io_worker(struct btrfs_work *work)
2069{
2070 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
2071 struct scrub_ctx *sctx = sbio->sctx;
2072 int i;
2073
2074 BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
2075 if (sbio->err) {
2076 for (i = 0; i < sbio->page_count; i++) {
2077 struct scrub_page *spage = sbio->pagev[i];
2078
2079 spage->io_error = 1;
2080 spage->sblock->no_io_error_seen = 0;
2081 }
2082 }
2083
2084
2085 for (i = 0; i < sbio->page_count; i++) {
2086 struct scrub_page *spage = sbio->pagev[i];
2087 struct scrub_block *sblock = spage->sblock;
2088
2089 if (atomic_dec_and_test(&sblock->outstanding_pages))
2090 scrub_block_complete(sblock);
2091 scrub_block_put(sblock);
2092 }
2093
2094 bio_put(sbio->bio);
2095 sbio->bio = NULL;
2096 spin_lock(&sctx->list_lock);
2097 sbio->next_free = sctx->first_free;
2098 sctx->first_free = sbio->index;
2099 spin_unlock(&sctx->list_lock);
2100
2101 if (sctx->is_dev_replace &&
2102 atomic_read(&sctx->wr_ctx.flush_all_writes)) {
2103 mutex_lock(&sctx->wr_ctx.wr_lock);
2104 scrub_wr_submit(sctx);
2105 mutex_unlock(&sctx->wr_ctx.wr_lock);
2106 }
2107
2108 scrub_pending_bio_dec(sctx);
2109}
2110
2111static void scrub_block_complete(struct scrub_block *sblock)
2112{
2113 if (!sblock->no_io_error_seen) {
2114 scrub_handle_errored_block(sblock);
2115 } else {
2116
2117
2118
2119
2120
2121 if (!scrub_checksum(sblock) && sblock->sctx->is_dev_replace)
2122 scrub_write_block_to_dev_replace(sblock);
2123 }
2124}
2125
2126static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len,
2127 u8 *csum)
2128{
2129 struct btrfs_ordered_sum *sum = NULL;
2130 int ret = 0;
2131 unsigned long i;
2132 unsigned long num_sectors;
2133
2134 while (!list_empty(&sctx->csum_list)) {
2135 sum = list_first_entry(&sctx->csum_list,
2136 struct btrfs_ordered_sum, list);
2137 if (sum->bytenr > logical)
2138 return 0;
2139 if (sum->bytenr + sum->len > logical)
2140 break;
2141
2142 ++sctx->stat.csum_discards;
2143 list_del(&sum->list);
2144 kfree(sum);
2145 sum = NULL;
2146 }
2147 if (!sum)
2148 return 0;
2149
2150 num_sectors = sum->len / sctx->sectorsize;
2151 for (i = 0; i < num_sectors; ++i) {
2152 if (sum->sums[i].bytenr == logical) {
2153 memcpy(csum, &sum->sums[i].sum, sctx->csum_size);
2154 ret = 1;
2155 break;
2156 }
2157 }
2158 if (ret && i == num_sectors - 1) {
2159 list_del(&sum->list);
2160 kfree(sum);
2161 }
2162 return ret;
2163}
2164
2165
2166static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len,
2167 u64 physical, struct btrfs_device *dev, u64 flags,
2168 u64 gen, int mirror_num, u64 physical_for_dev_replace)
2169{
2170 int ret;
2171 u8 csum[BTRFS_CSUM_SIZE];
2172 u32 blocksize;
2173
2174 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2175 blocksize = sctx->sectorsize;
2176 spin_lock(&sctx->stat_lock);
2177 sctx->stat.data_extents_scrubbed++;
2178 sctx->stat.data_bytes_scrubbed += len;
2179 spin_unlock(&sctx->stat_lock);
2180 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
2181 WARN_ON(sctx->nodesize != sctx->leafsize);
2182 blocksize = sctx->nodesize;
2183 spin_lock(&sctx->stat_lock);
2184 sctx->stat.tree_extents_scrubbed++;
2185 sctx->stat.tree_bytes_scrubbed += len;
2186 spin_unlock(&sctx->stat_lock);
2187 } else {
2188 blocksize = sctx->sectorsize;
2189 WARN_ON(1);
2190 }
2191
2192 while (len) {
2193 u64 l = min_t(u64, len, blocksize);
2194 int have_csum = 0;
2195
2196 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2197
2198 have_csum = scrub_find_csum(sctx, logical, l, csum);
2199 if (have_csum == 0)
2200 ++sctx->stat.no_csum;
2201 if (sctx->is_dev_replace && !have_csum) {
2202 ret = copy_nocow_pages(sctx, logical, l,
2203 mirror_num,
2204 physical_for_dev_replace);
2205 goto behind_scrub_pages;
2206 }
2207 }
2208 ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen,
2209 mirror_num, have_csum ? csum : NULL, 0,
2210 physical_for_dev_replace);
2211behind_scrub_pages:
2212 if (ret)
2213 return ret;
2214 len -= l;
2215 logical += l;
2216 physical += l;
2217 physical_for_dev_replace += l;
2218 }
2219 return 0;
2220}
2221
2222static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2223 struct map_lookup *map,
2224 struct btrfs_device *scrub_dev,
2225 int num, u64 base, u64 length,
2226 int is_dev_replace)
2227{
2228 struct btrfs_path *path;
2229 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
2230 struct btrfs_root *root = fs_info->extent_root;
2231 struct btrfs_root *csum_root = fs_info->csum_root;
2232 struct btrfs_extent_item *extent;
2233 struct blk_plug plug;
2234 u64 flags;
2235 int ret;
2236 int slot;
2237 int i;
2238 u64 nstripes;
2239 struct extent_buffer *l;
2240 struct btrfs_key key;
2241 u64 physical;
2242 u64 logical;
2243 u64 generation;
2244 int mirror_num;
2245 struct reada_control *reada1;
2246 struct reada_control *reada2;
2247 struct btrfs_key key_start;
2248 struct btrfs_key key_end;
2249 u64 increment = map->stripe_len;
2250 u64 offset;
2251 u64 extent_logical;
2252 u64 extent_physical;
2253 u64 extent_len;
2254 struct btrfs_device *extent_dev;
2255 int extent_mirror_num;
2256
2257 nstripes = length;
2258 offset = 0;
2259 do_div(nstripes, map->stripe_len);
2260 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
2261 offset = map->stripe_len * num;
2262 increment = map->stripe_len * map->num_stripes;
2263 mirror_num = 1;
2264 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
2265 int factor = map->num_stripes / map->sub_stripes;
2266 offset = map->stripe_len * (num / map->sub_stripes);
2267 increment = map->stripe_len * factor;
2268 mirror_num = num % map->sub_stripes + 1;
2269 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
2270 increment = map->stripe_len;
2271 mirror_num = num % map->num_stripes + 1;
2272 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
2273 increment = map->stripe_len;
2274 mirror_num = num % map->num_stripes + 1;
2275 } else {
2276 increment = map->stripe_len;
2277 mirror_num = 1;
2278 }
2279
2280 path = btrfs_alloc_path();
2281 if (!path)
2282 return -ENOMEM;
2283
2284
2285
2286
2287
2288
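	/*
	 * Search the commit root with tree locking skipped: the blocks
	 * referenced from the commit root are not rewritten in place while
	 * COW is applied, so reading them without locks is safe here.
	 */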
2289 path->search_commit_root = 1;
2290 path->skip_locking = 1;
2291
2292
2293
2294
2295
2296
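	/*
	 * Trigger readahead for the extent tree and csum tree ranges of this
	 * stripe and wait for it to finish.  The scrub is accounted as paused
	 * (scrubs_paused) while waiting, so that pause requests, e.g. from a
	 * transaction commit, are not held off by the readahead.
	 */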
2297 logical = base + offset;
2298
2299 wait_event(sctx->list_wait,
2300 atomic_read(&sctx->bios_in_flight) == 0);
2301 atomic_inc(&fs_info->scrubs_paused);
2302 wake_up(&fs_info->scrub_pause_wait);
2303
2304
2305 key_start.objectid = logical;
2306 key_start.type = BTRFS_EXTENT_ITEM_KEY;
2307 key_start.offset = (u64)0;
2308 key_end.objectid = base + offset + nstripes * increment;
2309 key_end.type = BTRFS_EXTENT_ITEM_KEY;
2310 key_end.offset = (u64)0;
2311 reada1 = btrfs_reada_add(root, &key_start, &key_end);
2312
2313 key_start.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
2314 key_start.type = BTRFS_EXTENT_CSUM_KEY;
2315 key_start.offset = logical;
2316 key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
2317 key_end.type = BTRFS_EXTENT_CSUM_KEY;
2318 key_end.offset = base + offset + nstripes * increment;
2319 reada2 = btrfs_reada_add(csum_root, &key_start, &key_end);
2320
2321 if (!IS_ERR(reada1))
2322 btrfs_reada_wait(reada1);
2323 if (!IS_ERR(reada2))
2324 btrfs_reada_wait(reada2);
2325
2326 mutex_lock(&fs_info->scrub_lock);
2327 while (atomic_read(&fs_info->scrub_pause_req)) {
2328 mutex_unlock(&fs_info->scrub_lock);
2329 wait_event(fs_info->scrub_pause_wait,
2330 atomic_read(&fs_info->scrub_pause_req) == 0);
2331 mutex_lock(&fs_info->scrub_lock);
2332 }
2333 atomic_dec(&fs_info->scrubs_paused);
2334 mutex_unlock(&fs_info->scrub_lock);
2335 wake_up(&fs_info->scrub_pause_wait);
2336
2337
2338
2339
2340
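	/*
	 * Plug the block layer so the many small read bios issued for this
	 * stripe can be merged before they are submitted.
	 */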
2341 blk_start_plug(&plug);
2342
2343
2344
2345
2346 logical = base + offset;
2347 physical = map->stripes[num].physical;
2348 ret = 0;
2349 for (i = 0; i < nstripes; ++i) {
2350
2351
2352
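		/* stop early if the scrub was canceled, globally or for this ctx */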
2353 if (atomic_read(&fs_info->scrub_cancel_req) ||
2354 atomic_read(&sctx->cancel_req)) {
2355 ret = -ECANCELED;
2356 goto out;
2357 }
2358
2359
2360
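		/*
		 * A pause was requested: flush all queued read and write bios,
		 * wait for them to complete and account this scrub as paused
		 * until the request is dropped again.
		 */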
2361 if (atomic_read(&fs_info->scrub_pause_req)) {
2362
2363 atomic_set(&sctx->wr_ctx.flush_all_writes, 1);
2364 scrub_submit(sctx);
2365 mutex_lock(&sctx->wr_ctx.wr_lock);
2366 scrub_wr_submit(sctx);
2367 mutex_unlock(&sctx->wr_ctx.wr_lock);
2368 wait_event(sctx->list_wait,
2369 atomic_read(&sctx->bios_in_flight) == 0);
2370 atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
2371 atomic_inc(&fs_info->scrubs_paused);
2372 wake_up(&fs_info->scrub_pause_wait);
2373 mutex_lock(&fs_info->scrub_lock);
2374 while (atomic_read(&fs_info->scrub_pause_req)) {
2375 mutex_unlock(&fs_info->scrub_lock);
2376 wait_event(fs_info->scrub_pause_wait,
2377 atomic_read(&fs_info->scrub_pause_req) == 0);
2378 mutex_lock(&fs_info->scrub_lock);
2379 }
2380 atomic_dec(&fs_info->scrubs_paused);
2381 mutex_unlock(&fs_info->scrub_lock);
2382 wake_up(&fs_info->scrub_pause_wait);
2383 }
2384
2385 ret = btrfs_lookup_csums_range(csum_root, logical,
2386 logical + map->stripe_len - 1,
2387 &sctx->csum_list, 1);
2388 if (ret)
2389 goto out;
2390
2391 key.objectid = logical;
2392 key.type = BTRFS_EXTENT_ITEM_KEY;
2393 key.offset = (u64)0;
2394
2395 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2396 if (ret < 0)
2397 goto out;
2398 if (ret > 0) {
2399 ret = btrfs_previous_item(root, path, 0,
2400 BTRFS_EXTENT_ITEM_KEY);
2401 if (ret < 0)
2402 goto out;
2403 if (ret > 0) {
2404
2405
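				/*
				 * There is no smaller extent item, so stick
				 * with the slot of the original search.
				 */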
2406 btrfs_release_path(path);
2407 ret = btrfs_search_slot(NULL, root, &key,
2408 path, 0, 0);
2409 if (ret < 0)
2410 goto out;
2411 }
2412 }
2413
2414 while (1) {
2415 l = path->nodes[0];
2416 slot = path->slots[0];
2417 if (slot >= btrfs_header_nritems(l)) {
2418 ret = btrfs_next_leaf(root, path);
2419 if (ret == 0)
2420 continue;
2421 if (ret < 0)
2422 goto out;
2423
2424 break;
2425 }
2426 btrfs_item_key_to_cpu(l, &key, slot);
2427
2428 if (key.objectid + key.offset <= logical)
2429 goto next;
2430
2431 if (key.objectid >= logical + map->stripe_len)
2432 break;
2433
2434 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
2435 goto next;
2436
2437 extent = btrfs_item_ptr(l, slot,
2438 struct btrfs_extent_item);
2439 flags = btrfs_extent_flags(l, extent);
2440 generation = btrfs_extent_generation(l, extent);
2441
2442 if (key.objectid < logical &&
2443 (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
2444 printk(KERN_ERR
2445 "btrfs scrub: tree block %llu spanning "
2446 "stripes, ignored. logical=%llu\n",
2447 (unsigned long long)key.objectid,
2448 (unsigned long long)logical);
2449 goto next;
2450 }
2451
2452
2453
2454
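			/* trim the extent to the part inside the current stripe */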
2455 if (key.objectid < logical) {
2456 key.offset -= logical - key.objectid;
2457 key.objectid = logical;
2458 }
2459 if (key.objectid + key.offset >
2460 logical + map->stripe_len) {
2461 key.offset = logical + map->stripe_len -
2462 key.objectid;
2463 }
2464
2465 extent_logical = key.objectid;
2466 extent_physical = key.objectid - logical + physical;
2467 extent_len = key.offset;
2468 extent_dev = scrub_dev;
2469 extent_mirror_num = mirror_num;
2470 if (is_dev_replace)
2471 scrub_remap_extent(fs_info, extent_logical,
2472 extent_len, &extent_physical,
2473 &extent_dev,
2474 &extent_mirror_num);
2475 ret = scrub_extent(sctx, extent_logical, extent_len,
2476 extent_physical, extent_dev, flags,
2477 generation, extent_mirror_num,
2478 key.objectid - logical + physical);
2479 if (ret)
2480 goto out;
2481
2482next:
2483 path->slots[0]++;
2484 }
2485 btrfs_release_path(path);
2486 logical += increment;
2487 physical += map->stripe_len;
2488 spin_lock(&sctx->stat_lock);
2489 sctx->stat.last_physical = physical;
2490 spin_unlock(&sctx->stat_lock);
2491 }
2492out:
2493
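	/* push all queued pages and write bios before leaving the stripe */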
2494 scrub_submit(sctx);
2495 mutex_lock(&sctx->wr_ctx.wr_lock);
2496 scrub_wr_submit(sctx);
2497 mutex_unlock(&sctx->wr_ctx.wr_lock);
2498
2499 blk_finish_plug(&plug);
2500 btrfs_free_path(path);
2501 return ret < 0 ? ret : 0;
2502}
2503
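/*
 * Scrub one chunk: look up the chunk mapping for chunk_offset and scrub every
 * stripe of it that lives on scrub_dev at dev_offset.
 */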
2504static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
2505 struct btrfs_device *scrub_dev,
2506 u64 chunk_tree, u64 chunk_objectid,
2507 u64 chunk_offset, u64 length,
2508 u64 dev_offset, int is_dev_replace)
2509{
2510 struct btrfs_mapping_tree *map_tree =
2511 &sctx->dev_root->fs_info->mapping_tree;
2512 struct map_lookup *map;
2513 struct extent_map *em;
2514 int i;
2515 int ret = 0;
2516
2517 read_lock(&map_tree->map_tree.lock);
2518 em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
2519 read_unlock(&map_tree->map_tree.lock);
2520
2521 if (!em)
2522 return -EINVAL;
2523
2524 map = (struct map_lookup *)em->bdev;
2525 if (em->start != chunk_offset)
2526 goto out;
2527
2528 if (em->len < length)
2529 goto out;
2530
2531 for (i = 0; i < map->num_stripes; ++i) {
2532 if (map->stripes[i].dev->bdev == scrub_dev->bdev &&
2533 map->stripes[i].physical == dev_offset) {
2534 ret = scrub_stripe(sctx, map, scrub_dev, i,
2535 chunk_offset, length,
2536 is_dev_replace);
2537 if (ret)
2538 goto out;
2539 }
2540 }
2541out:
2542 free_extent_map(em);
2543
2544 return ret;
2545}
2546
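/*
 * Walk all dev extents of scrub_dev in [start, end): scrub the chunk behind
 * each one, flush all outstanding I/O between chunks and honour pause
 * requests.  In dev-replace mode the replace cursor is advanced as well.
 */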
2547static noinline_for_stack
2548int scrub_enumerate_chunks(struct scrub_ctx *sctx,
2549 struct btrfs_device *scrub_dev, u64 start, u64 end,
2550 int is_dev_replace)
2551{
2552 struct btrfs_dev_extent *dev_extent = NULL;
2553 struct btrfs_path *path;
2554 struct btrfs_root *root = sctx->dev_root;
2555 struct btrfs_fs_info *fs_info = root->fs_info;
2556 u64 length;
2557 u64 chunk_tree;
2558 u64 chunk_objectid;
2559 u64 chunk_offset;
2560 int ret;
2561 int slot;
2562 struct extent_buffer *l;
2563 struct btrfs_key key;
2564 struct btrfs_key found_key;
2565 struct btrfs_block_group_cache *cache;
2566 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
2567
2568 path = btrfs_alloc_path();
2569 if (!path)
2570 return -ENOMEM;
2571
2572 path->reada = 2;
2573 path->search_commit_root = 1;
2574 path->skip_locking = 1;
2575
2576 key.objectid = scrub_dev->devid;
2577 key.offset = 0ull;
2578 key.type = BTRFS_DEV_EXTENT_KEY;
2579
2580 while (1) {
2581 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2582 if (ret < 0)
2583 break;
2584 if (ret > 0) {
2585 if (path->slots[0] >=
2586 btrfs_header_nritems(path->nodes[0])) {
2587 ret = btrfs_next_leaf(root, path);
2588 if (ret)
2589 break;
2590 }
2591 }
2592
2593 l = path->nodes[0];
2594 slot = path->slots[0];
2595
2596 btrfs_item_key_to_cpu(l, &found_key, slot);
2597
2598 if (found_key.objectid != scrub_dev->devid)
2599 break;
2600
2601 if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY)
2602 break;
2603
2604 if (found_key.offset >= end)
2605 break;
2606
2607 if (found_key.offset < key.offset)
2608 break;
2609
2610 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
2611 length = btrfs_dev_extent_length(l, dev_extent);
2612
2613 if (found_key.offset + length <= start) {
2614 key.offset = found_key.offset + length;
2615 btrfs_release_path(path);
2616 continue;
2617 }
2618
2619 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
2620 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
2621 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
2622
2623
2624
2625
2626
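		/*
		 * Take a reference on the corresponding block group so the
		 * chunk is not removed while it is being scrubbed.
		 */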
2627 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
2628 if (!cache) {
2629 ret = -ENOENT;
2630 break;
2631 }
2632 dev_replace->cursor_right = found_key.offset + length;
2633 dev_replace->cursor_left = found_key.offset;
2634 dev_replace->item_needs_writeback = 1;
2635 ret = scrub_chunk(sctx, scrub_dev, chunk_tree, chunk_objectid,
2636 chunk_offset, length, found_key.offset,
2637 is_dev_replace);
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
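		/*
		 * Flush and wait for all pending read and write bios before
		 * releasing the block group.  In the dev-replace case, read
		 * completions queue additional write requests, so the writes
		 * have to be flushed as well before bios_in_flight can drop
		 * to zero.
		 */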
2649 atomic_set(&sctx->wr_ctx.flush_all_writes, 1);
2650 scrub_submit(sctx);
2651 mutex_lock(&sctx->wr_ctx.wr_lock);
2652 scrub_wr_submit(sctx);
2653 mutex_unlock(&sctx->wr_ctx.wr_lock);
2654
2655 wait_event(sctx->list_wait,
2656 atomic_read(&sctx->bios_in_flight) == 0);
2657 atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
2658 atomic_inc(&fs_info->scrubs_paused);
2659 wake_up(&fs_info->scrub_pause_wait);
2660 wait_event(sctx->list_wait,
2661 atomic_read(&sctx->workers_pending) == 0);
2662
2663 mutex_lock(&fs_info->scrub_lock);
2664 while (atomic_read(&fs_info->scrub_pause_req)) {
2665 mutex_unlock(&fs_info->scrub_lock);
2666 wait_event(fs_info->scrub_pause_wait,
2667 atomic_read(&fs_info->scrub_pause_req) == 0);
2668 mutex_lock(&fs_info->scrub_lock);
2669 }
2670 atomic_dec(&fs_info->scrubs_paused);
2671 mutex_unlock(&fs_info->scrub_lock);
2672 wake_up(&fs_info->scrub_pause_wait);
2673
2674 dev_replace->cursor_left = dev_replace->cursor_right;
2675 dev_replace->item_needs_writeback = 1;
2676 btrfs_put_block_group(cache);
2677 if (ret)
2678 break;
2679 if (is_dev_replace &&
2680 atomic64_read(&dev_replace->num_write_errors) > 0) {
2681 ret = -EIO;
2682 break;
2683 }
2684 if (sctx->stat.malloc_errors > 0) {
2685 ret = -ENOMEM;
2686 break;
2687 }
2688
2689 key.offset = found_key.offset + length;
2690 btrfs_release_path(path);
2691 }
2692
2693 btrfs_free_path(path);
2694
2695
2696
2697
2698
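	/*
	 * ret can still be 1 here, from btrfs_search_slot() or
	 * btrfs_next_leaf(); that is not an error.
	 */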
2699 return ret < 0 ? ret : 0;
2700}
2701
2702static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
2703 struct btrfs_device *scrub_dev)
2704{
2705 int i;
2706 u64 bytenr;
2707 u64 gen;
2708 int ret;
2709 struct btrfs_root *root = sctx->dev_root;
2710
2711 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
2712 return -EIO;
2713
2714 gen = root->fs_info->last_trans_committed;
2715
2716 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
2717 bytenr = btrfs_sb_offset(i);
2718 if (bytenr + BTRFS_SUPER_INFO_SIZE > scrub_dev->total_bytes)
2719 break;
2720
2721 ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
2722 scrub_dev, BTRFS_EXTENT_FLAG_SUPER, gen, i,
2723 NULL, 1, bytenr);
2724 if (ret)
2725 return ret;
2726 }
2727 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
2728
2729 return 0;
2730}
2731
2732
2733
2734
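/*
 * Take a reference on the scrub worker threads and start them on first use.
 * A single thread is used for dev-replace, the regular thread pool size
 * otherwise.
 */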
2735static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
2736 int is_dev_replace)
2737{
2738 int ret = 0;
2739
2740 mutex_lock(&fs_info->scrub_lock);
2741 if (fs_info->scrub_workers_refcnt == 0) {
2742 if (is_dev_replace)
2743 btrfs_init_workers(&fs_info->scrub_workers, "scrub", 1,
2744 &fs_info->generic_worker);
2745 else
2746 btrfs_init_workers(&fs_info->scrub_workers, "scrub",
2747 fs_info->thread_pool_size,
2748 &fs_info->generic_worker);
2749 fs_info->scrub_workers.idle_thresh = 4;
2750 ret = btrfs_start_workers(&fs_info->scrub_workers);
2751 if (ret)
2752 goto out;
2753 btrfs_init_workers(&fs_info->scrub_wr_completion_workers,
2754 "scrubwrc",
2755 fs_info->thread_pool_size,
2756 &fs_info->generic_worker);
2757 fs_info->scrub_wr_completion_workers.idle_thresh = 2;
2758 ret = btrfs_start_workers(
2759 &fs_info->scrub_wr_completion_workers);
2760 if (ret)
2761 goto out;
2762 btrfs_init_workers(&fs_info->scrub_nocow_workers, "scrubnc", 1,
2763 &fs_info->generic_worker);
2764 ret = btrfs_start_workers(&fs_info->scrub_nocow_workers);
2765 if (ret)
2766 goto out;
2767 }
2768 ++fs_info->scrub_workers_refcnt;
2769out:
2770 mutex_unlock(&fs_info->scrub_lock);
2771
2772 return ret;
2773}
2774
2775static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
2776{
2777 mutex_lock(&fs_info->scrub_lock);
2778 if (--fs_info->scrub_workers_refcnt == 0) {
2779 btrfs_stop_workers(&fs_info->scrub_workers);
2780 btrfs_stop_workers(&fs_info->scrub_wr_completion_workers);
2781 btrfs_stop_workers(&fs_info->scrub_nocow_workers);
2782 }
2783 WARN_ON(fs_info->scrub_workers_refcnt < 0);
2784 mutex_unlock(&fs_info->scrub_lock);
2785}
2786
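/*
 * Entry point for scrub and dev-replace: validate the size assumptions, get
 * the worker threads, look up the device, set up a scrub_ctx, scrub the
 * superblocks (plain scrub only) and all chunks, then tear everything down.
 */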
2787int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
2788 u64 end, struct btrfs_scrub_progress *progress,
2789 int readonly, int is_dev_replace)
2790{
2791 struct scrub_ctx *sctx;
2792 int ret;
2793 struct btrfs_device *dev;
2794
2795 if (btrfs_fs_closing(fs_info))
2796 return -EINVAL;
2797
2798
2799
2800
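	/* sanity-check the size assumptions the scrub implementation relies on */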
2801 if (fs_info->chunk_root->nodesize != fs_info->chunk_root->leafsize) {
2802 printk(KERN_ERR
2803 "btrfs_scrub: size assumption nodesize == leafsize (%d == %d) fails\n",
2804 fs_info->chunk_root->nodesize,
2805 fs_info->chunk_root->leafsize);
2806 return -EINVAL;
2807 }
2808
2809 if (fs_info->chunk_root->nodesize > BTRFS_STRIPE_LEN) {
2810
2811
2812
2813
2814
2815 printk(KERN_ERR
2816 "btrfs_scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails\n",
2817 fs_info->chunk_root->nodesize, BTRFS_STRIPE_LEN);
2818 return -EINVAL;
2819 }
2820
2821 if (fs_info->chunk_root->sectorsize != PAGE_SIZE) {
2822
2823 printk(KERN_ERR
2824 "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %llu) fails\n",
2825 fs_info->chunk_root->sectorsize,
2826 (unsigned long long)PAGE_SIZE);
2827 return -EINVAL;
2828 }
2829
2830 if (fs_info->chunk_root->nodesize >
2831 PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK ||
2832 fs_info->chunk_root->sectorsize >
2833 PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK) {
2834
2835
2836
2837
2838 pr_err("btrfs_scrub: size assumption nodesize and sectorsize <= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails\n",
2839 fs_info->chunk_root->nodesize,
2840 SCRUB_MAX_PAGES_PER_BLOCK,
2841 fs_info->chunk_root->sectorsize,
2842 SCRUB_MAX_PAGES_PER_BLOCK);
2843 return -EINVAL;
2844 }
2845
2846 ret = scrub_workers_get(fs_info, is_dev_replace);
2847 if (ret)
2848 return ret;
2849
2850 mutex_lock(&fs_info->fs_devices->device_list_mutex);
2851 dev = btrfs_find_device(fs_info, devid, NULL, NULL);
2852 if (!dev || (dev->missing && !is_dev_replace)) {
2853 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2854 scrub_workers_put(fs_info);
2855 return -ENODEV;
2856 }
2857 mutex_lock(&fs_info->scrub_lock);
2858
2859 if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) {
2860 mutex_unlock(&fs_info->scrub_lock);
2861 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2862 scrub_workers_put(fs_info);
2863 return -EIO;
2864 }
2865
2866 btrfs_dev_replace_lock(&fs_info->dev_replace);
2867 if (dev->scrub_device ||
2868 (!is_dev_replace &&
2869 btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
2870 btrfs_dev_replace_unlock(&fs_info->dev_replace);
2871 mutex_unlock(&fs_info->scrub_lock);
2872 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2873 scrub_workers_put(fs_info);
2874 return -EINPROGRESS;
2875 }
2876 btrfs_dev_replace_unlock(&fs_info->dev_replace);
2877 sctx = scrub_setup_ctx(dev, is_dev_replace);
2878 if (IS_ERR(sctx)) {
2879 mutex_unlock(&fs_info->scrub_lock);
2880 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2881 scrub_workers_put(fs_info);
2882 return PTR_ERR(sctx);
2883 }
2884 sctx->readonly = readonly;
2885 dev->scrub_device = sctx;
2886
2887 atomic_inc(&fs_info->scrubs_running);
2888 mutex_unlock(&fs_info->scrub_lock);
2889 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2890
2891 if (!is_dev_replace) {
2892 down_read(&fs_info->scrub_super_lock);
2893 ret = scrub_supers(sctx, dev);
2894 up_read(&fs_info->scrub_super_lock);
2895 }
2896
2897 if (!ret)
2898 ret = scrub_enumerate_chunks(sctx, dev, start, end,
2899 is_dev_replace);
2900
2901 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
2902 atomic_dec(&fs_info->scrubs_running);
2903 wake_up(&fs_info->scrub_pause_wait);
2904
2905 wait_event(sctx->list_wait, atomic_read(&sctx->workers_pending) == 0);
2906
2907 if (progress)
2908 memcpy(progress, &sctx->stat, sizeof(*progress));
2909
2910 mutex_lock(&fs_info->scrub_lock);
2911 dev->scrub_device = NULL;
2912 mutex_unlock(&fs_info->scrub_lock);
2913
2914 scrub_free_ctx(sctx);
2915 scrub_workers_put(fs_info);
2916
2917 return ret;
2918}
2919
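/*
 * Ask all running scrubs to pause and wait until every one of them has
 * reached a pause point; btrfs_scrub_continue() drops the request again.
 */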
2920void btrfs_scrub_pause(struct btrfs_root *root)
2921{
2922 struct btrfs_fs_info *fs_info = root->fs_info;
2923
2924 mutex_lock(&fs_info->scrub_lock);
2925 atomic_inc(&fs_info->scrub_pause_req);
2926 while (atomic_read(&fs_info->scrubs_paused) !=
2927 atomic_read(&fs_info->scrubs_running)) {
2928 mutex_unlock(&fs_info->scrub_lock);
2929 wait_event(fs_info->scrub_pause_wait,
2930 atomic_read(&fs_info->scrubs_paused) ==
2931 atomic_read(&fs_info->scrubs_running));
2932 mutex_lock(&fs_info->scrub_lock);
2933 }
2934 mutex_unlock(&fs_info->scrub_lock);
2935}
2936
2937void btrfs_scrub_continue(struct btrfs_root *root)
2938{
2939 struct btrfs_fs_info *fs_info = root->fs_info;
2940
2941 atomic_dec(&fs_info->scrub_pause_req);
2942 wake_up(&fs_info->scrub_pause_wait);
2943}
2944
2945void btrfs_scrub_pause_super(struct btrfs_root *root)
2946{
2947 down_write(&root->fs_info->scrub_super_lock);
2948}
2949
2950void btrfs_scrub_continue_super(struct btrfs_root *root)
2951{
2952 up_write(&root->fs_info->scrub_super_lock);
2953}
2954
2955int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
2956{
2957 mutex_lock(&fs_info->scrub_lock);
2958 if (!atomic_read(&fs_info->scrubs_running)) {
2959 mutex_unlock(&fs_info->scrub_lock);
2960 return -ENOTCONN;
2961 }
2962
2963 atomic_inc(&fs_info->scrub_cancel_req);
2964 while (atomic_read(&fs_info->scrubs_running)) {
2965 mutex_unlock(&fs_info->scrub_lock);
2966 wait_event(fs_info->scrub_pause_wait,
2967 atomic_read(&fs_info->scrubs_running) == 0);
2968 mutex_lock(&fs_info->scrub_lock);
2969 }
2970 atomic_dec(&fs_info->scrub_cancel_req);
2971 mutex_unlock(&fs_info->scrub_lock);
2972
2973 return 0;
2974}
2975
2976int btrfs_scrub_cancel_dev(struct btrfs_fs_info *fs_info,
2977 struct btrfs_device *dev)
2978{
2979 struct scrub_ctx *sctx;
2980
2981 mutex_lock(&fs_info->scrub_lock);
2982 sctx = dev->scrub_device;
2983 if (!sctx) {
2984 mutex_unlock(&fs_info->scrub_lock);
2985 return -ENOTCONN;
2986 }
2987 atomic_inc(&sctx->cancel_req);
2988 while (dev->scrub_device) {
2989 mutex_unlock(&fs_info->scrub_lock);
2990 wait_event(fs_info->scrub_pause_wait,
2991 dev->scrub_device == NULL);
2992 mutex_lock(&fs_info->scrub_lock);
2993 }
2994 mutex_unlock(&fs_info->scrub_lock);
2995
2996 return 0;
2997}
2998
2999int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid)
3000{
3001 struct btrfs_fs_info *fs_info = root->fs_info;
3002 struct btrfs_device *dev;
3003 int ret;
3004
3005
3006
3007
3008
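	/*
	 * Hold the device_list_mutex so the device cannot go away while the
	 * per-device cancel below is running.
	 */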
3009 mutex_lock(&fs_info->fs_devices->device_list_mutex);
3010 dev = btrfs_find_device(fs_info, devid, NULL, NULL);
3011 if (!dev) {
3012 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3013 return -ENODEV;
3014 }
3015 ret = btrfs_scrub_cancel_dev(fs_info, dev);
3016 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3017
3018 return ret;
3019}
3020
3021int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
3022 struct btrfs_scrub_progress *progress)
3023{
3024 struct btrfs_device *dev;
3025 struct scrub_ctx *sctx = NULL;
3026
3027 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
3028 dev = btrfs_find_device(root->fs_info, devid, NULL, NULL);
3029 if (dev)
3030 sctx = dev->scrub_device;
3031 if (sctx)
3032 memcpy(progress, &sctx->stat, sizeof(*progress));
3033 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
3034
3035 return dev ? (sctx ? 0 : -ENOTCONN) : -ENODEV;
3036}
3037
3038static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
3039 u64 extent_logical, u64 extent_len,
3040 u64 *extent_physical,
3041 struct btrfs_device **extent_dev,
3042 int *extent_mirror_num)
3043{
3044 u64 mapped_length;
3045 struct btrfs_bio *bbio = NULL;
3046 int ret;
3047
3048 mapped_length = extent_len;
3049 ret = btrfs_map_block(fs_info, READ, extent_logical,
3050 &mapped_length, &bbio, 0);
3051 if (ret || !bbio || mapped_length < extent_len ||
3052 !bbio->stripes[0].dev->bdev) {
3053 kfree(bbio);
3054 return;
3055 }
3056
3057 *extent_physical = bbio->stripes[0].physical;
3058 *extent_mirror_num = bbio->mirror_num;
3059 *extent_dev = bbio->stripes[0].dev;
3060 kfree(bbio);
3061}
3062
3063static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
3064 struct scrub_wr_ctx *wr_ctx,
3065 struct btrfs_fs_info *fs_info,
3066 struct btrfs_device *dev,
3067 int is_dev_replace)
3068{
3069 WARN_ON(wr_ctx->wr_curr_bio != NULL);
3070
3071 mutex_init(&wr_ctx->wr_lock);
3072 wr_ctx->wr_curr_bio = NULL;
3073 if (!is_dev_replace)
3074 return 0;
3075
3076 WARN_ON(!dev->bdev);
3077 wr_ctx->pages_per_wr_bio = min_t(int, SCRUB_PAGES_PER_WR_BIO,
3078 bio_get_nr_vecs(dev->bdev));
3079 wr_ctx->tgtdev = dev;
3080 atomic_set(&wr_ctx->flush_all_writes, 0);
3081 return 0;
3082}
3083
3084static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx)
3085{
3086 mutex_lock(&wr_ctx->wr_lock);
3087 kfree(wr_ctx->wr_curr_bio);
3088 wr_ctx->wr_curr_bio = NULL;
3089 mutex_unlock(&wr_ctx->wr_lock);
3090}
3091
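/*
 * Nocow copy path for dev-replace: data without checksums cannot be verified
 * by the normal scrub code, so the pages are read through the page cache of
 * the owning inodes and written directly to the target device instead.
 * copy_nocow_pages() only queues a worker; the actual work happens in
 * copy_nocow_pages_worker() and copy_nocow_pages_for_inode().
 */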
3092static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
3093 int mirror_num, u64 physical_for_dev_replace)
3094{
3095 struct scrub_copy_nocow_ctx *nocow_ctx;
3096 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
3097
3098 nocow_ctx = kzalloc(sizeof(*nocow_ctx), GFP_NOFS);
3099 if (!nocow_ctx) {
3100 spin_lock(&sctx->stat_lock);
3101 sctx->stat.malloc_errors++;
3102 spin_unlock(&sctx->stat_lock);
3103 return -ENOMEM;
3104 }
3105
3106 scrub_pending_trans_workers_inc(sctx);
3107
3108 nocow_ctx->sctx = sctx;
3109 nocow_ctx->logical = logical;
3110 nocow_ctx->len = len;
3111 nocow_ctx->mirror_num = mirror_num;
3112 nocow_ctx->physical_for_dev_replace = physical_for_dev_replace;
3113 nocow_ctx->work.func = copy_nocow_pages_worker;
3114 btrfs_queue_worker(&fs_info->scrub_nocow_workers,
3115 &nocow_ctx->work);
3116
3117 return 0;
3118}
3119
3120static void copy_nocow_pages_worker(struct btrfs_work *work)
3121{
3122 struct scrub_copy_nocow_ctx *nocow_ctx =
3123 container_of(work, struct scrub_copy_nocow_ctx, work);
3124 struct scrub_ctx *sctx = nocow_ctx->sctx;
3125 u64 logical = nocow_ctx->logical;
3126 u64 len = nocow_ctx->len;
3127 int mirror_num = nocow_ctx->mirror_num;
3128 u64 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
3129 int ret;
3130 struct btrfs_trans_handle *trans = NULL;
3131 struct btrfs_fs_info *fs_info;
3132 struct btrfs_path *path;
3133 struct btrfs_root *root;
3134 int not_written = 0;
3135
3136 fs_info = sctx->dev_root->fs_info;
3137 root = fs_info->extent_root;
3138
3139 path = btrfs_alloc_path();
3140 if (!path) {
3141 spin_lock(&sctx->stat_lock);
3142 sctx->stat.malloc_errors++;
3143 spin_unlock(&sctx->stat_lock);
3144 not_written = 1;
3145 goto out;
3146 }
3147
3148 trans = btrfs_join_transaction(root);
3149 if (IS_ERR(trans)) {
3150 not_written = 1;
3151 goto out;
3152 }
3153
3154 ret = iterate_inodes_from_logical(logical, fs_info, path,
3155 copy_nocow_pages_for_inode,
3156 nocow_ctx);
3157 if (ret != 0 && ret != -ENOENT) {
3158 pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %llu, ret %d\n",
3159 (unsigned long long)logical,
3160 (unsigned long long)physical_for_dev_replace,
3161 (unsigned long long)len,
3162 (unsigned long long)mirror_num, ret);
3163 not_written = 1;
3164 goto out;
3165 }
3166
3167out:
3168 if (trans && !IS_ERR(trans))
3169 btrfs_end_transaction(trans, root);
3170 if (not_written)
3171 btrfs_dev_replace_stats_inc(&fs_info->dev_replace.
3172 num_uncorrectable_read_errors);
3173
3174 btrfs_free_path(path);
3175 kfree(nocow_ctx);
3176
3177 scrub_pending_trans_workers_dec(sctx);
3178}
3179
3180static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx)
3181{
3182 unsigned long index;
3183 struct scrub_copy_nocow_ctx *nocow_ctx = ctx;
3184 int ret = 0;
3185 struct btrfs_key key;
3186 struct inode *inode = NULL;
3187 struct btrfs_root *local_root;
3188 u64 physical_for_dev_replace;
3189 u64 len;
3190 struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info;
3191 int srcu_index;
3192
3193 key.objectid = root;
3194 key.type = BTRFS_ROOT_ITEM_KEY;
3195 key.offset = (u64)-1;
3196
3197 srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
3198
3199 local_root = btrfs_read_fs_root_no_name(fs_info, &key);
3200 if (IS_ERR(local_root)) {
3201 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
3202 return PTR_ERR(local_root);
3203 }
3204
3205 key.type = BTRFS_INODE_ITEM_KEY;
3206 key.objectid = inum;
3207 key.offset = 0;
3208 inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
3209 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
3210 if (IS_ERR(inode))
3211 return PTR_ERR(inode);
3212
3213 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
3214 len = nocow_ctx->len;
3215 while (len >= PAGE_CACHE_SIZE) {
3216 struct page *page = NULL;
3217 int ret_sub;
3218
3219 index = offset >> PAGE_CACHE_SHIFT;
3220
3221 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
3222 if (!page) {
3223 pr_err("find_or_create_page() failed\n");
3224 ret = -ENOMEM;
3225 goto next_page;
3226 }
3227
3228 if (PageUptodate(page)) {
3229 if (PageDirty(page))
3230 goto next_page;
3231 } else {
3232 ClearPageError(page);
3233 ret_sub = extent_read_full_page(&BTRFS_I(inode)->
3234 io_tree,
3235 page, btrfs_get_extent,
3236 nocow_ctx->mirror_num);
3237 if (ret_sub) {
3238 ret = ret_sub;
3239 goto next_page;
3240 }
3241 wait_on_page_locked(page);
3242 if (!PageUptodate(page)) {
3243 ret = -EIO;
3244 goto next_page;
3245 }
3246 }
3247 ret_sub = write_page_nocow(nocow_ctx->sctx,
3248 physical_for_dev_replace, page);
3249 if (ret_sub) {
3250 ret = ret_sub;
3251 goto next_page;
3252 }
3253
3254next_page:
3255 if (page) {
3256 unlock_page(page);
3257 put_page(page);
3258 }
3259 offset += PAGE_CACHE_SIZE;
3260 physical_for_dev_replace += PAGE_CACHE_SIZE;
3261 len -= PAGE_CACHE_SIZE;
3262 }
3263
3264 if (inode)
3265 iput(inode);
3266 return ret;
3267}
3268
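/*
 * Write a single page synchronously to the dev-replace target device at the
 * given physical offset; on failure the device write-error statistic is
 * bumped and -EIO is returned.
 */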
3269static int write_page_nocow(struct scrub_ctx *sctx,
3270 u64 physical_for_dev_replace, struct page *page)
3271{
3272 struct bio *bio;
3273 struct btrfs_device *dev;
3274 int ret;
3275 DECLARE_COMPLETION_ONSTACK(compl);
3276
3277 dev = sctx->wr_ctx.tgtdev;
3278 if (!dev)
3279 return -EIO;
3280 if (!dev->bdev) {
3281 printk_ratelimited(KERN_WARNING
3282 "btrfs: scrub write_page_nocow(bdev == NULL) is unexpected!\n");
3283 return -EIO;
3284 }
3285 bio = bio_alloc(GFP_NOFS, 1);
3286 if (!bio) {
3287 spin_lock(&sctx->stat_lock);
3288 sctx->stat.malloc_errors++;
3289 spin_unlock(&sctx->stat_lock);
3290 return -ENOMEM;
3291 }
3292 bio->bi_private = &compl;
3293 bio->bi_end_io = scrub_complete_bio_end_io;
3294 bio->bi_size = 0;
3295 bio->bi_sector = physical_for_dev_replace >> 9;
3296 bio->bi_bdev = dev->bdev;
3297 ret = bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
3298 if (ret != PAGE_CACHE_SIZE) {
3299leave_with_eio:
3300 bio_put(bio);
3301 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
3302 return -EIO;
3303 }
3304 btrfsic_submit_bio(WRITE_SYNC, bio);
3305 wait_for_completion(&compl);
3306
3307 if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
3308 goto leave_with_eio;
3309
3310 bio_put(bio);
3311 return 0;
3312}
3313