1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19#include <linux/blkdev.h>
20#include <linux/ratelimit.h>
21#include "ctree.h"
22#include "volumes.h"
23#include "disk-io.h"
24#include "ordered-data.h"
25#include "transaction.h"
26#include "backref.h"
27#include "extent_io.h"
28#include "dev-replace.h"
29#include "check-integrity.h"
30#include "rcu-string.h"
31#include "raid56.h"
32
33
34
35
36
37
38
39
40
41
42
43
44
45
/* Forward declarations: both types are used by pointer before being defined. */
struct scrub_block;
struct scrub_ctx;

/*
 * Sizing limits used throughout this file:
 *  - SCRUB_PAGES_PER_RD_BIO / SCRUB_PAGES_PER_WR_BIO: the larger of the two
 *    sizes scrub_bio::pagev[]; the read value is also the upper bound for
 *    scrub_ctx::pages_per_rd_bio (see scrub_setup_ctx()).
 *  - SCRUB_BIOS_PER_SCTX: number of scrub_bio slots in scrub_ctx::bios[].
 */
#define SCRUB_PAGES_PER_RD_BIO 32
#define SCRUB_PAGES_PER_WR_BIO 32
#define SCRUB_BIOS_PER_SCTX 64

/*
 * Capacity of scrub_block::pagev[], i.e. the largest number of pages a single
 * scrub_block can reference.
 */
#define SCRUB_MAX_PAGES_PER_BLOCK 16
65
/*
 * One page of a block being scrubbed, plus the location it was read from.
 * Instances are reference counted via scrub_page_get()/scrub_page_put().
 */
struct scrub_page {
	struct scrub_block *sblock;	/* block this page belongs to */
	struct page *page;		/* backing memory page */
	struct btrfs_device *dev;	/* device the page is read from */
	u64 flags;			/* BTRFS_EXTENT_FLAG_* of the extent */
	u64 generation;			/* expected generation (metadata check) */
	u64 logical;			/* logical address */
	u64 physical;			/* byte offset on dev; >> 9 gives the sector */
	u64 physical_for_dev_replace;
	atomic_t ref_count;
	struct {
		unsigned int mirror_num:8;	/* 1-based mirror number */
		unsigned int have_csum:1;	/* csum[] below is valid */
		unsigned int io_error:1;	/* last read of this page failed */
	};
	u8 csum[BTRFS_CSUM_SIZE];
};
83
/*
 * Bookkeeping for one bio used by scrub. A scrub_ctx owns a fixed array of
 * these (scrub_ctx::bios[]); unused entries are chained through next_free.
 */
struct scrub_bio {
	int index;			/* own slot in scrub_ctx::bios[] */
	struct scrub_ctx *sctx;		/* owning context */
	struct btrfs_device *dev;
	struct bio *bio;
	int err;
	u64 logical;
	u64 physical;
	/* pagev[] must be able to hold the larger of the read/write limits */
#if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
	struct scrub_page *pagev[SCRUB_PAGES_PER_WR_BIO];
#else
	struct scrub_page *pagev[SCRUB_PAGES_PER_RD_BIO];
#endif
	int page_count;			/* number of valid entries in pagev[] */
	int next_free;			/* index of next free bio, -1 ends the chain */
	struct btrfs_work work;		/* runs the end_io worker for this bio */
};
101
/*
 * A block (up to SCRUB_MAX_PAGES_PER_BLOCK pages) that is verified as a unit.
 * The error flags are (re)computed by scrub_recheck_block() and
 * scrub_recheck_block_checksum().
 */
struct scrub_block {
	struct scrub_page *pagev[SCRUB_MAX_PAGES_PER_BLOCK];
	int page_count;			/* number of valid entries in pagev[] */
	atomic_t outstanding_pages;
	atomic_t ref_count;		/* see scrub_block_get()/scrub_block_put() */
	struct scrub_ctx *sctx;
	struct {
		unsigned int header_error:1;	/* metadata header mismatch */
		unsigned int checksum_error:1;	/* computed csum != expected csum */
		unsigned int no_io_error_seen:1;/* every page was read without error */
		unsigned int generation_error:1;/* header generation mismatch */
	};
};
115
/*
 * Write-side state embedded in struct scrub_ctx. It is initialised by
 * scrub_setup_wr_ctx() (called with the dev-replace target device) and
 * released by scrub_free_wr_ctx().
 * NOTE(review): field semantics below are inferred from names — confirm
 * against scrub_setup_wr_ctx()/scrub_add_page_to_wr_bio().
 */
struct scrub_wr_ctx {
	struct scrub_bio *wr_curr_bio;	/* presumably the write bio being filled */
	struct btrfs_device *tgtdev;	/* presumably the write target device */
	int pages_per_wr_bio;
	atomic_t flush_all_writes;
	struct mutex wr_lock;
};
123
/*
 * Per-scrub context: the pool of scrub_bio structures, geometry copied from
 * the device root, pending-work counters and the progress statistics.
 * Allocated by scrub_setup_ctx(), released by scrub_free_ctx().
 */
struct scrub_ctx {
	struct scrub_bio *bios[SCRUB_BIOS_PER_SCTX];
	struct btrfs_root *dev_root;
	int first_free;			/* head of the free-bio chain (see next_free) */
	int curr;			/* currently selected bio index, -1 for none */
	atomic_t bios_in_flight;	/* see scrub_pending_bio_inc()/_dec() */
	atomic_t workers_pending;	/* see scrub_pending_trans_workers_*() */
	spinlock_t list_lock;
	wait_queue_head_t list_wait;	/* woken when a bio or worker completes */
	u16 csum_size;			/* checksum size taken from the super block */
	struct list_head csum_list;	/* list of struct btrfs_ordered_sum */
	atomic_t cancel_req;
	int readonly;			/* if set (and not dev-replace), never repair */
	int pages_per_rd_bio;		/* <= SCRUB_PAGES_PER_RD_BIO */
	u32 sectorsize;
	u32 nodesize;
	u32 leafsize;

	int is_dev_replace;
	struct scrub_wr_ctx wr_ctx;

	/*
	 * Statistics; updated under stat_lock.
	 */
	struct btrfs_scrub_progress stat;
	spinlock_t stat_lock;
};
151
/*
 * Work item for scrub_fixup_nodatasum(): describes one data block without
 * checksum that has to be repaired from a worker. Filled in by
 * scrub_handle_errored_block() and freed by the worker.
 */
struct scrub_fixup_nodatasum {
	struct scrub_ctx *sctx;
	struct btrfs_device *dev;	/* device the error was seen on */
	u64 logical;			/* logical address of the bad block */
	struct btrfs_root *root;	/* root used to join a transaction */
	struct btrfs_work work;
	int mirror_num;			/* 1-based number of the failed mirror */
};
160
/*
 * Work item carrying the parameters of a copy_nocow_pages() request.
 * NOTE(review): presumably consumed by copy_nocow_pages_worker() — confirm,
 * the worker is outside the visible part of the file.
 */
struct scrub_copy_nocow_ctx {
	struct scrub_ctx *sctx;
	u64 logical;
	u64 len;
	int mirror_num;
	u64 physical_for_dev_replace;
	struct btrfs_work work;
};
169
/*
 * Context assembled by scrub_print_warning() and handed to
 * scrub_print_warning_inode() for every inode referencing the bad extent.
 */
struct scrub_warning {
	struct btrfs_path *path;	/* reusable path for tree lookups */
	u64 extent_item_size;
	char *scratch_buf;		/* scratch_bufsize bytes */
	char *msg_buf;			/* msg_bufsize bytes */
	const char *errstr;		/* short error description for the log line */
	sector_t sector;		/* physical >> 9 of the first bad page */
	u64 logical;			/* logical address of the first bad page */
	struct btrfs_device *dev;
	int msg_bufsize;
	int scratch_bufsize;
};
182
183
/*
 * Forward declarations of the file-local helpers, so that the definitions
 * below can reference each other regardless of their order in the file.
 */
static void scrub_pending_bio_inc(struct scrub_ctx *sctx);
static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx);
static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx);
static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
				     struct btrfs_fs_info *fs_info,
				     struct scrub_block *original_sblock,
				     u64 length, u64 logical,
				     struct scrub_block *sblocks_for_recheck);
static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
				struct scrub_block *sblock, int is_metadata,
				int have_csum, u8 *csum, u64 generation,
				u16 csum_size);
static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
					 struct scrub_block *sblock,
					 int is_metadata, int have_csum,
					 const u8 *csum, u64 generation,
					 u16 csum_size);
static void scrub_complete_bio_end_io(struct bio *bio, int err);
static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
					     struct scrub_block *sblock_good,
					     int force_write);
static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
					    struct scrub_block *sblock_good,
					    int page_num, int force_write);
static void scrub_write_block_to_dev_replace(struct scrub_block *sblock);
static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
					   int page_num);
static int scrub_checksum_data(struct scrub_block *sblock);
static int scrub_checksum_tree_block(struct scrub_block *sblock);
static int scrub_checksum_super(struct scrub_block *sblock);
static void scrub_block_get(struct scrub_block *sblock);
static void scrub_block_put(struct scrub_block *sblock);
static void scrub_page_get(struct scrub_page *spage);
static void scrub_page_put(struct scrub_page *spage);
static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage);
static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
		       u64 physical, struct btrfs_device *dev, u64 flags,
		       u64 gen, int mirror_num, u8 *csum, int force,
		       u64 physical_for_dev_replace);
static void scrub_bio_end_io(struct bio *bio, int err);
static void scrub_bio_end_io_worker(struct btrfs_work *work);
static void scrub_block_complete(struct scrub_block *sblock);
static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
			       u64 extent_logical, u64 extent_len,
			       u64 *extent_physical,
			       struct btrfs_device **extent_dev,
			       int *extent_mirror_num);
static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
			      struct scrub_wr_ctx *wr_ctx,
			      struct btrfs_fs_info *fs_info,
			      struct btrfs_device *dev,
			      int is_dev_replace);
static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx);
static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage);
static void scrub_wr_submit(struct scrub_ctx *sctx);
static void scrub_wr_bio_end_io(struct bio *bio, int err);
static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
static int write_page_nocow(struct scrub_ctx *sctx,
			    u64 physical_for_dev_replace, struct page *page);
static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
				      void *ctx);
static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
			    int mirror_num, u64 physical_for_dev_replace);
static void copy_nocow_pages_worker(struct btrfs_work *work);
252
253
/* Account for one more bio in flight on this scrub context. */
static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
{
	atomic_inc(&sctx->bios_in_flight);
}
258
/*
 * Drop the in-flight bio count and wake up sleepers on sctx->list_wait so
 * they can re-evaluate their wait condition.
 */
static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
{
	atomic_dec(&sctx->bios_in_flight);
	wake_up(&sctx->list_wait);
}
264
265
266
267
268
/*
 * Register a worker that is about to be queued for this scrub context.
 * Must be paired with scrub_pending_trans_workers_dec() once the worker is
 * done.
 */
static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx)
{
	struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;

	/*
	 * The worker is counted both as a running and as a paused scrub, with
	 * both counters changed under scrub_lock.
	 * NOTE(review): presumably "running" keeps a cancel request from
	 * completing while the worker is active, and "paused" keeps pause
	 * requests (e.g. for transaction commits) from waiting on a worker
	 * that itself uses a transaction — confirm against the pause/cancel
	 * code, which is outside this view.
	 */
	mutex_lock(&fs_info->scrub_lock);
	atomic_inc(&fs_info->scrubs_running);
	atomic_inc(&fs_info->scrubs_paused);
	mutex_unlock(&fs_info->scrub_lock);
	atomic_inc(&sctx->workers_pending);
}
288
289
/*
 * Counterpart of scrub_pending_trans_workers_inc(): called by a worker when
 * it has finished. Undoes the counter increments and wakes up waiters.
 */
static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx)
{
	struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;

	/*
	 * Drop the running/paused accounting taken in the _inc() helper,
	 * again under scrub_lock.
	 */
	mutex_lock(&fs_info->scrub_lock);
	atomic_dec(&fs_info->scrubs_running);
	atomic_dec(&fs_info->scrubs_paused);
	mutex_unlock(&fs_info->scrub_lock);
	atomic_dec(&sctx->workers_pending);
	/* both the fs-wide pause waiters and this context's waiters may proceed */
	wake_up(&fs_info->scrub_pause_wait);
	wake_up(&sctx->list_wait);
}
306
307static void scrub_free_csums(struct scrub_ctx *sctx)
308{
309 while (!list_empty(&sctx->csum_list)) {
310 struct btrfs_ordered_sum *sum;
311 sum = list_first_entry(&sctx->csum_list,
312 struct btrfs_ordered_sum, list);
313 list_del(&sum->list);
314 kfree(sum);
315 }
316}
317
318static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
319{
320 int i;
321
322 if (!sctx)
323 return;
324
325 scrub_free_wr_ctx(&sctx->wr_ctx);
326
327
328 if (sctx->curr != -1) {
329 struct scrub_bio *sbio = sctx->bios[sctx->curr];
330
331 for (i = 0; i < sbio->page_count; i++) {
332 WARN_ON(!sbio->pagev[i]->page);
333 scrub_block_put(sbio->pagev[i]->sblock);
334 }
335 bio_put(sbio->bio);
336 }
337
338 for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
339 struct scrub_bio *sbio = sctx->bios[i];
340
341 if (!sbio)
342 break;
343 kfree(sbio);
344 }
345
346 scrub_free_csums(sctx);
347 kfree(sctx);
348}
349
350static noinline_for_stack
351struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
352{
353 struct scrub_ctx *sctx;
354 int i;
355 struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
356 int pages_per_rd_bio;
357 int ret;
358
359
360
361
362
363
364
365
366 if (dev->bdev)
367 pages_per_rd_bio = min_t(int, SCRUB_PAGES_PER_RD_BIO,
368 bio_get_nr_vecs(dev->bdev));
369 else
370 pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
371 sctx = kzalloc(sizeof(*sctx), GFP_NOFS);
372 if (!sctx)
373 goto nomem;
374 sctx->is_dev_replace = is_dev_replace;
375 sctx->pages_per_rd_bio = pages_per_rd_bio;
376 sctx->curr = -1;
377 sctx->dev_root = dev->dev_root;
378 for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
379 struct scrub_bio *sbio;
380
381 sbio = kzalloc(sizeof(*sbio), GFP_NOFS);
382 if (!sbio)
383 goto nomem;
384 sctx->bios[i] = sbio;
385
386 sbio->index = i;
387 sbio->sctx = sctx;
388 sbio->page_count = 0;
389 sbio->work.func = scrub_bio_end_io_worker;
390
391 if (i != SCRUB_BIOS_PER_SCTX - 1)
392 sctx->bios[i]->next_free = i + 1;
393 else
394 sctx->bios[i]->next_free = -1;
395 }
396 sctx->first_free = 0;
397 sctx->nodesize = dev->dev_root->nodesize;
398 sctx->leafsize = dev->dev_root->leafsize;
399 sctx->sectorsize = dev->dev_root->sectorsize;
400 atomic_set(&sctx->bios_in_flight, 0);
401 atomic_set(&sctx->workers_pending, 0);
402 atomic_set(&sctx->cancel_req, 0);
403 sctx->csum_size = btrfs_super_csum_size(fs_info->super_copy);
404 INIT_LIST_HEAD(&sctx->csum_list);
405
406 spin_lock_init(&sctx->list_lock);
407 spin_lock_init(&sctx->stat_lock);
408 init_waitqueue_head(&sctx->list_wait);
409
410 ret = scrub_setup_wr_ctx(sctx, &sctx->wr_ctx, fs_info,
411 fs_info->dev_replace.tgtdev, is_dev_replace);
412 if (ret) {
413 scrub_free_ctx(sctx);
414 return ERR_PTR(ret);
415 }
416 return sctx;
417
418nomem:
419 scrub_free_ctx(sctx);
420 return ERR_PTR(-ENOMEM);
421}
422
423static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
424 void *warn_ctx)
425{
426 u64 isize;
427 u32 nlink;
428 int ret;
429 int i;
430 struct extent_buffer *eb;
431 struct btrfs_inode_item *inode_item;
432 struct scrub_warning *swarn = warn_ctx;
433 struct btrfs_fs_info *fs_info = swarn->dev->dev_root->fs_info;
434 struct inode_fs_paths *ipath = NULL;
435 struct btrfs_root *local_root;
436 struct btrfs_key root_key;
437
438 root_key.objectid = root;
439 root_key.type = BTRFS_ROOT_ITEM_KEY;
440 root_key.offset = (u64)-1;
441 local_root = btrfs_read_fs_root_no_name(fs_info, &root_key);
442 if (IS_ERR(local_root)) {
443 ret = PTR_ERR(local_root);
444 goto err;
445 }
446
447 ret = inode_item_info(inum, 0, local_root, swarn->path);
448 if (ret) {
449 btrfs_release_path(swarn->path);
450 goto err;
451 }
452
453 eb = swarn->path->nodes[0];
454 inode_item = btrfs_item_ptr(eb, swarn->path->slots[0],
455 struct btrfs_inode_item);
456 isize = btrfs_inode_size(eb, inode_item);
457 nlink = btrfs_inode_nlink(eb, inode_item);
458 btrfs_release_path(swarn->path);
459
460 ipath = init_ipath(4096, local_root, swarn->path);
461 if (IS_ERR(ipath)) {
462 ret = PTR_ERR(ipath);
463 ipath = NULL;
464 goto err;
465 }
466 ret = paths_from_inode(inum, ipath);
467
468 if (ret < 0)
469 goto err;
470
471
472
473
474
475 for (i = 0; i < ipath->fspath->elem_cnt; ++i)
476 printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "
477 "%s, sector %llu, root %llu, inode %llu, offset %llu, "
478 "length %llu, links %u (path: %s)\n", swarn->errstr,
479 swarn->logical, rcu_str_deref(swarn->dev->name),
480 (unsigned long long)swarn->sector, root, inum, offset,
481 min(isize - offset, (u64)PAGE_SIZE), nlink,
482 (char *)(unsigned long)ipath->fspath->val[i]);
483
484 free_ipath(ipath);
485 return 0;
486
487err:
488 printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "
489 "%s, sector %llu, root %llu, inode %llu, offset %llu: path "
490 "resolving failed with ret=%d\n", swarn->errstr,
491 swarn->logical, rcu_str_deref(swarn->dev->name),
492 (unsigned long long)swarn->sector, root, inum, offset, ret);
493
494 free_ipath(ipath);
495 return 0;
496}
497
498static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
499{
500 struct btrfs_device *dev;
501 struct btrfs_fs_info *fs_info;
502 struct btrfs_path *path;
503 struct btrfs_key found_key;
504 struct extent_buffer *eb;
505 struct btrfs_extent_item *ei;
506 struct scrub_warning swarn;
507 unsigned long ptr = 0;
508 u64 extent_item_pos;
509 u64 flags = 0;
510 u64 ref_root;
511 u32 item_size;
512 u8 ref_level;
513 const int bufsize = 4096;
514 int ret;
515
516 WARN_ON(sblock->page_count < 1);
517 dev = sblock->pagev[0]->dev;
518 fs_info = sblock->sctx->dev_root->fs_info;
519
520 path = btrfs_alloc_path();
521
522 swarn.scratch_buf = kmalloc(bufsize, GFP_NOFS);
523 swarn.msg_buf = kmalloc(bufsize, GFP_NOFS);
524 swarn.sector = (sblock->pagev[0]->physical) >> 9;
525 swarn.logical = sblock->pagev[0]->logical;
526 swarn.errstr = errstr;
527 swarn.dev = NULL;
528 swarn.msg_bufsize = bufsize;
529 swarn.scratch_bufsize = bufsize;
530
531 if (!path || !swarn.scratch_buf || !swarn.msg_buf)
532 goto out;
533
534 ret = extent_from_logical(fs_info, swarn.logical, path, &found_key,
535 &flags);
536 if (ret < 0)
537 goto out;
538
539 extent_item_pos = swarn.logical - found_key.objectid;
540 swarn.extent_item_size = found_key.offset;
541
542 eb = path->nodes[0];
543 ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
544 item_size = btrfs_item_size_nr(eb, path->slots[0]);
545
546 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
547 do {
548 ret = tree_backref_for_extent(&ptr, eb, ei, item_size,
549 &ref_root, &ref_level);
550 printk_in_rcu(KERN_WARNING
551 "btrfs: %s at logical %llu on dev %s, "
552 "sector %llu: metadata %s (level %d) in tree "
553 "%llu\n", errstr, swarn.logical,
554 rcu_str_deref(dev->name),
555 (unsigned long long)swarn.sector,
556 ref_level ? "node" : "leaf",
557 ret < 0 ? -1 : ref_level,
558 ret < 0 ? -1 : ref_root);
559 } while (ret != 1);
560 btrfs_release_path(path);
561 } else {
562 btrfs_release_path(path);
563 swarn.path = path;
564 swarn.dev = dev;
565 iterate_extent_inodes(fs_info, found_key.objectid,
566 extent_item_pos, 1,
567 scrub_print_warning_inode, &swarn);
568 }
569
570out:
571 btrfs_free_path(path);
572 kfree(swarn.scratch_buf);
573 kfree(swarn.msg_buf);
574}
575
576static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
577{
578 struct page *page = NULL;
579 unsigned long index;
580 struct scrub_fixup_nodatasum *fixup = fixup_ctx;
581 int ret;
582 int corrected = 0;
583 struct btrfs_key key;
584 struct inode *inode = NULL;
585 struct btrfs_fs_info *fs_info;
586 u64 end = offset + PAGE_SIZE - 1;
587 struct btrfs_root *local_root;
588 int srcu_index;
589
590 key.objectid = root;
591 key.type = BTRFS_ROOT_ITEM_KEY;
592 key.offset = (u64)-1;
593
594 fs_info = fixup->root->fs_info;
595 srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
596
597 local_root = btrfs_read_fs_root_no_name(fs_info, &key);
598 if (IS_ERR(local_root)) {
599 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
600 return PTR_ERR(local_root);
601 }
602
603 key.type = BTRFS_INODE_ITEM_KEY;
604 key.objectid = inum;
605 key.offset = 0;
606 inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
607 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
608 if (IS_ERR(inode))
609 return PTR_ERR(inode);
610
611 index = offset >> PAGE_CACHE_SHIFT;
612
613 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
614 if (!page) {
615 ret = -ENOMEM;
616 goto out;
617 }
618
619 if (PageUptodate(page)) {
620 if (PageDirty(page)) {
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637 ret = -EIO;
638 goto out;
639 }
640 fs_info = BTRFS_I(inode)->root->fs_info;
641 ret = repair_io_failure(fs_info, offset, PAGE_SIZE,
642 fixup->logical, page,
643 fixup->mirror_num);
644 unlock_page(page);
645 corrected = !ret;
646 } else {
647
648
649
650
651
652 ret = set_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
653 EXTENT_DAMAGED, GFP_NOFS);
654 if (ret) {
655
656 WARN_ON(ret > 0);
657 if (ret > 0)
658 ret = -EFAULT;
659 goto out;
660 }
661
662 ret = extent_read_full_page(&BTRFS_I(inode)->io_tree, page,
663 btrfs_get_extent,
664 fixup->mirror_num);
665 wait_on_page_locked(page);
666
667 corrected = !test_range_bit(&BTRFS_I(inode)->io_tree, offset,
668 end, EXTENT_DAMAGED, 0, NULL);
669 if (!corrected)
670 clear_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
671 EXTENT_DAMAGED, GFP_NOFS);
672 }
673
674out:
675 if (page)
676 put_page(page);
677 if (inode)
678 iput(inode);
679
680 if (ret < 0)
681 return ret;
682
683 if (ret == 0 && corrected) {
684
685
686
687
688 return 1;
689 }
690
691 return -EIO;
692}
693
694static void scrub_fixup_nodatasum(struct btrfs_work *work)
695{
696 int ret;
697 struct scrub_fixup_nodatasum *fixup;
698 struct scrub_ctx *sctx;
699 struct btrfs_trans_handle *trans = NULL;
700 struct btrfs_fs_info *fs_info;
701 struct btrfs_path *path;
702 int uncorrectable = 0;
703
704 fixup = container_of(work, struct scrub_fixup_nodatasum, work);
705 sctx = fixup->sctx;
706 fs_info = fixup->root->fs_info;
707
708 path = btrfs_alloc_path();
709 if (!path) {
710 spin_lock(&sctx->stat_lock);
711 ++sctx->stat.malloc_errors;
712 spin_unlock(&sctx->stat_lock);
713 uncorrectable = 1;
714 goto out;
715 }
716
717 trans = btrfs_join_transaction(fixup->root);
718 if (IS_ERR(trans)) {
719 uncorrectable = 1;
720 goto out;
721 }
722
723
724
725
726
727
728
729
730
731
732 ret = iterate_inodes_from_logical(fixup->logical, fixup->root->fs_info,
733 path, scrub_fixup_readpage,
734 fixup);
735 if (ret < 0) {
736 uncorrectable = 1;
737 goto out;
738 }
739 WARN_ON(ret != 1);
740
741 spin_lock(&sctx->stat_lock);
742 ++sctx->stat.corrected_errors;
743 spin_unlock(&sctx->stat_lock);
744
745out:
746 if (trans && !IS_ERR(trans))
747 btrfs_end_transaction(trans, fixup->root);
748 if (uncorrectable) {
749 spin_lock(&sctx->stat_lock);
750 ++sctx->stat.uncorrectable_errors;
751 spin_unlock(&sctx->stat_lock);
752 btrfs_dev_replace_stats_inc(
753 &sctx->dev_root->fs_info->dev_replace.
754 num_uncorrectable_read_errors);
755 printk_ratelimited_in_rcu(KERN_ERR
756 "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n",
757 (unsigned long long)fixup->logical,
758 rcu_str_deref(fixup->dev->name));
759 }
760
761 btrfs_free_path(path);
762 kfree(fixup);
763
764 scrub_pending_trans_workers_dec(sctx);
765}
766
767
768
769
770
771
772
773
774
/*
 * Handle a block for which the regular scrub pass detected an error.
 *
 * The block is re-read page by page from every mirror. Depending on the
 * outcome the error is counted as unverified (it did not reproduce),
 * repaired from a good mirror (or written to the dev-replace target), or
 * counted as uncorrectable. Data blocks without checksum are handed to the
 * scrub_fixup_nodatasum() worker instead.
 *
 * @sblock_to_check: the block that failed; must contain at least one page.
 *
 * Always returns 0; results are reported through sctx->stat, the device
 * statistics and the kernel log.
 */
static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
{
	struct scrub_ctx *sctx = sblock_to_check->sctx;
	struct btrfs_device *dev;
	struct btrfs_fs_info *fs_info;
	u64 length;
	u64 logical;
	u64 generation;
	unsigned int failed_mirror_index;
	unsigned int is_metadata;
	unsigned int have_csum;
	u8 *csum;
	struct scrub_block *sblocks_for_recheck; /* holds one for each mirror */
	struct scrub_block *sblock_bad;
	int ret;
	int mirror_index;
	int page_num;
	int success;
	static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	BUG_ON(sblock_to_check->page_count < 1);
	fs_info = sctx->dev_root->fs_info;
	if (sblock_to_check->pagev[0]->flags & BTRFS_EXTENT_FLAG_SUPER) {
		/*
		 * Super block errors are only counted here; no repair is
		 * attempted in this function.
		 */
		spin_lock(&sctx->stat_lock);
		++sctx->stat.super_errors;
		spin_unlock(&sctx->stat_lock);
		return 0;
	}
	length = sblock_to_check->page_count * PAGE_SIZE;
	logical = sblock_to_check->pagev[0]->logical;
	generation = sblock_to_check->pagev[0]->generation;
	BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1);
	/* mirror_num is 1-based, the recheck array is 0-based */
	failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1;
	is_metadata = !(sblock_to_check->pagev[0]->flags &
			BTRFS_EXTENT_FLAG_DATA);
	have_csum = sblock_to_check->pagev[0]->have_csum;
	csum = sblock_to_check->pagev[0]->csum;
	dev = sblock_to_check->pagev[0]->dev;

	if (sctx->is_dev_replace && !is_metadata && !have_csum) {
		sblocks_for_recheck = NULL;
		goto nodatasum_case;
	}

	/*
	 * Read all mirrors again, page by page. This also re-reads the block
	 * that failed, so that afterwards it is known for each mirror and
	 * each page whether the read produced an I/O error. That allows a
	 * block to be repaired from several mirrors as long as every page is
	 * readable on at least one of them.
	 *
	 * One scrub_block per possible mirror is allocated (zeroed); mirrors
	 * that do not exist keep page_count == 0.
	 */
	sblocks_for_recheck = kzalloc(BTRFS_MAX_MIRRORS *
				     sizeof(*sblocks_for_recheck),
				     GFP_NOFS);
	if (!sblocks_for_recheck) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.malloc_errors++;
		sctx->stat.read_errors++;
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
		goto out;
	}

	/* setup the context, map the logical blocks and alloc the pages */
	ret = scrub_setup_recheck_block(sctx, fs_info, sblock_to_check, length,
					logical, sblocks_for_recheck);
	if (ret) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.read_errors++;
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
		goto out;
	}
	BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
	sblock_bad = sblocks_for_recheck + failed_mirror_index;

	/* build and submit the bios for the failed mirror, check checksums */
	scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum,
			    csum, generation, sctx->csum_size);

	if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
	    sblock_bad->no_io_error_seen) {
		/*
		 * The error did not reproduce when the block was re-read page
		 * by page: neither an I/O error nor a header/checksum
		 * mismatch. Count it as unverified; for dev-replace the now
		 * good data is still written to the target.
		 */
		spin_lock(&sctx->stat_lock);
		sctx->stat.unverified_errors++;
		spin_unlock(&sctx->stat_lock);

		if (sctx->is_dev_replace)
			scrub_write_block_to_dev_replace(sblock_bad);
		goto out;
	}

	/* classify the confirmed error: I/O error, checksum, or header */
	if (!sblock_bad->no_io_error_seen) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.read_errors++;
		spin_unlock(&sctx->stat_lock);
		if (__ratelimit(&_rs))
			scrub_print_warning("i/o error", sblock_to_check);
		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
	} else if (sblock_bad->checksum_error) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.csum_errors++;
		spin_unlock(&sctx->stat_lock);
		if (__ratelimit(&_rs))
			scrub_print_warning("checksum error", sblock_to_check);
		btrfs_dev_stat_inc_and_print(dev,
					     BTRFS_DEV_STAT_CORRUPTION_ERRS);
	} else if (sblock_bad->header_error) {
		spin_lock(&sctx->stat_lock);
		sctx->stat.verify_errors++;
		spin_unlock(&sctx->stat_lock);
		if (__ratelimit(&_rs))
			scrub_print_warning("checksum/header error",
					    sblock_to_check);
		if (sblock_bad->generation_error)
			btrfs_dev_stat_inc_and_print(dev,
				BTRFS_DEV_STAT_GENERATION_ERRS);
		else
			btrfs_dev_stat_inc_and_print(dev,
				BTRFS_DEV_STAT_CORRUPTION_ERRS);
	}

	/* a read-only scrub never repairs (dev-replace still has to copy) */
	if (sctx->readonly && !sctx->is_dev_replace)
		goto did_not_correct_error;

	if (!is_metadata && !have_csum) {
		struct scrub_fixup_nodatasum *fixup_nodatasum;

nodatasum_case:
		WARN_ON(sctx->is_dev_replace);

		/*
		 * Data without checksum: there is no way to tell here which
		 * mirror holds good data, so the repair is delegated to the
		 * scrub_fixup_nodatasum() worker. The worker frees the fixup
		 * structure and drops the pending-worker accounting.
		 */
		fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS);
		if (!fixup_nodatasum)
			goto did_not_correct_error;
		fixup_nodatasum->sctx = sctx;
		fixup_nodatasum->dev = dev;
		fixup_nodatasum->logical = logical;
		fixup_nodatasum->root = fs_info->extent_root;
		fixup_nodatasum->mirror_num = failed_mirror_index + 1;
		scrub_pending_trans_workers_inc(sctx);
		fixup_nodatasum->work.func = scrub_fixup_nodatasum;
		btrfs_queue_worker(&fs_info->scrub_workers,
				   &fixup_nodatasum->work);
		goto out;
	}

	/*
	 * First try: look for a mirror whose whole block is good (no I/O
	 * error, header and checksum fine) and repair from it in one go, or
	 * write it to the dev-replace target.
	 *
	 * NOTE(review): in the dev-replace branch "ret" is not assigned, so
	 * the "0 == ret" test below sees the 0 left by
	 * scrub_setup_recheck_block() (or by an earlier iteration).
	 */
	for (mirror_index = 0;
	     mirror_index < BTRFS_MAX_MIRRORS &&
	     sblocks_for_recheck[mirror_index].page_count > 0;
	     mirror_index++) {
		struct scrub_block *sblock_other;

		if (mirror_index == failed_mirror_index)
			continue;
		sblock_other = sblocks_for_recheck + mirror_index;

		/* build and submit the bios, check checksums */
		scrub_recheck_block(fs_info, sblock_other, is_metadata,
				    have_csum, csum, generation,
				    sctx->csum_size);

		if (!sblock_other->header_error &&
		    !sblock_other->checksum_error &&
		    sblock_other->no_io_error_seen) {
			if (sctx->is_dev_replace) {
				scrub_write_block_to_dev_replace(sblock_other);
			} else {
				int force_write = is_metadata || have_csum;

				ret = scrub_repair_block_from_good_copy(
						sblock_bad, sblock_other,
						force_write);
			}
			if (0 == ret)
				goto corrected_error;
		}
	}

	/*
	 * For dev-replace without a completely good mirror: write every page
	 * from the first mirror that could read it without I/O error.
	 */
	if (sctx->is_dev_replace) {
		success = 1;
		for (page_num = 0; page_num < sblock_bad->page_count;
		     page_num++) {
			int sub_success;

			sub_success = 0;
			for (mirror_index = 0;
			     mirror_index < BTRFS_MAX_MIRRORS &&
			     sblocks_for_recheck[mirror_index].page_count > 0;
			     mirror_index++) {
				struct scrub_block *sblock_other =
					sblocks_for_recheck + mirror_index;
				struct scrub_page *page_other =
					sblock_other->pagev[page_num];

				if (!page_other->io_error) {
					ret = scrub_write_page_to_dev_replace(
							sblock_other, page_num);
					if (ret == 0) {
						/* succeeded for this page */
						sub_success = 1;
						break;
					} else {
						btrfs_dev_replace_stats_inc(
							&sctx->dev_root->
							fs_info->dev_replace.
							num_write_errors);
					}
				}
			}

			if (!sub_success) {
				/*
				 * No mirror delivered this page without I/O
				 * error; fall back to writing the page of the
				 * failed mirror to the target.
				 */
				success = 0;
				ret = scrub_write_page_to_dev_replace(
						sblock_bad, page_num);
				if (ret)
					btrfs_dev_replace_stats_inc(
						&sctx->dev_root->fs_info->
						dev_replace.num_write_errors);
			}
		}

		goto out;
	}

	/*
	 * Regular scrub, no mirror was entirely good. Page-wise repair is
	 * only attempted when the bad block had I/O errors: then each
	 * unreadable page is rewritten from any mirror that could read it,
	 * and the result is verified afterwards (if there is something to
	 * verify against). When all pages were readable but the header or
	 * checksum is wrong, it is unknown which page is bad, so the block
	 * is not repairable here.
	 */

	/* can only fix I/O errors from here on */
	if (sblock_bad->no_io_error_seen)
		goto did_not_correct_error;

	success = 1;
	for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
		struct scrub_page *page_bad = sblock_bad->pagev[page_num];

		if (!page_bad->io_error)
			continue;

		for (mirror_index = 0;
		     mirror_index < BTRFS_MAX_MIRRORS &&
		     sblocks_for_recheck[mirror_index].page_count > 0;
		     mirror_index++) {
			struct scrub_block *sblock_other = sblocks_for_recheck +
							   mirror_index;
			struct scrub_page *page_other = sblock_other->pagev[
							page_num];

			if (!page_other->io_error) {
				ret = scrub_repair_page_from_good_copy(
					sblock_bad, sblock_other, page_num, 0);
				if (0 == ret) {
					page_bad->io_error = 0;
					break; /* succeeded for this page */
				}
			}
		}

		if (page_bad->io_error) {
			/* did not find a mirror to copy the page from */
			success = 0;
		}
	}

	if (success) {
		if (is_metadata || have_csum) {
			/*
			 * Every unreadable page was rewritten. Since a
			 * checksum (or metadata header) is available, read the
			 * repaired block back and verify it before declaring
			 * the error corrected.
			 */
			scrub_recheck_block(fs_info, sblock_bad,
					    is_metadata, have_csum, csum,
					    generation, sctx->csum_size);
			if (!sblock_bad->header_error &&
			    !sblock_bad->checksum_error &&
			    sblock_bad->no_io_error_seen)
				goto corrected_error;
			else
				goto did_not_correct_error;
		} else {
corrected_error:
			spin_lock(&sctx->stat_lock);
			sctx->stat.corrected_errors++;
			spin_unlock(&sctx->stat_lock);
			printk_ratelimited_in_rcu(KERN_ERR
				"btrfs: fixed up error at logical %llu on dev %s\n",
				(unsigned long long)logical,
				rcu_str_deref(dev->name));
		}
	} else {
did_not_correct_error:
		spin_lock(&sctx->stat_lock);
		sctx->stat.uncorrectable_errors++;
		spin_unlock(&sctx->stat_lock);
		printk_ratelimited_in_rcu(KERN_ERR
			"btrfs: unable to fixup (regular) error at logical %llu on dev %s\n",
			(unsigned long long)logical,
			rcu_str_deref(dev->name));
	}

out:
	/* release the recheck pages of all mirrors, then the array itself */
	if (sblocks_for_recheck) {
		for (mirror_index = 0; mirror_index < BTRFS_MAX_MIRRORS;
		     mirror_index++) {
			struct scrub_block *sblock = sblocks_for_recheck +
						     mirror_index;
			int page_index;

			for (page_index = 0; page_index < sblock->page_count;
			     page_index++) {
				/* the block is about to be freed; detach first */
				sblock->pagev[page_index]->sblock = NULL;
				scrub_page_put(sblock->pagev[page_index]);
			}
		}
		kfree(sblocks_for_recheck);
	}

	return 0;
}
1190
1191static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
1192 struct btrfs_fs_info *fs_info,
1193 struct scrub_block *original_sblock,
1194 u64 length, u64 logical,
1195 struct scrub_block *sblocks_for_recheck)
1196{
1197 int page_index;
1198 int mirror_index;
1199 int ret;
1200
1201
1202
1203
1204
1205
1206
1207 page_index = 0;
1208 while (length > 0) {
1209 u64 sublen = min_t(u64, length, PAGE_SIZE);
1210 u64 mapped_length = sublen;
1211 struct btrfs_bio *bbio = NULL;
1212
1213
1214
1215
1216
1217 ret = btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, logical,
1218 &mapped_length, &bbio, 0);
1219 if (ret || !bbio || mapped_length < sublen) {
1220 kfree(bbio);
1221 return -EIO;
1222 }
1223
1224 BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO);
1225 for (mirror_index = 0; mirror_index < (int)bbio->num_stripes;
1226 mirror_index++) {
1227 struct scrub_block *sblock;
1228 struct scrub_page *page;
1229
1230 if (mirror_index >= BTRFS_MAX_MIRRORS)
1231 continue;
1232
1233 sblock = sblocks_for_recheck + mirror_index;
1234 sblock->sctx = sctx;
1235 page = kzalloc(sizeof(*page), GFP_NOFS);
1236 if (!page) {
1237leave_nomem:
1238 spin_lock(&sctx->stat_lock);
1239 sctx->stat.malloc_errors++;
1240 spin_unlock(&sctx->stat_lock);
1241 kfree(bbio);
1242 return -ENOMEM;
1243 }
1244 scrub_page_get(page);
1245 sblock->pagev[page_index] = page;
1246 page->logical = logical;
1247 page->physical = bbio->stripes[mirror_index].physical;
1248 BUG_ON(page_index >= original_sblock->page_count);
1249 page->physical_for_dev_replace =
1250 original_sblock->pagev[page_index]->
1251 physical_for_dev_replace;
1252
1253 page->dev = bbio->stripes[mirror_index].dev;
1254 page->mirror_num = mirror_index + 1;
1255 sblock->page_count++;
1256 page->page = alloc_page(GFP_NOFS);
1257 if (!page->page)
1258 goto leave_nomem;
1259 }
1260 kfree(bbio);
1261 length -= sublen;
1262 logical += sublen;
1263 page_index++;
1264 }
1265
1266 return 0;
1267}
1268
1269
1270
1271
1272
1273
1274
1275
1276static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
1277 struct scrub_block *sblock, int is_metadata,
1278 int have_csum, u8 *csum, u64 generation,
1279 u16 csum_size)
1280{
1281 int page_num;
1282
1283 sblock->no_io_error_seen = 1;
1284 sblock->header_error = 0;
1285 sblock->checksum_error = 0;
1286
1287 for (page_num = 0; page_num < sblock->page_count; page_num++) {
1288 struct bio *bio;
1289 struct scrub_page *page = sblock->pagev[page_num];
1290 DECLARE_COMPLETION_ONSTACK(complete);
1291
1292 if (page->dev->bdev == NULL) {
1293 page->io_error = 1;
1294 sblock->no_io_error_seen = 0;
1295 continue;
1296 }
1297
1298 WARN_ON(!page->page);
1299 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
1300 if (!bio) {
1301 page->io_error = 1;
1302 sblock->no_io_error_seen = 0;
1303 continue;
1304 }
1305 bio->bi_bdev = page->dev->bdev;
1306 bio->bi_sector = page->physical >> 9;
1307 bio->bi_end_io = scrub_complete_bio_end_io;
1308 bio->bi_private = &complete;
1309
1310 bio_add_page(bio, page->page, PAGE_SIZE, 0);
1311 btrfsic_submit_bio(READ, bio);
1312
1313
1314 wait_for_completion(&complete);
1315
1316 page->io_error = !test_bit(BIO_UPTODATE, &bio->bi_flags);
1317 if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
1318 sblock->no_io_error_seen = 0;
1319 bio_put(bio);
1320 }
1321
1322 if (sblock->no_io_error_seen)
1323 scrub_recheck_block_checksum(fs_info, sblock, is_metadata,
1324 have_csum, csum, generation,
1325 csum_size);
1326
1327 return;
1328}
1329
1330static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
1331 struct scrub_block *sblock,
1332 int is_metadata, int have_csum,
1333 const u8 *csum, u64 generation,
1334 u16 csum_size)
1335{
1336 int page_num;
1337 u8 calculated_csum[BTRFS_CSUM_SIZE];
1338 u32 crc = ~(u32)0;
1339 void *mapped_buffer;
1340
1341 WARN_ON(!sblock->pagev[0]->page);
1342 if (is_metadata) {
1343 struct btrfs_header *h;
1344
1345 mapped_buffer = kmap_atomic(sblock->pagev[0]->page);
1346 h = (struct btrfs_header *)mapped_buffer;
1347
1348 if (sblock->pagev[0]->logical != le64_to_cpu(h->bytenr) ||
1349 memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) ||
1350 memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
1351 BTRFS_UUID_SIZE)) {
1352 sblock->header_error = 1;
1353 } else if (generation != le64_to_cpu(h->generation)) {
1354 sblock->header_error = 1;
1355 sblock->generation_error = 1;
1356 }
1357 csum = h->csum;
1358 } else {
1359 if (!have_csum)
1360 return;
1361
1362 mapped_buffer = kmap_atomic(sblock->pagev[0]->page);
1363 }
1364
1365 for (page_num = 0;;) {
1366 if (page_num == 0 && is_metadata)
1367 crc = btrfs_csum_data(
1368 ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE,
1369 crc, PAGE_SIZE - BTRFS_CSUM_SIZE);
1370 else
1371 crc = btrfs_csum_data(mapped_buffer, crc, PAGE_SIZE);
1372
1373 kunmap_atomic(mapped_buffer);
1374 page_num++;
1375 if (page_num >= sblock->page_count)
1376 break;
1377 WARN_ON(!sblock->pagev[page_num]->page);
1378
1379 mapped_buffer = kmap_atomic(sblock->pagev[page_num]->page);
1380 }
1381
1382 btrfs_csum_final(crc, calculated_csum);
1383 if (memcmp(calculated_csum, csum, csum_size))
1384 sblock->checksum_error = 1;
1385}
1386
1387static void scrub_complete_bio_end_io(struct bio *bio, int err)
1388{
1389 complete((struct completion *)bio->bi_private);
1390}
1391
1392static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
1393 struct scrub_block *sblock_good,
1394 int force_write)
1395{
1396 int page_num;
1397 int ret = 0;
1398
1399 for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
1400 int ret_sub;
1401
1402 ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
1403 sblock_good,
1404 page_num,
1405 force_write);
1406 if (ret_sub)
1407 ret = ret_sub;
1408 }
1409
1410 return ret;
1411}
1412
1413static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
1414 struct scrub_block *sblock_good,
1415 int page_num, int force_write)
1416{
1417 struct scrub_page *page_bad = sblock_bad->pagev[page_num];
1418 struct scrub_page *page_good = sblock_good->pagev[page_num];
1419
1420 BUG_ON(page_bad->page == NULL);
1421 BUG_ON(page_good->page == NULL);
1422 if (force_write || sblock_bad->header_error ||
1423 sblock_bad->checksum_error || page_bad->io_error) {
1424 struct bio *bio;
1425 int ret;
1426 DECLARE_COMPLETION_ONSTACK(complete);
1427
1428 if (!page_bad->dev->bdev) {
1429 printk_ratelimited(KERN_WARNING
1430 "btrfs: scrub_repair_page_from_good_copy(bdev == NULL) is unexpected!\n");
1431 return -EIO;
1432 }
1433
1434 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
1435 if (!bio)
1436 return -EIO;
1437 bio->bi_bdev = page_bad->dev->bdev;
1438 bio->bi_sector = page_bad->physical >> 9;
1439 bio->bi_end_io = scrub_complete_bio_end_io;
1440 bio->bi_private = &complete;
1441
1442 ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
1443 if (PAGE_SIZE != ret) {
1444 bio_put(bio);
1445 return -EIO;
1446 }
1447 btrfsic_submit_bio(WRITE, bio);
1448
1449
1450 wait_for_completion(&complete);
1451 if (!bio_flagged(bio, BIO_UPTODATE)) {
1452 btrfs_dev_stat_inc_and_print(page_bad->dev,
1453 BTRFS_DEV_STAT_WRITE_ERRS);
1454 btrfs_dev_replace_stats_inc(
1455 &sblock_bad->sctx->dev_root->fs_info->
1456 dev_replace.num_write_errors);
1457 bio_put(bio);
1458 return -EIO;
1459 }
1460 bio_put(bio);
1461 }
1462
1463 return 0;
1464}
1465
1466static void scrub_write_block_to_dev_replace(struct scrub_block *sblock)
1467{
1468 int page_num;
1469
1470 for (page_num = 0; page_num < sblock->page_count; page_num++) {
1471 int ret;
1472
1473 ret = scrub_write_page_to_dev_replace(sblock, page_num);
1474 if (ret)
1475 btrfs_dev_replace_stats_inc(
1476 &sblock->sctx->dev_root->fs_info->dev_replace.
1477 num_write_errors);
1478 }
1479}
1480
1481static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
1482 int page_num)
1483{
1484 struct scrub_page *spage = sblock->pagev[page_num];
1485
1486 BUG_ON(spage->page == NULL);
1487 if (spage->io_error) {
1488 void *mapped_buffer = kmap_atomic(spage->page);
1489
1490 memset(mapped_buffer, 0, PAGE_CACHE_SIZE);
1491 flush_dcache_page(spage->page);
1492 kunmap_atomic(mapped_buffer);
1493 }
1494 return scrub_add_page_to_wr_bio(sblock->sctx, spage);
1495}
1496
/*
 * Append @spage to the write bio that is currently being assembled in
 * sctx->wr_ctx, creating the scrub_bio and/or its bio on demand.
 *
 * The whole operation runs under wr_ctx->wr_lock.  A page is only appended
 * when it directly follows the pages already collected, both in
 * physical_for_dev_replace and in logical address; otherwise the collected
 * bio is submitted with scrub_wr_submit() and a fresh one is started.  The
 * bio is also submitted as soon as it holds wr_ctx->pages_per_wr_bio pages.
 *
 * Returns 0 on success, -ENOMEM when the scrub_bio or the bio cannot be
 * allocated, and -EIO when the page cannot be added even to an empty bio.
 */
static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage)
{
	struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx;
	struct scrub_bio *sbio;
	int ret;

	mutex_lock(&wr_ctx->wr_lock);
again:
	/* no write bio under construction: allocate a new container */
	if (!wr_ctx->wr_curr_bio) {
		wr_ctx->wr_curr_bio = kzalloc(sizeof(*wr_ctx->wr_curr_bio),
					      GFP_NOFS);
		if (!wr_ctx->wr_curr_bio) {
			mutex_unlock(&wr_ctx->wr_lock);
			return -ENOMEM;
		}
		wr_ctx->wr_curr_bio->sctx = sctx;
		wr_ctx->wr_curr_bio->page_count = 0;
	}
	sbio = wr_ctx->wr_curr_bio;
	if (sbio->page_count == 0) {
		struct bio *bio;

		/* first page: it defines where the whole bio starts */
		sbio->physical = spage->physical_for_dev_replace;
		sbio->logical = spage->logical;
		sbio->dev = wr_ctx->tgtdev;
		bio = sbio->bio;
		if (!bio) {
			bio = btrfs_io_bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio);
			if (!bio) {
				/* wr_curr_bio is kept, but without a bio */
				mutex_unlock(&wr_ctx->wr_lock);
				return -ENOMEM;
			}
			sbio->bio = bio;
		}

		bio->bi_private = sbio;
		bio->bi_end_io = scrub_wr_bio_end_io;
		bio->bi_bdev = sbio->dev->bdev;
		bio->bi_sector = sbio->physical >> 9;
		sbio->err = 0;
	} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
		   spage->physical_for_dev_replace ||
		   sbio->logical + sbio->page_count * PAGE_SIZE !=
		   spage->logical) {
		/* not contiguous with the collected pages: flush and retry */
		scrub_wr_submit(sctx);
		goto again;
	}

	ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
	if (ret != PAGE_SIZE) {
		if (sbio->page_count < 1) {
			/* even an empty bio refused the page: give up */
			bio_put(sbio->bio);
			sbio->bio = NULL;
			mutex_unlock(&wr_ctx->wr_lock);
			return -EIO;
		}
		/* bio is full: submit it and retry with a new one */
		scrub_wr_submit(sctx);
		goto again;
	}

	sbio->pagev[sbio->page_count] = spage;
	/* reference is dropped again in scrub_wr_bio_end_io_worker() */
	scrub_page_get(spage);
	sbio->page_count++;
	if (sbio->page_count == wr_ctx->pages_per_wr_bio)
		scrub_wr_submit(sctx);
	mutex_unlock(&wr_ctx->wr_lock);

	return 0;
}
1567
1568static void scrub_wr_submit(struct scrub_ctx *sctx)
1569{
1570 struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx;
1571 struct scrub_bio *sbio;
1572
1573 if (!wr_ctx->wr_curr_bio)
1574 return;
1575
1576 sbio = wr_ctx->wr_curr_bio;
1577 wr_ctx->wr_curr_bio = NULL;
1578 WARN_ON(!sbio->bio->bi_bdev);
1579 scrub_pending_bio_inc(sctx);
1580
1581
1582
1583
1584 btrfsic_submit_bio(WRITE, sbio->bio);
1585}
1586
1587static void scrub_wr_bio_end_io(struct bio *bio, int err)
1588{
1589 struct scrub_bio *sbio = bio->bi_private;
1590 struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info;
1591
1592 sbio->err = err;
1593 sbio->bio = bio;
1594
1595 sbio->work.func = scrub_wr_bio_end_io_worker;
1596 btrfs_queue_worker(&fs_info->scrub_wr_completion_workers, &sbio->work);
1597}
1598
1599static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
1600{
1601 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
1602 struct scrub_ctx *sctx = sbio->sctx;
1603 int i;
1604
1605 WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
1606 if (sbio->err) {
1607 struct btrfs_dev_replace *dev_replace =
1608 &sbio->sctx->dev_root->fs_info->dev_replace;
1609
1610 for (i = 0; i < sbio->page_count; i++) {
1611 struct scrub_page *spage = sbio->pagev[i];
1612
1613 spage->io_error = 1;
1614 btrfs_dev_replace_stats_inc(&dev_replace->
1615 num_write_errors);
1616 }
1617 }
1618
1619 for (i = 0; i < sbio->page_count; i++)
1620 scrub_page_put(sbio->pagev[i]);
1621
1622 bio_put(sbio->bio);
1623 kfree(sbio);
1624 scrub_pending_bio_dec(sctx);
1625}
1626
1627static int scrub_checksum(struct scrub_block *sblock)
1628{
1629 u64 flags;
1630 int ret;
1631
1632 WARN_ON(sblock->page_count < 1);
1633 flags = sblock->pagev[0]->flags;
1634 ret = 0;
1635 if (flags & BTRFS_EXTENT_FLAG_DATA)
1636 ret = scrub_checksum_data(sblock);
1637 else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
1638 ret = scrub_checksum_tree_block(sblock);
1639 else if (flags & BTRFS_EXTENT_FLAG_SUPER)
1640 (void)scrub_checksum_super(sblock);
1641 else
1642 WARN_ON(1);
1643 if (ret)
1644 scrub_handle_errored_block(sblock);
1645
1646 return ret;
1647}
1648
1649static int scrub_checksum_data(struct scrub_block *sblock)
1650{
1651 struct scrub_ctx *sctx = sblock->sctx;
1652 u8 csum[BTRFS_CSUM_SIZE];
1653 u8 *on_disk_csum;
1654 struct page *page;
1655 void *buffer;
1656 u32 crc = ~(u32)0;
1657 int fail = 0;
1658 u64 len;
1659 int index;
1660
1661 BUG_ON(sblock->page_count < 1);
1662 if (!sblock->pagev[0]->have_csum)
1663 return 0;
1664
1665 on_disk_csum = sblock->pagev[0]->csum;
1666 page = sblock->pagev[0]->page;
1667 buffer = kmap_atomic(page);
1668
1669 len = sctx->sectorsize;
1670 index = 0;
1671 for (;;) {
1672 u64 l = min_t(u64, len, PAGE_SIZE);
1673
1674 crc = btrfs_csum_data(buffer, crc, l);
1675 kunmap_atomic(buffer);
1676 len -= l;
1677 if (len == 0)
1678 break;
1679 index++;
1680 BUG_ON(index >= sblock->page_count);
1681 BUG_ON(!sblock->pagev[index]->page);
1682 page = sblock->pagev[index]->page;
1683 buffer = kmap_atomic(page);
1684 }
1685
1686 btrfs_csum_final(crc, csum);
1687 if (memcmp(csum, on_disk_csum, sctx->csum_size))
1688 fail = 1;
1689
1690 return fail;
1691}
1692
1693static int scrub_checksum_tree_block(struct scrub_block *sblock)
1694{
1695 struct scrub_ctx *sctx = sblock->sctx;
1696 struct btrfs_header *h;
1697 struct btrfs_root *root = sctx->dev_root;
1698 struct btrfs_fs_info *fs_info = root->fs_info;
1699 u8 calculated_csum[BTRFS_CSUM_SIZE];
1700 u8 on_disk_csum[BTRFS_CSUM_SIZE];
1701 struct page *page;
1702 void *mapped_buffer;
1703 u64 mapped_size;
1704 void *p;
1705 u32 crc = ~(u32)0;
1706 int fail = 0;
1707 int crc_fail = 0;
1708 u64 len;
1709 int index;
1710
1711 BUG_ON(sblock->page_count < 1);
1712 page = sblock->pagev[0]->page;
1713 mapped_buffer = kmap_atomic(page);
1714 h = (struct btrfs_header *)mapped_buffer;
1715 memcpy(on_disk_csum, h->csum, sctx->csum_size);
1716
1717
1718
1719
1720
1721
1722
1723 if (sblock->pagev[0]->logical != le64_to_cpu(h->bytenr))
1724 ++fail;
1725
1726 if (sblock->pagev[0]->generation != le64_to_cpu(h->generation))
1727 ++fail;
1728
1729 if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
1730 ++fail;
1731
1732 if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
1733 BTRFS_UUID_SIZE))
1734 ++fail;
1735
1736 WARN_ON(sctx->nodesize != sctx->leafsize);
1737 len = sctx->nodesize - BTRFS_CSUM_SIZE;
1738 mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
1739 p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
1740 index = 0;
1741 for (;;) {
1742 u64 l = min_t(u64, len, mapped_size);
1743
1744 crc = btrfs_csum_data(p, crc, l);
1745 kunmap_atomic(mapped_buffer);
1746 len -= l;
1747 if (len == 0)
1748 break;
1749 index++;
1750 BUG_ON(index >= sblock->page_count);
1751 BUG_ON(!sblock->pagev[index]->page);
1752 page = sblock->pagev[index]->page;
1753 mapped_buffer = kmap_atomic(page);
1754 mapped_size = PAGE_SIZE;
1755 p = mapped_buffer;
1756 }
1757
1758 btrfs_csum_final(crc, calculated_csum);
1759 if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
1760 ++crc_fail;
1761
1762 return fail || crc_fail;
1763}
1764
1765static int scrub_checksum_super(struct scrub_block *sblock)
1766{
1767 struct btrfs_super_block *s;
1768 struct scrub_ctx *sctx = sblock->sctx;
1769 struct btrfs_root *root = sctx->dev_root;
1770 struct btrfs_fs_info *fs_info = root->fs_info;
1771 u8 calculated_csum[BTRFS_CSUM_SIZE];
1772 u8 on_disk_csum[BTRFS_CSUM_SIZE];
1773 struct page *page;
1774 void *mapped_buffer;
1775 u64 mapped_size;
1776 void *p;
1777 u32 crc = ~(u32)0;
1778 int fail_gen = 0;
1779 int fail_cor = 0;
1780 u64 len;
1781 int index;
1782
1783 BUG_ON(sblock->page_count < 1);
1784 page = sblock->pagev[0]->page;
1785 mapped_buffer = kmap_atomic(page);
1786 s = (struct btrfs_super_block *)mapped_buffer;
1787 memcpy(on_disk_csum, s->csum, sctx->csum_size);
1788
1789 if (sblock->pagev[0]->logical != le64_to_cpu(s->bytenr))
1790 ++fail_cor;
1791
1792 if (sblock->pagev[0]->generation != le64_to_cpu(s->generation))
1793 ++fail_gen;
1794
1795 if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
1796 ++fail_cor;
1797
1798 len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE;
1799 mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
1800 p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
1801 index = 0;
1802 for (;;) {
1803 u64 l = min_t(u64, len, mapped_size);
1804
1805 crc = btrfs_csum_data(p, crc, l);
1806 kunmap_atomic(mapped_buffer);
1807 len -= l;
1808 if (len == 0)
1809 break;
1810 index++;
1811 BUG_ON(index >= sblock->page_count);
1812 BUG_ON(!sblock->pagev[index]->page);
1813 page = sblock->pagev[index]->page;
1814 mapped_buffer = kmap_atomic(page);
1815 mapped_size = PAGE_SIZE;
1816 p = mapped_buffer;
1817 }
1818
1819 btrfs_csum_final(crc, calculated_csum);
1820 if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
1821 ++fail_cor;
1822
1823 if (fail_cor + fail_gen) {
1824
1825
1826
1827
1828
1829 spin_lock(&sctx->stat_lock);
1830 ++sctx->stat.super_errors;
1831 spin_unlock(&sctx->stat_lock);
1832 if (fail_cor)
1833 btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
1834 BTRFS_DEV_STAT_CORRUPTION_ERRS);
1835 else
1836 btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
1837 BTRFS_DEV_STAT_GENERATION_ERRS);
1838 }
1839
1840 return fail_cor + fail_gen;
1841}
1842
1843static void scrub_block_get(struct scrub_block *sblock)
1844{
1845 atomic_inc(&sblock->ref_count);
1846}
1847
1848static void scrub_block_put(struct scrub_block *sblock)
1849{
1850 if (atomic_dec_and_test(&sblock->ref_count)) {
1851 int i;
1852
1853 for (i = 0; i < sblock->page_count; i++)
1854 scrub_page_put(sblock->pagev[i]);
1855 kfree(sblock);
1856 }
1857}
1858
1859static void scrub_page_get(struct scrub_page *spage)
1860{
1861 atomic_inc(&spage->ref_count);
1862}
1863
1864static void scrub_page_put(struct scrub_page *spage)
1865{
1866 if (atomic_dec_and_test(&spage->ref_count)) {
1867 if (spage->page)
1868 __free_page(spage->page);
1869 kfree(spage);
1870 }
1871}
1872
1873static void scrub_submit(struct scrub_ctx *sctx)
1874{
1875 struct scrub_bio *sbio;
1876
1877 if (sctx->curr == -1)
1878 return;
1879
1880 sbio = sctx->bios[sctx->curr];
1881 sctx->curr = -1;
1882 scrub_pending_bio_inc(sctx);
1883
1884 if (!sbio->bio->bi_bdev) {
1885
1886
1887
1888
1889
1890
1891
1892 printk_ratelimited(KERN_WARNING
1893 "btrfs: scrub_submit(bio bdev == NULL) is unexpected!\n");
1894 bio_endio(sbio->bio, -EIO);
1895 } else {
1896 btrfsic_submit_bio(READ, sbio->bio);
1897 }
1898}
1899
/*
 * Append @spage to the read bio that is currently being filled, taking a
 * scrub_bio from the context's free list when there is no current one.
 *
 * A page is only appended when it directly follows the pages already
 * collected (same device, contiguous physical and logical address);
 * otherwise the collected bio is submitted with scrub_submit() and a new
 * one is started.  The bio is also submitted once it holds
 * sctx->pages_per_rd_bio pages.
 *
 * For every page added, the owning block gets an extra reference and its
 * outstanding_pages counter is raised; both are undone in
 * scrub_bio_end_io_worker().
 *
 * Returns 0 on success, -ENOMEM when no bio can be allocated and -EIO when
 * the page cannot be added even to an empty bio.
 */
static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage)
{
	struct scrub_block *sblock = spage->sblock;
	struct scrub_bio *sbio;
	int ret;

again:
	/*
	 * Grab a free scrub_bio (sctx->curr == -1 means none is being
	 * filled).  If the free list is empty, sleep until an entry is
	 * returned to it.
	 */
	while (sctx->curr == -1) {
		spin_lock(&sctx->list_lock);
		sctx->curr = sctx->first_free;
		if (sctx->curr != -1) {
			/* unlink the entry from the free list and reset it */
			sctx->first_free = sctx->bios[sctx->curr]->next_free;
			sctx->bios[sctx->curr]->next_free = -1;
			sctx->bios[sctx->curr]->page_count = 0;
			spin_unlock(&sctx->list_lock);
		} else {
			spin_unlock(&sctx->list_lock);
			wait_event(sctx->list_wait, sctx->first_free != -1);
		}
	}
	sbio = sctx->bios[sctx->curr];
	if (sbio->page_count == 0) {
		struct bio *bio;

		/* first page: it defines device and start of the bio */
		sbio->physical = spage->physical;
		sbio->logical = spage->logical;
		sbio->dev = spage->dev;
		bio = sbio->bio;
		if (!bio) {
			bio = btrfs_io_bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio);
			if (!bio)
				return -ENOMEM;
			sbio->bio = bio;
		}

		bio->bi_private = sbio;
		bio->bi_end_io = scrub_bio_end_io;
		bio->bi_bdev = sbio->dev->bdev;
		bio->bi_sector = sbio->physical >> 9;
		sbio->err = 0;
	} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
		   spage->physical ||
		   sbio->logical + sbio->page_count * PAGE_SIZE !=
		   spage->logical ||
		   sbio->dev != spage->dev) {
		/* not contiguous with the collected pages: flush and retry */
		scrub_submit(sctx);
		goto again;
	}

	/* recorded before bio_add_page(); only counted once that succeeds */
	sbio->pagev[sbio->page_count] = spage;
	ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
	if (ret != PAGE_SIZE) {
		if (sbio->page_count < 1) {
			/* even an empty bio refused the page: give up */
			bio_put(sbio->bio);
			sbio->bio = NULL;
			return -EIO;
		}
		/* bio is full: submit it and retry with another one */
		scrub_submit(sctx);
		goto again;
	}

	scrub_block_get(sblock); /* one for the page added to the bio */
	atomic_inc(&sblock->outstanding_pages);
	sbio->page_count++;
	if (sbio->page_count == sctx->pages_per_rd_bio)
		scrub_submit(sctx);

	return 0;
}
1973
1974static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
1975 u64 physical, struct btrfs_device *dev, u64 flags,
1976 u64 gen, int mirror_num, u8 *csum, int force,
1977 u64 physical_for_dev_replace)
1978{
1979 struct scrub_block *sblock;
1980 int index;
1981
1982 sblock = kzalloc(sizeof(*sblock), GFP_NOFS);
1983 if (!sblock) {
1984 spin_lock(&sctx->stat_lock);
1985 sctx->stat.malloc_errors++;
1986 spin_unlock(&sctx->stat_lock);
1987 return -ENOMEM;
1988 }
1989
1990
1991
1992 atomic_set(&sblock->ref_count, 1);
1993 sblock->sctx = sctx;
1994 sblock->no_io_error_seen = 1;
1995
1996 for (index = 0; len > 0; index++) {
1997 struct scrub_page *spage;
1998 u64 l = min_t(u64, len, PAGE_SIZE);
1999
2000 spage = kzalloc(sizeof(*spage), GFP_NOFS);
2001 if (!spage) {
2002leave_nomem:
2003 spin_lock(&sctx->stat_lock);
2004 sctx->stat.malloc_errors++;
2005 spin_unlock(&sctx->stat_lock);
2006 scrub_block_put(sblock);
2007 return -ENOMEM;
2008 }
2009 BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
2010 scrub_page_get(spage);
2011 sblock->pagev[index] = spage;
2012 spage->sblock = sblock;
2013 spage->dev = dev;
2014 spage->flags = flags;
2015 spage->generation = gen;
2016 spage->logical = logical;
2017 spage->physical = physical;
2018 spage->physical_for_dev_replace = physical_for_dev_replace;
2019 spage->mirror_num = mirror_num;
2020 if (csum) {
2021 spage->have_csum = 1;
2022 memcpy(spage->csum, csum, sctx->csum_size);
2023 } else {
2024 spage->have_csum = 0;
2025 }
2026 sblock->page_count++;
2027 spage->page = alloc_page(GFP_NOFS);
2028 if (!spage->page)
2029 goto leave_nomem;
2030 len -= l;
2031 logical += l;
2032 physical += l;
2033 physical_for_dev_replace += l;
2034 }
2035
2036 WARN_ON(sblock->page_count == 0);
2037 for (index = 0; index < sblock->page_count; index++) {
2038 struct scrub_page *spage = sblock->pagev[index];
2039 int ret;
2040
2041 ret = scrub_add_page_to_rd_bio(sctx, spage);
2042 if (ret) {
2043 scrub_block_put(sblock);
2044 return ret;
2045 }
2046 }
2047
2048 if (force)
2049 scrub_submit(sctx);
2050
2051
2052 scrub_block_put(sblock);
2053 return 0;
2054}
2055
2056static void scrub_bio_end_io(struct bio *bio, int err)
2057{
2058 struct scrub_bio *sbio = bio->bi_private;
2059 struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info;
2060
2061 sbio->err = err;
2062 sbio->bio = bio;
2063
2064 btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work);
2065}
2066
2067static void scrub_bio_end_io_worker(struct btrfs_work *work)
2068{
2069 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
2070 struct scrub_ctx *sctx = sbio->sctx;
2071 int i;
2072
2073 BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
2074 if (sbio->err) {
2075 for (i = 0; i < sbio->page_count; i++) {
2076 struct scrub_page *spage = sbio->pagev[i];
2077
2078 spage->io_error = 1;
2079 spage->sblock->no_io_error_seen = 0;
2080 }
2081 }
2082
2083
2084 for (i = 0; i < sbio->page_count; i++) {
2085 struct scrub_page *spage = sbio->pagev[i];
2086 struct scrub_block *sblock = spage->sblock;
2087
2088 if (atomic_dec_and_test(&sblock->outstanding_pages))
2089 scrub_block_complete(sblock);
2090 scrub_block_put(sblock);
2091 }
2092
2093 bio_put(sbio->bio);
2094 sbio->bio = NULL;
2095 spin_lock(&sctx->list_lock);
2096 sbio->next_free = sctx->first_free;
2097 sctx->first_free = sbio->index;
2098 spin_unlock(&sctx->list_lock);
2099
2100 if (sctx->is_dev_replace &&
2101 atomic_read(&sctx->wr_ctx.flush_all_writes)) {
2102 mutex_lock(&sctx->wr_ctx.wr_lock);
2103 scrub_wr_submit(sctx);
2104 mutex_unlock(&sctx->wr_ctx.wr_lock);
2105 }
2106
2107 scrub_pending_bio_dec(sctx);
2108}
2109
2110static void scrub_block_complete(struct scrub_block *sblock)
2111{
2112 if (!sblock->no_io_error_seen) {
2113 scrub_handle_errored_block(sblock);
2114 } else {
2115
2116
2117
2118
2119
2120 if (!scrub_checksum(sblock) && sblock->sctx->is_dev_replace)
2121 scrub_write_block_to_dev_replace(sblock);
2122 }
2123}
2124
2125static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len,
2126 u8 *csum)
2127{
2128 struct btrfs_ordered_sum *sum = NULL;
2129 int ret = 0;
2130 unsigned long i;
2131 unsigned long num_sectors;
2132
2133 while (!list_empty(&sctx->csum_list)) {
2134 sum = list_first_entry(&sctx->csum_list,
2135 struct btrfs_ordered_sum, list);
2136 if (sum->bytenr > logical)
2137 return 0;
2138 if (sum->bytenr + sum->len > logical)
2139 break;
2140
2141 ++sctx->stat.csum_discards;
2142 list_del(&sum->list);
2143 kfree(sum);
2144 sum = NULL;
2145 }
2146 if (!sum)
2147 return 0;
2148
2149 num_sectors = sum->len / sctx->sectorsize;
2150 for (i = 0; i < num_sectors; ++i) {
2151 if (sum->sums[i].bytenr == logical) {
2152 memcpy(csum, &sum->sums[i].sum, sctx->csum_size);
2153 ret = 1;
2154 break;
2155 }
2156 }
2157 if (ret && i == num_sectors - 1) {
2158 list_del(&sum->list);
2159 kfree(sum);
2160 }
2161 return ret;
2162}
2163
2164
2165static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len,
2166 u64 physical, struct btrfs_device *dev, u64 flags,
2167 u64 gen, int mirror_num, u64 physical_for_dev_replace)
2168{
2169 int ret;
2170 u8 csum[BTRFS_CSUM_SIZE];
2171 u32 blocksize;
2172
2173 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2174 blocksize = sctx->sectorsize;
2175 spin_lock(&sctx->stat_lock);
2176 sctx->stat.data_extents_scrubbed++;
2177 sctx->stat.data_bytes_scrubbed += len;
2178 spin_unlock(&sctx->stat_lock);
2179 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
2180 WARN_ON(sctx->nodesize != sctx->leafsize);
2181 blocksize = sctx->nodesize;
2182 spin_lock(&sctx->stat_lock);
2183 sctx->stat.tree_extents_scrubbed++;
2184 sctx->stat.tree_bytes_scrubbed += len;
2185 spin_unlock(&sctx->stat_lock);
2186 } else {
2187 blocksize = sctx->sectorsize;
2188 WARN_ON(1);
2189 }
2190
2191 while (len) {
2192 u64 l = min_t(u64, len, blocksize);
2193 int have_csum = 0;
2194
2195 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2196
2197 have_csum = scrub_find_csum(sctx, logical, l, csum);
2198 if (have_csum == 0)
2199 ++sctx->stat.no_csum;
2200 if (sctx->is_dev_replace && !have_csum) {
2201 ret = copy_nocow_pages(sctx, logical, l,
2202 mirror_num,
2203 physical_for_dev_replace);
2204 goto behind_scrub_pages;
2205 }
2206 }
2207 ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen,
2208 mirror_num, have_csum ? csum : NULL, 0,
2209 physical_for_dev_replace);
2210behind_scrub_pages:
2211 if (ret)
2212 return ret;
2213 len -= l;
2214 logical += l;
2215 physical += l;
2216 physical_for_dev_replace += l;
2217 }
2218 return 0;
2219}
2220
2221static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2222 struct map_lookup *map,
2223 struct btrfs_device *scrub_dev,
2224 int num, u64 base, u64 length,
2225 int is_dev_replace)
2226{
2227 struct btrfs_path *path;
2228 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
2229 struct btrfs_root *root = fs_info->extent_root;
2230 struct btrfs_root *csum_root = fs_info->csum_root;
2231 struct btrfs_extent_item *extent;
2232 struct blk_plug plug;
2233 u64 flags;
2234 int ret;
2235 int slot;
2236 u64 nstripes;
2237 struct extent_buffer *l;
2238 struct btrfs_key key;
2239 u64 physical;
2240 u64 logical;
2241 u64 logic_end;
2242 u64 generation;
2243 int mirror_num;
2244 struct reada_control *reada1;
2245 struct reada_control *reada2;
2246 struct btrfs_key key_start;
2247 struct btrfs_key key_end;
2248 u64 increment = map->stripe_len;
2249 u64 offset;
2250 u64 extent_logical;
2251 u64 extent_physical;
2252 u64 extent_len;
2253 struct btrfs_device *extent_dev;
2254 int extent_mirror_num;
2255 int stop_loop;
2256
2257 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
2258 BTRFS_BLOCK_GROUP_RAID6)) {
2259 if (num >= nr_data_stripes(map)) {
2260 return 0;
2261 }
2262 }
2263
2264 nstripes = length;
2265 offset = 0;
2266 do_div(nstripes, map->stripe_len);
2267 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
2268 offset = map->stripe_len * num;
2269 increment = map->stripe_len * map->num_stripes;
2270 mirror_num = 1;
2271 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
2272 int factor = map->num_stripes / map->sub_stripes;
2273 offset = map->stripe_len * (num / map->sub_stripes);
2274 increment = map->stripe_len * factor;
2275 mirror_num = num % map->sub_stripes + 1;
2276 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
2277 increment = map->stripe_len;
2278 mirror_num = num % map->num_stripes + 1;
2279 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
2280 increment = map->stripe_len;
2281 mirror_num = num % map->num_stripes + 1;
2282 } else {
2283 increment = map->stripe_len;
2284 mirror_num = 1;
2285 }
2286
2287 path = btrfs_alloc_path();
2288 if (!path)
2289 return -ENOMEM;
2290
2291
2292
2293
2294
2295
2296 path->search_commit_root = 1;
2297 path->skip_locking = 1;
2298
2299
2300
2301
2302
2303
2304 logical = base + offset;
2305
2306 wait_event(sctx->list_wait,
2307 atomic_read(&sctx->bios_in_flight) == 0);
2308 atomic_inc(&fs_info->scrubs_paused);
2309 wake_up(&fs_info->scrub_pause_wait);
2310
2311
2312 key_start.objectid = logical;
2313 key_start.type = BTRFS_EXTENT_ITEM_KEY;
2314 key_start.offset = (u64)0;
2315 key_end.objectid = base + offset + nstripes * increment;
2316 key_end.type = BTRFS_METADATA_ITEM_KEY;
2317 key_end.offset = (u64)-1;
2318 reada1 = btrfs_reada_add(root, &key_start, &key_end);
2319
2320 key_start.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
2321 key_start.type = BTRFS_EXTENT_CSUM_KEY;
2322 key_start.offset = logical;
2323 key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
2324 key_end.type = BTRFS_EXTENT_CSUM_KEY;
2325 key_end.offset = base + offset + nstripes * increment;
2326 reada2 = btrfs_reada_add(csum_root, &key_start, &key_end);
2327
2328 if (!IS_ERR(reada1))
2329 btrfs_reada_wait(reada1);
2330 if (!IS_ERR(reada2))
2331 btrfs_reada_wait(reada2);
2332
2333 mutex_lock(&fs_info->scrub_lock);
2334 while (atomic_read(&fs_info->scrub_pause_req)) {
2335 mutex_unlock(&fs_info->scrub_lock);
2336 wait_event(fs_info->scrub_pause_wait,
2337 atomic_read(&fs_info->scrub_pause_req) == 0);
2338 mutex_lock(&fs_info->scrub_lock);
2339 }
2340 atomic_dec(&fs_info->scrubs_paused);
2341 mutex_unlock(&fs_info->scrub_lock);
2342 wake_up(&fs_info->scrub_pause_wait);
2343
2344
2345
2346
2347
2348 blk_start_plug(&plug);
2349
2350
2351
2352
2353 logical = base + offset;
2354 physical = map->stripes[num].physical;
2355 logic_end = logical + increment * nstripes;
2356 ret = 0;
2357 while (logical < logic_end) {
2358
2359
2360
2361 if (atomic_read(&fs_info->scrub_cancel_req) ||
2362 atomic_read(&sctx->cancel_req)) {
2363 ret = -ECANCELED;
2364 goto out;
2365 }
2366
2367
2368
2369 if (atomic_read(&fs_info->scrub_pause_req)) {
2370
2371 atomic_set(&sctx->wr_ctx.flush_all_writes, 1);
2372 scrub_submit(sctx);
2373 mutex_lock(&sctx->wr_ctx.wr_lock);
2374 scrub_wr_submit(sctx);
2375 mutex_unlock(&sctx->wr_ctx.wr_lock);
2376 wait_event(sctx->list_wait,
2377 atomic_read(&sctx->bios_in_flight) == 0);
2378 atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
2379 atomic_inc(&fs_info->scrubs_paused);
2380 wake_up(&fs_info->scrub_pause_wait);
2381 mutex_lock(&fs_info->scrub_lock);
2382 while (atomic_read(&fs_info->scrub_pause_req)) {
2383 mutex_unlock(&fs_info->scrub_lock);
2384 wait_event(fs_info->scrub_pause_wait,
2385 atomic_read(&fs_info->scrub_pause_req) == 0);
2386 mutex_lock(&fs_info->scrub_lock);
2387 }
2388 atomic_dec(&fs_info->scrubs_paused);
2389 mutex_unlock(&fs_info->scrub_lock);
2390 wake_up(&fs_info->scrub_pause_wait);
2391 }
2392
2393 key.objectid = logical;
2394 key.type = BTRFS_EXTENT_ITEM_KEY;
2395 key.offset = (u64)-1;
2396
2397 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2398 if (ret < 0)
2399 goto out;
2400
2401 if (ret > 0) {
2402 ret = btrfs_previous_item(root, path, 0,
2403 BTRFS_EXTENT_ITEM_KEY);
2404 if (ret < 0)
2405 goto out;
2406 if (ret > 0) {
2407
2408
2409 btrfs_release_path(path);
2410 ret = btrfs_search_slot(NULL, root, &key,
2411 path, 0, 0);
2412 if (ret < 0)
2413 goto out;
2414 }
2415 }
2416
2417 stop_loop = 0;
2418 while (1) {
2419 u64 bytes;
2420
2421 l = path->nodes[0];
2422 slot = path->slots[0];
2423 if (slot >= btrfs_header_nritems(l)) {
2424 ret = btrfs_next_leaf(root, path);
2425 if (ret == 0)
2426 continue;
2427 if (ret < 0)
2428 goto out;
2429
2430 stop_loop = 1;
2431 break;
2432 }
2433 btrfs_item_key_to_cpu(l, &key, slot);
2434
2435 if (key.type == BTRFS_METADATA_ITEM_KEY)
2436 bytes = root->leafsize;
2437 else
2438 bytes = key.offset;
2439
2440 if (key.objectid + bytes <= logical)
2441 goto next;
2442
2443 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
2444 key.type != BTRFS_METADATA_ITEM_KEY)
2445 goto next;
2446
2447 if (key.objectid >= logical + map->stripe_len) {
2448
2449 if (key.objectid >= logic_end)
2450 stop_loop = 1;
2451 break;
2452 }
2453
2454 extent = btrfs_item_ptr(l, slot,
2455 struct btrfs_extent_item);
2456 flags = btrfs_extent_flags(l, extent);
2457 generation = btrfs_extent_generation(l, extent);
2458
2459 if (key.objectid < logical &&
2460 (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
2461 printk(KERN_ERR
2462 "btrfs scrub: tree block %llu spanning "
2463 "stripes, ignored. logical=%llu\n",
2464 (unsigned long long)key.objectid,
2465 (unsigned long long)logical);
2466 goto next;
2467 }
2468
2469again:
2470 extent_logical = key.objectid;
2471 extent_len = bytes;
2472
2473
2474
2475
2476 if (extent_logical < logical) {
2477 extent_len -= logical - extent_logical;
2478 extent_logical = logical;
2479 }
2480 if (extent_logical + extent_len >
2481 logical + map->stripe_len) {
2482 extent_len = logical + map->stripe_len -
2483 extent_logical;
2484 }
2485
2486 extent_physical = extent_logical - logical + physical;
2487 extent_dev = scrub_dev;
2488 extent_mirror_num = mirror_num;
2489 if (is_dev_replace)
2490 scrub_remap_extent(fs_info, extent_logical,
2491 extent_len, &extent_physical,
2492 &extent_dev,
2493 &extent_mirror_num);
2494
2495 ret = btrfs_lookup_csums_range(csum_root, logical,
2496 logical + map->stripe_len - 1,
2497 &sctx->csum_list, 1);
2498 if (ret)
2499 goto out;
2500
2501 ret = scrub_extent(sctx, extent_logical, extent_len,
2502 extent_physical, extent_dev, flags,
2503 generation, extent_mirror_num,
2504 extent_physical);
2505 if (ret)
2506 goto out;
2507
2508 if (extent_logical + extent_len <
2509 key.objectid + bytes) {
2510 logical += increment;
2511 physical += map->stripe_len;
2512
2513 if (logical < key.objectid + bytes) {
2514 cond_resched();
2515 goto again;
2516 }
2517
2518 if (logical >= logic_end) {
2519 stop_loop = 1;
2520 break;
2521 }
2522 }
2523next:
2524 path->slots[0]++;
2525 }
2526 btrfs_release_path(path);
2527 logical += increment;
2528 physical += map->stripe_len;
2529 spin_lock(&sctx->stat_lock);
2530 if (stop_loop)
2531 sctx->stat.last_physical = map->stripes[num].physical +
2532 length;
2533 else
2534 sctx->stat.last_physical = physical;
2535 spin_unlock(&sctx->stat_lock);
2536 if (stop_loop)
2537 break;
2538 }
2539out:
2540
2541 scrub_submit(sctx);
2542 mutex_lock(&sctx->wr_ctx.wr_lock);
2543 scrub_wr_submit(sctx);
2544 mutex_unlock(&sctx->wr_ctx.wr_lock);
2545
2546 blk_finish_plug(&plug);
2547 btrfs_free_path(path);
2548 return ret < 0 ? ret : 0;
2549}
2550
2551static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
2552 struct btrfs_device *scrub_dev,
2553 u64 chunk_tree, u64 chunk_objectid,
2554 u64 chunk_offset, u64 length,
2555 u64 dev_offset, int is_dev_replace)
2556{
2557 struct btrfs_mapping_tree *map_tree =
2558 &sctx->dev_root->fs_info->mapping_tree;
2559 struct map_lookup *map;
2560 struct extent_map *em;
2561 int i;
2562 int ret = 0;
2563
2564 read_lock(&map_tree->map_tree.lock);
2565 em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
2566 read_unlock(&map_tree->map_tree.lock);
2567
2568 if (!em)
2569 return -EINVAL;
2570
2571 map = (struct map_lookup *)em->bdev;
2572 if (em->start != chunk_offset)
2573 goto out;
2574
2575 if (em->len < length)
2576 goto out;
2577
2578 for (i = 0; i < map->num_stripes; ++i) {
2579 if (map->stripes[i].dev->bdev == scrub_dev->bdev &&
2580 map->stripes[i].physical == dev_offset) {
2581 ret = scrub_stripe(sctx, map, scrub_dev, i,
2582 chunk_offset, length,
2583 is_dev_replace);
2584 if (ret)
2585 goto out;
2586 }
2587 }
2588out:
2589 free_extent_map(em);
2590
2591 return ret;
2592}
2593
/*
 * Walk the dev extent items of @scrub_dev and scrub the chunk behind each
 * one that overlaps the physical range given by @start and @end.
 *
 * Dev extents that end at or before @start are skipped; the walk stops at
 * the first dev extent whose offset is >= @end.  After every chunk all
 * pending read and write bios are submitted and waited for, and the scrub
 * honours a pending pause request before moving on.
 *
 * Returns a negative errno on failure and 0 otherwise; positive values
 * left in ret by the tree search are reported as 0.
 */
static noinline_for_stack
int scrub_enumerate_chunks(struct scrub_ctx *sctx,
			   struct btrfs_device *scrub_dev, u64 start, u64 end,
			   int is_dev_replace)
{
	struct btrfs_dev_extent *dev_extent = NULL;
	struct btrfs_path *path;
	struct btrfs_root *root = sctx->dev_root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 length;
	u64 chunk_tree;
	u64 chunk_objectid;
	u64 chunk_offset;
	int ret;
	int slot;
	struct extent_buffer *l;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_block_group_cache *cache;
	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* the search runs against the commit root and takes no tree locks */
	path->reada = 2;
	path->search_commit_root = 1;
	path->skip_locking = 1;

	/* start at the first dev extent item of this device */
	key.objectid = scrub_dev->devid;
	key.offset = 0ull;
	key.type = BTRFS_DEV_EXTENT_KEY;

	while (1) {
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			break;
		if (ret > 0) {
			/*
			 * No exact match and the slot is past the last item
			 * of this leaf: continue in the next leaf.
			 */
			if (path->slots[0] >=
			    btrfs_header_nritems(path->nodes[0])) {
				ret = btrfs_next_leaf(root, path);
				if (ret)
					break;
			}
		}

		l = path->nodes[0];
		slot = path->slots[0];

		btrfs_item_key_to_cpu(l, &found_key, slot);

		/* stop once we leave this device's dev extent items */
		if (found_key.objectid != scrub_dev->devid)
			break;

		if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY)
			break;

		if (found_key.offset >= end)
			break;

		if (found_key.offset < key.offset)
			break;

		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
		length = btrfs_dev_extent_length(l, dev_extent);

		/* dev extent lies completely before @start: skip it */
		if (found_key.offset + length <= start) {
			key.offset = found_key.offset + length;
			btrfs_release_path(path);
			continue;
		}

		chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
		chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
		chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);

		/*
		 * Hold a reference on the block group of this chunk for the
		 * duration of the scrub; it is dropped again below.
		 * NOTE(review): presumably this keeps the chunk from going
		 * away while it is being scrubbed - confirm.
		 */
		cache = btrfs_lookup_block_group(fs_info, chunk_offset);
		if (!cache) {
			ret = -ENOENT;
			break;
		}
		/* publish the physical range being worked on to dev replace */
		dev_replace->cursor_right = found_key.offset + length;
		dev_replace->cursor_left = found_key.offset;
		dev_replace->item_needs_writeback = 1;
		ret = scrub_chunk(sctx, scrub_dev, chunk_tree, chunk_objectid,
				  chunk_offset, length, found_key.offset,
				  is_dev_replace);

		/*
		 * Flush: submit every pending read and write bio and wait
		 * until none is in flight.  flush_all_writes is raised for
		 * the duration of the wait and cleared afterwards.
		 * NOTE(review): presumably the flag makes writes generated
		 * by read completions go out immediately - confirm against
		 * the write path.
		 */
		atomic_set(&sctx->wr_ctx.flush_all_writes, 1);
		scrub_submit(sctx);
		mutex_lock(&sctx->wr_ctx.wr_lock);
		scrub_wr_submit(sctx);
		mutex_unlock(&sctx->wr_ctx.wr_lock);

		wait_event(sctx->list_wait,
			   atomic_read(&sctx->bios_in_flight) == 0);
		atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
		/*
		 * Count this scrub as paused while it waits for its workers
		 * and for any pause request to be withdrawn.
		 */
		atomic_inc(&fs_info->scrubs_paused);
		wake_up(&fs_info->scrub_pause_wait);
		wait_event(sctx->list_wait,
			   atomic_read(&sctx->workers_pending) == 0);

		mutex_lock(&fs_info->scrub_lock);
		while (atomic_read(&fs_info->scrub_pause_req)) {
			/* drop the lock while sleeping, re-check under it */
			mutex_unlock(&fs_info->scrub_lock);
			wait_event(fs_info->scrub_pause_wait,
			   atomic_read(&fs_info->scrub_pause_req) == 0);
			mutex_lock(&fs_info->scrub_lock);
		}
		atomic_dec(&fs_info->scrubs_paused);
		mutex_unlock(&fs_info->scrub_lock);
		wake_up(&fs_info->scrub_pause_wait);

		/* this dev extent is done: move the left cursor up */
		dev_replace->cursor_left = dev_replace->cursor_right;
		dev_replace->item_needs_writeback = 1;
		btrfs_put_block_group(cache);
		/* ret is still the result of scrub_chunk() at this point */
		if (ret)
			break;
		if (is_dev_replace &&
		    atomic64_read(&dev_replace->num_write_errors) > 0) {
			ret = -EIO;
			break;
		}
		if (sctx->stat.malloc_errors > 0) {
			ret = -ENOMEM;
			break;
		}

		/* continue the search right behind this dev extent */
		key.offset = found_key.offset + length;
		btrfs_release_path(path);
	}

	btrfs_free_path(path);

	/*
	 * ret may still hold a positive value from the tree search or the
	 * leaf walk; that is not reported as an error.
	 */
	return ret < 0 ? ret : 0;
}
2748
2749static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
2750 struct btrfs_device *scrub_dev)
2751{
2752 int i;
2753 u64 bytenr;
2754 u64 gen;
2755 int ret;
2756 struct btrfs_root *root = sctx->dev_root;
2757
2758 if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
2759 return -EIO;
2760
2761 gen = root->fs_info->last_trans_committed;
2762
2763 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
2764 bytenr = btrfs_sb_offset(i);
2765 if (bytenr + BTRFS_SUPER_INFO_SIZE > scrub_dev->total_bytes)
2766 break;
2767
2768 ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
2769 scrub_dev, BTRFS_EXTENT_FLAG_SUPER, gen, i,
2770 NULL, 1, bytenr);
2771 if (ret)
2772 return ret;
2773 }
2774 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
2775
2776 return 0;
2777}
2778
2779
2780
2781
2782static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
2783 int is_dev_replace)
2784{
2785 int ret = 0;
2786
2787 mutex_lock(&fs_info->scrub_lock);
2788 if (fs_info->scrub_workers_refcnt == 0) {
2789 if (is_dev_replace)
2790 btrfs_init_workers(&fs_info->scrub_workers, "scrub", 1,
2791 &fs_info->generic_worker);
2792 else
2793 btrfs_init_workers(&fs_info->scrub_workers, "scrub",
2794 fs_info->thread_pool_size,
2795 &fs_info->generic_worker);
2796 fs_info->scrub_workers.idle_thresh = 4;
2797 ret = btrfs_start_workers(&fs_info->scrub_workers);
2798 if (ret)
2799 goto out;
2800 btrfs_init_workers(&fs_info->scrub_wr_completion_workers,
2801 "scrubwrc",
2802 fs_info->thread_pool_size,
2803 &fs_info->generic_worker);
2804 fs_info->scrub_wr_completion_workers.idle_thresh = 2;
2805 ret = btrfs_start_workers(
2806 &fs_info->scrub_wr_completion_workers);
2807 if (ret)
2808 goto out;
2809 btrfs_init_workers(&fs_info->scrub_nocow_workers, "scrubnc", 1,
2810 &fs_info->generic_worker);
2811 ret = btrfs_start_workers(&fs_info->scrub_nocow_workers);
2812 if (ret)
2813 goto out;
2814 }
2815 ++fs_info->scrub_workers_refcnt;
2816out:
2817 mutex_unlock(&fs_info->scrub_lock);
2818
2819 return ret;
2820}
2821
2822static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
2823{
2824 mutex_lock(&fs_info->scrub_lock);
2825 if (--fs_info->scrub_workers_refcnt == 0) {
2826 btrfs_stop_workers(&fs_info->scrub_workers);
2827 btrfs_stop_workers(&fs_info->scrub_wr_completion_workers);
2828 btrfs_stop_workers(&fs_info->scrub_nocow_workers);
2829 }
2830 WARN_ON(fs_info->scrub_workers_refcnt < 0);
2831 mutex_unlock(&fs_info->scrub_lock);
2832}
2833
/*
 * Scrub one device, or run the copy phase of a device replace on it.
 *
 * @fs_info:        filesystem the device belongs to
 * @devid:          id of the device to look up and scrub
 * @start, @end:    range passed on to scrub_enumerate_chunks()
 * @progress:       if non-NULL, receives a copy of the scrub statistics
 *                  once the scrub has finished
 * @readonly:       stored in the scrub context's readonly field
 * @is_dev_replace: non-zero when called on behalf of device replace; the
 *                  super block scrub is skipped in that case
 *
 * Returns -EINVAL if the filesystem is closing or a size assumption does
 * not hold, -ENODEV if the device is not found (or is missing and this is
 * not a device replace), -EIO if the device is not in the fs metadata or
 * is a replace target, -EINPROGRESS if the device is already being
 * scrubbed or a replace is ongoing, an error from setting up the workers
 * or the scrub context, and otherwise the result of scrub_supers() /
 * scrub_enumerate_chunks().
 */
int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
		    u64 end, struct btrfs_scrub_progress *progress,
		    int readonly, int is_dev_replace)
{
	struct scrub_ctx *sctx;
	int ret;
	struct btrfs_device *dev;

	if (btrfs_fs_closing(fs_info))
		return -EINVAL;

	/*
	 * Check the size assumptions the scrub code relies on.
	 */
	if (fs_info->chunk_root->nodesize != fs_info->chunk_root->leafsize) {
		printk(KERN_ERR
		       "btrfs_scrub: size assumption nodesize == leafsize (%d == %d) fails\n",
		       fs_info->chunk_root->nodesize,
		       fs_info->chunk_root->leafsize);
		return -EINVAL;
	}

	if (fs_info->chunk_root->nodesize > BTRFS_STRIPE_LEN) {
		/*
		 * A tree block must fit into a single stripe.
		 * NOTE(review): scrub_stripe() reports tree blocks that span
		 * stripes as ignored - presumably that is the reason for
		 * this limit; confirm.
		 */
		printk(KERN_ERR
		       "btrfs_scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails\n",
		       fs_info->chunk_root->nodesize, BTRFS_STRIPE_LEN);
		return -EINVAL;
	}

	if (fs_info->chunk_root->sectorsize != PAGE_SIZE) {
		/* only sectorsize == PAGE_SIZE is accepted */
		printk(KERN_ERR
		       "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lld) fails\n",
		       fs_info->chunk_root->sectorsize,
		       (unsigned long long)PAGE_SIZE);
		return -EINVAL;
	}

	if (fs_info->chunk_root->nodesize >
	    PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK ||
	    fs_info->chunk_root->sectorsize >
	    PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK) {
		/*
		 * A scrub_block holds at most SCRUB_MAX_PAGES_PER_BLOCK
		 * pages, so neither a node nor a sector may be larger.
		 */
		pr_err("btrfs_scrub: size assumption nodesize and sectorsize <= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails\n",
		       fs_info->chunk_root->nodesize,
		       SCRUB_MAX_PAGES_PER_BLOCK,
		       fs_info->chunk_root->sectorsize,
		       SCRUB_MAX_PAGES_PER_BLOCK);
		return -EINVAL;
	}

	/* from here on every exit has to drop the worker reference again */
	ret = scrub_workers_get(fs_info, is_dev_replace);
	if (ret)
		return ret;

	mutex_lock(&fs_info->fs_devices->device_list_mutex);
	dev = btrfs_find_device(fs_info, devid, NULL, NULL);
	if (!dev || (dev->missing && !is_dev_replace)) {
		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
		scrub_workers_put(fs_info);
		return -ENODEV;
	}
	/* lock order: device_list_mutex, then scrub_lock */
	mutex_lock(&fs_info->scrub_lock);

	if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) {
		mutex_unlock(&fs_info->scrub_lock);
		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
		scrub_workers_put(fs_info);
		return -EIO;
	}

	/*
	 * Refuse if this device already has a scrub context attached, or if
	 * a plain scrub is requested while a device replace is ongoing.
	 */
	btrfs_dev_replace_lock(&fs_info->dev_replace);
	if (dev->scrub_device ||
	    (!is_dev_replace &&
	     btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
		btrfs_dev_replace_unlock(&fs_info->dev_replace);
		mutex_unlock(&fs_info->scrub_lock);
		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
		scrub_workers_put(fs_info);
		return -EINPROGRESS;
	}
	btrfs_dev_replace_unlock(&fs_info->dev_replace);
	sctx = scrub_setup_ctx(dev, is_dev_replace);
	if (IS_ERR(sctx)) {
		mutex_unlock(&fs_info->scrub_lock);
		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
		scrub_workers_put(fs_info);
		return PTR_ERR(sctx);
	}
	sctx->readonly = readonly;
	dev->scrub_device = sctx;

	/* the scrub counts as running before the locks are released */
	atomic_inc(&fs_info->scrubs_running);
	mutex_unlock(&fs_info->scrub_lock);
	mutex_unlock(&fs_info->fs_devices->device_list_mutex);

	/* ret is 0 here, so a device replace goes straight to the chunks */
	if (!is_dev_replace) {
		down_read(&fs_info->scrub_super_lock);
		ret = scrub_supers(sctx, dev);
		up_read(&fs_info->scrub_super_lock);
	}

	if (!ret)
		ret = scrub_enumerate_chunks(sctx, dev, start, end,
					     is_dev_replace);

	/* wait for outstanding bios, then announce that this scrub is done */
	wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
	atomic_dec(&fs_info->scrubs_running);
	wake_up(&fs_info->scrub_pause_wait);

	wait_event(sctx->list_wait, atomic_read(&sctx->workers_pending) == 0);

	if (progress)
		memcpy(progress, &sctx->stat, sizeof(*progress));

	/* detach the context from the device before freeing it */
	mutex_lock(&fs_info->scrub_lock);
	dev->scrub_device = NULL;
	mutex_unlock(&fs_info->scrub_lock);

	scrub_free_ctx(sctx);
	scrub_workers_put(fs_info);

	return ret;
}
2966
2967void btrfs_scrub_pause(struct btrfs_root *root)
2968{
2969 struct btrfs_fs_info *fs_info = root->fs_info;
2970
2971 mutex_lock(&fs_info->scrub_lock);
2972 atomic_inc(&fs_info->scrub_pause_req);
2973 while (atomic_read(&fs_info->scrubs_paused) !=
2974 atomic_read(&fs_info->scrubs_running)) {
2975 mutex_unlock(&fs_info->scrub_lock);
2976 wait_event(fs_info->scrub_pause_wait,
2977 atomic_read(&fs_info->scrubs_paused) ==
2978 atomic_read(&fs_info->scrubs_running));
2979 mutex_lock(&fs_info->scrub_lock);
2980 }
2981 mutex_unlock(&fs_info->scrub_lock);
2982}
2983
2984void btrfs_scrub_continue(struct btrfs_root *root)
2985{
2986 struct btrfs_fs_info *fs_info = root->fs_info;
2987
2988 atomic_dec(&fs_info->scrub_pause_req);
2989 wake_up(&fs_info->scrub_pause_wait);
2990}
2991
2992void btrfs_scrub_pause_super(struct btrfs_root *root)
2993{
2994 down_write(&root->fs_info->scrub_super_lock);
2995}
2996
2997void btrfs_scrub_continue_super(struct btrfs_root *root)
2998{
2999 up_write(&root->fs_info->scrub_super_lock);
3000}
3001
3002int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
3003{
3004 mutex_lock(&fs_info->scrub_lock);
3005 if (!atomic_read(&fs_info->scrubs_running)) {
3006 mutex_unlock(&fs_info->scrub_lock);
3007 return -ENOTCONN;
3008 }
3009
3010 atomic_inc(&fs_info->scrub_cancel_req);
3011 while (atomic_read(&fs_info->scrubs_running)) {
3012 mutex_unlock(&fs_info->scrub_lock);
3013 wait_event(fs_info->scrub_pause_wait,
3014 atomic_read(&fs_info->scrubs_running) == 0);
3015 mutex_lock(&fs_info->scrub_lock);
3016 }
3017 atomic_dec(&fs_info->scrub_cancel_req);
3018 mutex_unlock(&fs_info->scrub_lock);
3019
3020 return 0;
3021}
3022
3023int btrfs_scrub_cancel_dev(struct btrfs_fs_info *fs_info,
3024 struct btrfs_device *dev)
3025{
3026 struct scrub_ctx *sctx;
3027
3028 mutex_lock(&fs_info->scrub_lock);
3029 sctx = dev->scrub_device;
3030 if (!sctx) {
3031 mutex_unlock(&fs_info->scrub_lock);
3032 return -ENOTCONN;
3033 }
3034 atomic_inc(&sctx->cancel_req);
3035 while (dev->scrub_device) {
3036 mutex_unlock(&fs_info->scrub_lock);
3037 wait_event(fs_info->scrub_pause_wait,
3038 dev->scrub_device == NULL);
3039 mutex_lock(&fs_info->scrub_lock);
3040 }
3041 mutex_unlock(&fs_info->scrub_lock);
3042
3043 return 0;
3044}
3045
3046int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
3047 struct btrfs_scrub_progress *progress)
3048{
3049 struct btrfs_device *dev;
3050 struct scrub_ctx *sctx = NULL;
3051
3052 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
3053 dev = btrfs_find_device(root->fs_info, devid, NULL, NULL);
3054 if (dev)
3055 sctx = dev->scrub_device;
3056 if (sctx)
3057 memcpy(progress, &sctx->stat, sizeof(*progress));
3058 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
3059
3060 return dev ? (sctx ? 0 : -ENOTCONN) : -ENODEV;
3061}
3062
3063static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
3064 u64 extent_logical, u64 extent_len,
3065 u64 *extent_physical,
3066 struct btrfs_device **extent_dev,
3067 int *extent_mirror_num)
3068{
3069 u64 mapped_length;
3070 struct btrfs_bio *bbio = NULL;
3071 int ret;
3072
3073 mapped_length = extent_len;
3074 ret = btrfs_map_block(fs_info, READ, extent_logical,
3075 &mapped_length, &bbio, 0);
3076 if (ret || !bbio || mapped_length < extent_len ||
3077 !bbio->stripes[0].dev->bdev) {
3078 kfree(bbio);
3079 return;
3080 }
3081
3082 *extent_physical = bbio->stripes[0].physical;
3083 *extent_mirror_num = bbio->mirror_num;
3084 *extent_dev = bbio->stripes[0].dev;
3085 kfree(bbio);
3086}
3087
3088static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
3089 struct scrub_wr_ctx *wr_ctx,
3090 struct btrfs_fs_info *fs_info,
3091 struct btrfs_device *dev,
3092 int is_dev_replace)
3093{
3094 WARN_ON(wr_ctx->wr_curr_bio != NULL);
3095
3096 mutex_init(&wr_ctx->wr_lock);
3097 wr_ctx->wr_curr_bio = NULL;
3098 if (!is_dev_replace)
3099 return 0;
3100
3101 WARN_ON(!dev->bdev);
3102 wr_ctx->pages_per_wr_bio = min_t(int, SCRUB_PAGES_PER_WR_BIO,
3103 bio_get_nr_vecs(dev->bdev));
3104 wr_ctx->tgtdev = dev;
3105 atomic_set(&wr_ctx->flush_all_writes, 0);
3106 return 0;
3107}
3108
3109static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx)
3110{
3111 mutex_lock(&wr_ctx->wr_lock);
3112 kfree(wr_ctx->wr_curr_bio);
3113 wr_ctx->wr_curr_bio = NULL;
3114 mutex_unlock(&wr_ctx->wr_lock);
3115}
3116
3117static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
3118 int mirror_num, u64 physical_for_dev_replace)
3119{
3120 struct scrub_copy_nocow_ctx *nocow_ctx;
3121 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
3122
3123 nocow_ctx = kzalloc(sizeof(*nocow_ctx), GFP_NOFS);
3124 if (!nocow_ctx) {
3125 spin_lock(&sctx->stat_lock);
3126 sctx->stat.malloc_errors++;
3127 spin_unlock(&sctx->stat_lock);
3128 return -ENOMEM;
3129 }
3130
3131 scrub_pending_trans_workers_inc(sctx);
3132
3133 nocow_ctx->sctx = sctx;
3134 nocow_ctx->logical = logical;
3135 nocow_ctx->len = len;
3136 nocow_ctx->mirror_num = mirror_num;
3137 nocow_ctx->physical_for_dev_replace = physical_for_dev_replace;
3138 nocow_ctx->work.func = copy_nocow_pages_worker;
3139 btrfs_queue_worker(&fs_info->scrub_nocow_workers,
3140 &nocow_ctx->work);
3141
3142 return 0;
3143}
3144
3145static void copy_nocow_pages_worker(struct btrfs_work *work)
3146{
3147 struct scrub_copy_nocow_ctx *nocow_ctx =
3148 container_of(work, struct scrub_copy_nocow_ctx, work);
3149 struct scrub_ctx *sctx = nocow_ctx->sctx;
3150 u64 logical = nocow_ctx->logical;
3151 u64 len = nocow_ctx->len;
3152 int mirror_num = nocow_ctx->mirror_num;
3153 u64 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
3154 int ret;
3155 struct btrfs_trans_handle *trans = NULL;
3156 struct btrfs_fs_info *fs_info;
3157 struct btrfs_path *path;
3158 struct btrfs_root *root;
3159 int not_written = 0;
3160
3161 fs_info = sctx->dev_root->fs_info;
3162 root = fs_info->extent_root;
3163
3164 path = btrfs_alloc_path();
3165 if (!path) {
3166 spin_lock(&sctx->stat_lock);
3167 sctx->stat.malloc_errors++;
3168 spin_unlock(&sctx->stat_lock);
3169 not_written = 1;
3170 goto out;
3171 }
3172
3173 trans = btrfs_join_transaction(root);
3174 if (IS_ERR(trans)) {
3175 not_written = 1;
3176 goto out;
3177 }
3178
3179 ret = iterate_inodes_from_logical(logical, fs_info, path,
3180 copy_nocow_pages_for_inode,
3181 nocow_ctx);
3182 if (ret != 0 && ret != -ENOENT) {
3183 pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %llu, ret %d\n",
3184 (unsigned long long)logical,
3185 (unsigned long long)physical_for_dev_replace,
3186 (unsigned long long)len,
3187 (unsigned long long)mirror_num, ret);
3188 not_written = 1;
3189 goto out;
3190 }
3191
3192out:
3193 if (trans && !IS_ERR(trans))
3194 btrfs_end_transaction(trans, root);
3195 if (not_written)
3196 btrfs_dev_replace_stats_inc(&fs_info->dev_replace.
3197 num_uncorrectable_read_errors);
3198
3199 btrfs_free_path(path);
3200 kfree(nocow_ctx);
3201
3202 scrub_pending_trans_workers_dec(sctx);
3203}
3204
/*
 * Callback for iterate_inodes_from_logical(): copy the pages of one inode
 * that back the nocow extent described by @ctx to the device replace
 * target.
 *
 * @inum:   inode number
 * @offset: file offset at which the extent starts in that inode
 * @root:   objectid of the root the inode lives in
 * @ctx:    the struct scrub_copy_nocow_ctx of the work item
 *
 * The extent is processed in PAGE_CACHE_SIZE steps.  A failure on one page
 * does not stop the loop; the remaining pages are still attempted and the
 * most recent error is returned.
 *
 * Returns 0 on success, the error from looking up the root or the inode,
 * or the last per-page error.
 */
static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx)
{
	unsigned long index;
	struct scrub_copy_nocow_ctx *nocow_ctx = ctx;
	int ret = 0;
	struct btrfs_key key;
	struct inode *inode = NULL;
	struct btrfs_root *local_root;
	u64 physical_for_dev_replace;
	u64 len;
	struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info;
	int srcu_index;

	key.objectid = root;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	/* root and inode lookup happen inside the subvol SRCU read section */
	srcu_index = srcu_read_lock(&fs_info->subvol_srcu);

	local_root = btrfs_read_fs_root_no_name(fs_info, &key);
	if (IS_ERR(local_root)) {
		srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
		return PTR_ERR(local_root);
	}

	key.type = BTRFS_INODE_ITEM_KEY;
	key.objectid = inum;
	key.offset = 0;
	inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
	srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
	len = nocow_ctx->len;
	while (len >= PAGE_CACHE_SIZE) {
		struct page *page = NULL;
		int ret_sub;

		index = offset >> PAGE_CACHE_SHIFT;

		/* presumably returns the page locked - see next_page below */
		page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
		if (!page) {
			pr_err("find_or_create_page() failed\n");
			ret = -ENOMEM;
			goto next_page;
		}

		if (PageUptodate(page)) {
			/* a dirty up-to-date page is not copied here */
			if (PageDirty(page))
				goto next_page;
		} else {
			/* page has no valid data yet: read it from disk */
			ClearPageError(page);
			ret_sub = extent_read_full_page(&BTRFS_I(inode)->
							 io_tree,
							page, btrfs_get_extent,
							nocow_ctx->mirror_num);
			if (ret_sub) {
				ret = ret_sub;
				goto next_page;
			}
			/*
			 * NOTE(review): wait_on_page_locked() returns once
			 * the page is no longer locked, yet next_page calls
			 * unlock_page() unconditionally.  Confirm who holds
			 * the page lock on this path.
			 */
			wait_on_page_locked(page);
			if (!PageUptodate(page)) {
				ret = -EIO;
				goto next_page;
			}
		}
		ret_sub = write_page_nocow(nocow_ctx->sctx,
					   physical_for_dev_replace, page);
		if (ret_sub) {
			ret = ret_sub;
			goto next_page;
		}

next_page:
		/* page is NULL only when find_or_create_page() failed */
		if (page) {
			unlock_page(page);
			put_page(page);
		}
		/* advance to the next page whether or not this one succeeded */
		offset += PAGE_CACHE_SIZE;
		physical_for_dev_replace += PAGE_CACHE_SIZE;
		len -= PAGE_CACHE_SIZE;
	}

	if (inode)
		iput(inode);
	return ret;
}
3293
3294static int write_page_nocow(struct scrub_ctx *sctx,
3295 u64 physical_for_dev_replace, struct page *page)
3296{
3297 struct bio *bio;
3298 struct btrfs_device *dev;
3299 int ret;
3300 DECLARE_COMPLETION_ONSTACK(compl);
3301
3302 dev = sctx->wr_ctx.tgtdev;
3303 if (!dev)
3304 return -EIO;
3305 if (!dev->bdev) {
3306 printk_ratelimited(KERN_WARNING
3307 "btrfs: scrub write_page_nocow(bdev == NULL) is unexpected!\n");
3308 return -EIO;
3309 }
3310 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
3311 if (!bio) {
3312 spin_lock(&sctx->stat_lock);
3313 sctx->stat.malloc_errors++;
3314 spin_unlock(&sctx->stat_lock);
3315 return -ENOMEM;
3316 }
3317 bio->bi_private = &compl;
3318 bio->bi_end_io = scrub_complete_bio_end_io;
3319 bio->bi_size = 0;
3320 bio->bi_sector = physical_for_dev_replace >> 9;
3321 bio->bi_bdev = dev->bdev;
3322 ret = bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
3323 if (ret != PAGE_CACHE_SIZE) {
3324leave_with_eio:
3325 bio_put(bio);
3326 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
3327 return -EIO;
3328 }
3329 btrfsic_submit_bio(WRITE_SYNC, bio);
3330 wait_for_completion(&compl);
3331
3332 if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
3333 goto leave_with_eio;
3334
3335 bio_put(bio);
3336 return 0;
3337}
3338