1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19#include <linux/blkdev.h>
20#include <linux/ratelimit.h>
21#include "ctree.h"
22#include "volumes.h"
23#include "disk-io.h"
24#include "ordered-data.h"
25#include "transaction.h"
26#include "backref.h"
27#include "extent_io.h"
28#include "dev-replace.h"
29#include "check-integrity.h"
30#include "rcu-string.h"
31#include "raid56.h"
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46struct scrub_block;
47struct scrub_ctx;
48
49
50
51
52
53
54
55#define SCRUB_PAGES_PER_RD_BIO 32
56#define SCRUB_PAGES_PER_WR_BIO 32
57#define SCRUB_BIOS_PER_SCTX 64
58
59
60
61
62
63
64#define SCRUB_MAX_PAGES_PER_BLOCK 16
65
66struct scrub_page {
67 struct scrub_block *sblock;
68 struct page *page;
69 struct btrfs_device *dev;
70 u64 flags;
71 u64 generation;
72 u64 logical;
73 u64 physical;
74 u64 physical_for_dev_replace;
75 atomic_t ref_count;
76 struct {
77 unsigned int mirror_num:8;
78 unsigned int have_csum:1;
79 unsigned int io_error:1;
80 };
81 u8 csum[BTRFS_CSUM_SIZE];
82};
83
84struct scrub_bio {
85 int index;
86 struct scrub_ctx *sctx;
87 struct btrfs_device *dev;
88 struct bio *bio;
89 int err;
90 u64 logical;
91 u64 physical;
92#if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
93 struct scrub_page *pagev[SCRUB_PAGES_PER_WR_BIO];
94#else
95 struct scrub_page *pagev[SCRUB_PAGES_PER_RD_BIO];
96#endif
97 int page_count;
98 int next_free;
99 struct btrfs_work work;
100};
101
102struct scrub_block {
103 struct scrub_page *pagev[SCRUB_MAX_PAGES_PER_BLOCK];
104 int page_count;
105 atomic_t outstanding_pages;
106 atomic_t ref_count;
107 struct scrub_ctx *sctx;
108 struct {
109 unsigned int header_error:1;
110 unsigned int checksum_error:1;
111 unsigned int no_io_error_seen:1;
112 unsigned int generation_error:1;
113 };
114};
115
116struct scrub_wr_ctx {
117 struct scrub_bio *wr_curr_bio;
118 struct btrfs_device *tgtdev;
119 int pages_per_wr_bio;
120 atomic_t flush_all_writes;
121 struct mutex wr_lock;
122};
123
124struct scrub_ctx {
125 struct scrub_bio *bios[SCRUB_BIOS_PER_SCTX];
126 struct btrfs_root *dev_root;
127 int first_free;
128 int curr;
129 atomic_t bios_in_flight;
130 atomic_t workers_pending;
131 spinlock_t list_lock;
132 wait_queue_head_t list_wait;
133 u16 csum_size;
134 struct list_head csum_list;
135 atomic_t cancel_req;
136 int readonly;
137 int pages_per_rd_bio;
138 u32 sectorsize;
139 u32 nodesize;
140 u32 leafsize;
141
142 int is_dev_replace;
143 struct scrub_wr_ctx wr_ctx;
144
145
146
147
148 struct btrfs_scrub_progress stat;
149 spinlock_t stat_lock;
150};
151
152struct scrub_fixup_nodatasum {
153 struct scrub_ctx *sctx;
154 struct btrfs_device *dev;
155 u64 logical;
156 struct btrfs_root *root;
157 struct btrfs_work work;
158 int mirror_num;
159};
160
161struct scrub_copy_nocow_ctx {
162 struct scrub_ctx *sctx;
163 u64 logical;
164 u64 len;
165 int mirror_num;
166 u64 physical_for_dev_replace;
167 struct btrfs_work work;
168};
169
170struct scrub_warning {
171 struct btrfs_path *path;
172 u64 extent_item_size;
173 char *scratch_buf;
174 char *msg_buf;
175 const char *errstr;
176 sector_t sector;
177 u64 logical;
178 struct btrfs_device *dev;
179 int msg_bufsize;
180 int scratch_bufsize;
181};
182
183
184static void scrub_pending_bio_inc(struct scrub_ctx *sctx);
185static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
186static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx);
187static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx);
188static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
189static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
190 struct btrfs_fs_info *fs_info,
191 struct scrub_block *original_sblock,
192 u64 length, u64 logical,
193 struct scrub_block *sblocks_for_recheck);
194static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
195 struct scrub_block *sblock, int is_metadata,
196 int have_csum, u8 *csum, u64 generation,
197 u16 csum_size);
198static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
199 struct scrub_block *sblock,
200 int is_metadata, int have_csum,
201 const u8 *csum, u64 generation,
202 u16 csum_size);
203static void scrub_complete_bio_end_io(struct bio *bio, int err);
204static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
205 struct scrub_block *sblock_good,
206 int force_write);
207static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
208 struct scrub_block *sblock_good,
209 int page_num, int force_write);
210static void scrub_write_block_to_dev_replace(struct scrub_block *sblock);
211static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
212 int page_num);
213static int scrub_checksum_data(struct scrub_block *sblock);
214static int scrub_checksum_tree_block(struct scrub_block *sblock);
215static int scrub_checksum_super(struct scrub_block *sblock);
216static void scrub_block_get(struct scrub_block *sblock);
217static void scrub_block_put(struct scrub_block *sblock);
218static void scrub_page_get(struct scrub_page *spage);
219static void scrub_page_put(struct scrub_page *spage);
220static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
221 struct scrub_page *spage);
222static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
223 u64 physical, struct btrfs_device *dev, u64 flags,
224 u64 gen, int mirror_num, u8 *csum, int force,
225 u64 physical_for_dev_replace);
226static void scrub_bio_end_io(struct bio *bio, int err);
227static void scrub_bio_end_io_worker(struct btrfs_work *work);
228static void scrub_block_complete(struct scrub_block *sblock);
229static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
230 u64 extent_logical, u64 extent_len,
231 u64 *extent_physical,
232 struct btrfs_device **extent_dev,
233 int *extent_mirror_num);
234static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
235 struct scrub_wr_ctx *wr_ctx,
236 struct btrfs_fs_info *fs_info,
237 struct btrfs_device *dev,
238 int is_dev_replace);
239static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx);
240static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
241 struct scrub_page *spage);
242static void scrub_wr_submit(struct scrub_ctx *sctx);
243static void scrub_wr_bio_end_io(struct bio *bio, int err);
244static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
245static int write_page_nocow(struct scrub_ctx *sctx,
246 u64 physical_for_dev_replace, struct page *page);
247static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
248 void *ctx);
249static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
250 int mirror_num, u64 physical_for_dev_replace);
251static void copy_nocow_pages_worker(struct btrfs_work *work);
252
253
254static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
255{
256 atomic_inc(&sctx->bios_in_flight);
257}
258
259static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
260{
261 atomic_dec(&sctx->bios_in_flight);
262 wake_up(&sctx->list_wait);
263}
264
265
266
267
268
269static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx)
270{
271 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
272
273
274
275
276
277
278
279
280
281
282 mutex_lock(&fs_info->scrub_lock);
283 atomic_inc(&fs_info->scrubs_running);
284 atomic_inc(&fs_info->scrubs_paused);
285 mutex_unlock(&fs_info->scrub_lock);
286 atomic_inc(&sctx->workers_pending);
287}
288
289
290static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx)
291{
292 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
293
294
295
296
297
298 mutex_lock(&fs_info->scrub_lock);
299 atomic_dec(&fs_info->scrubs_running);
300 atomic_dec(&fs_info->scrubs_paused);
301 mutex_unlock(&fs_info->scrub_lock);
302 atomic_dec(&sctx->workers_pending);
303 wake_up(&fs_info->scrub_pause_wait);
304 wake_up(&sctx->list_wait);
305}
306
307static void scrub_free_csums(struct scrub_ctx *sctx)
308{
309 while (!list_empty(&sctx->csum_list)) {
310 struct btrfs_ordered_sum *sum;
311 sum = list_first_entry(&sctx->csum_list,
312 struct btrfs_ordered_sum, list);
313 list_del(&sum->list);
314 kfree(sum);
315 }
316}
317
318static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
319{
320 int i;
321
322 if (!sctx)
323 return;
324
325 scrub_free_wr_ctx(&sctx->wr_ctx);
326
327
328 if (sctx->curr != -1) {
329 struct scrub_bio *sbio = sctx->bios[sctx->curr];
330
331 for (i = 0; i < sbio->page_count; i++) {
332 WARN_ON(!sbio->pagev[i]->page);
333 scrub_block_put(sbio->pagev[i]->sblock);
334 }
335 bio_put(sbio->bio);
336 }
337
338 for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
339 struct scrub_bio *sbio = sctx->bios[i];
340
341 if (!sbio)
342 break;
343 kfree(sbio);
344 }
345
346 scrub_free_csums(sctx);
347 kfree(sctx);
348}
349
350static noinline_for_stack
351struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
352{
353 struct scrub_ctx *sctx;
354 int i;
355 struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
356 int pages_per_rd_bio;
357 int ret;
358
359
360
361
362
363
364
365
366 if (dev->bdev)
367 pages_per_rd_bio = min_t(int, SCRUB_PAGES_PER_RD_BIO,
368 bio_get_nr_vecs(dev->bdev));
369 else
370 pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
371 sctx = kzalloc(sizeof(*sctx), GFP_NOFS);
372 if (!sctx)
373 goto nomem;
374 sctx->is_dev_replace = is_dev_replace;
375 sctx->pages_per_rd_bio = pages_per_rd_bio;
376 sctx->curr = -1;
377 sctx->dev_root = dev->dev_root;
378 for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
379 struct scrub_bio *sbio;
380
381 sbio = kzalloc(sizeof(*sbio), GFP_NOFS);
382 if (!sbio)
383 goto nomem;
384 sctx->bios[i] = sbio;
385
386 sbio->index = i;
387 sbio->sctx = sctx;
388 sbio->page_count = 0;
389 sbio->work.func = scrub_bio_end_io_worker;
390
391 if (i != SCRUB_BIOS_PER_SCTX - 1)
392 sctx->bios[i]->next_free = i + 1;
393 else
394 sctx->bios[i]->next_free = -1;
395 }
396 sctx->first_free = 0;
397 sctx->nodesize = dev->dev_root->nodesize;
398 sctx->leafsize = dev->dev_root->leafsize;
399 sctx->sectorsize = dev->dev_root->sectorsize;
400 atomic_set(&sctx->bios_in_flight, 0);
401 atomic_set(&sctx->workers_pending, 0);
402 atomic_set(&sctx->cancel_req, 0);
403 sctx->csum_size = btrfs_super_csum_size(fs_info->super_copy);
404 INIT_LIST_HEAD(&sctx->csum_list);
405
406 spin_lock_init(&sctx->list_lock);
407 spin_lock_init(&sctx->stat_lock);
408 init_waitqueue_head(&sctx->list_wait);
409
410 ret = scrub_setup_wr_ctx(sctx, &sctx->wr_ctx, fs_info,
411 fs_info->dev_replace.tgtdev, is_dev_replace);
412 if (ret) {
413 scrub_free_ctx(sctx);
414 return ERR_PTR(ret);
415 }
416 return sctx;
417
418nomem:
419 scrub_free_ctx(sctx);
420 return ERR_PTR(-ENOMEM);
421}
422
423static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
424 void *warn_ctx)
425{
426 u64 isize;
427 u32 nlink;
428 int ret;
429 int i;
430 struct extent_buffer *eb;
431 struct btrfs_inode_item *inode_item;
432 struct scrub_warning *swarn = warn_ctx;
433 struct btrfs_fs_info *fs_info = swarn->dev->dev_root->fs_info;
434 struct inode_fs_paths *ipath = NULL;
435 struct btrfs_root *local_root;
436 struct btrfs_key root_key;
437
438 root_key.objectid = root;
439 root_key.type = BTRFS_ROOT_ITEM_KEY;
440 root_key.offset = (u64)-1;
441 local_root = btrfs_read_fs_root_no_name(fs_info, &root_key);
442 if (IS_ERR(local_root)) {
443 ret = PTR_ERR(local_root);
444 goto err;
445 }
446
447 ret = inode_item_info(inum, 0, local_root, swarn->path);
448 if (ret) {
449 btrfs_release_path(swarn->path);
450 goto err;
451 }
452
453 eb = swarn->path->nodes[0];
454 inode_item = btrfs_item_ptr(eb, swarn->path->slots[0],
455 struct btrfs_inode_item);
456 isize = btrfs_inode_size(eb, inode_item);
457 nlink = btrfs_inode_nlink(eb, inode_item);
458 btrfs_release_path(swarn->path);
459
460 ipath = init_ipath(4096, local_root, swarn->path);
461 if (IS_ERR(ipath)) {
462 ret = PTR_ERR(ipath);
463 ipath = NULL;
464 goto err;
465 }
466 ret = paths_from_inode(inum, ipath);
467
468 if (ret < 0)
469 goto err;
470
471
472
473
474
475 for (i = 0; i < ipath->fspath->elem_cnt; ++i)
476 printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "
477 "%s, sector %llu, root %llu, inode %llu, offset %llu, "
478 "length %llu, links %u (path: %s)\n", swarn->errstr,
479 swarn->logical, rcu_str_deref(swarn->dev->name),
480 (unsigned long long)swarn->sector, root, inum, offset,
481 min(isize - offset, (u64)PAGE_SIZE), nlink,
482 (char *)(unsigned long)ipath->fspath->val[i]);
483
484 free_ipath(ipath);
485 return 0;
486
487err:
488 printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "
489 "%s, sector %llu, root %llu, inode %llu, offset %llu: path "
490 "resolving failed with ret=%d\n", swarn->errstr,
491 swarn->logical, rcu_str_deref(swarn->dev->name),
492 (unsigned long long)swarn->sector, root, inum, offset, ret);
493
494 free_ipath(ipath);
495 return 0;
496}
497
498static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
499{
500 struct btrfs_device *dev;
501 struct btrfs_fs_info *fs_info;
502 struct btrfs_path *path;
503 struct btrfs_key found_key;
504 struct extent_buffer *eb;
505 struct btrfs_extent_item *ei;
506 struct scrub_warning swarn;
507 unsigned long ptr = 0;
508 u64 extent_item_pos;
509 u64 flags = 0;
510 u64 ref_root;
511 u32 item_size;
512 u8 ref_level;
513 const int bufsize = 4096;
514 int ret;
515
516 WARN_ON(sblock->page_count < 1);
517 dev = sblock->pagev[0]->dev;
518 fs_info = sblock->sctx->dev_root->fs_info;
519
520 path = btrfs_alloc_path();
521
522 swarn.scratch_buf = kmalloc(bufsize, GFP_NOFS);
523 swarn.msg_buf = kmalloc(bufsize, GFP_NOFS);
524 swarn.sector = (sblock->pagev[0]->physical) >> 9;
525 swarn.logical = sblock->pagev[0]->logical;
526 swarn.errstr = errstr;
527 swarn.dev = NULL;
528 swarn.msg_bufsize = bufsize;
529 swarn.scratch_bufsize = bufsize;
530
531 if (!path || !swarn.scratch_buf || !swarn.msg_buf)
532 goto out;
533
534 ret = extent_from_logical(fs_info, swarn.logical, path, &found_key,
535 &flags);
536 if (ret < 0)
537 goto out;
538
539 extent_item_pos = swarn.logical - found_key.objectid;
540 swarn.extent_item_size = found_key.offset;
541
542 eb = path->nodes[0];
543 ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
544 item_size = btrfs_item_size_nr(eb, path->slots[0]);
545
546 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
547 do {
548 ret = tree_backref_for_extent(&ptr, eb, ei, item_size,
549 &ref_root, &ref_level);
550 printk_in_rcu(KERN_WARNING
551 "btrfs: %s at logical %llu on dev %s, "
552 "sector %llu: metadata %s (level %d) in tree "
553 "%llu\n", errstr, swarn.logical,
554 rcu_str_deref(dev->name),
555 (unsigned long long)swarn.sector,
556 ref_level ? "node" : "leaf",
557 ret < 0 ? -1 : ref_level,
558 ret < 0 ? -1 : ref_root);
559 } while (ret != 1);
560 btrfs_release_path(path);
561 } else {
562 btrfs_release_path(path);
563 swarn.path = path;
564 swarn.dev = dev;
565 iterate_extent_inodes(fs_info, found_key.objectid,
566 extent_item_pos, 1,
567 scrub_print_warning_inode, &swarn);
568 }
569
570out:
571 btrfs_free_path(path);
572 kfree(swarn.scratch_buf);
573 kfree(swarn.msg_buf);
574}
575
576static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
577{
578 struct page *page = NULL;
579 unsigned long index;
580 struct scrub_fixup_nodatasum *fixup = fixup_ctx;
581 int ret;
582 int corrected = 0;
583 struct btrfs_key key;
584 struct inode *inode = NULL;
585 struct btrfs_fs_info *fs_info;
586 u64 end = offset + PAGE_SIZE - 1;
587 struct btrfs_root *local_root;
588 int srcu_index;
589
590 key.objectid = root;
591 key.type = BTRFS_ROOT_ITEM_KEY;
592 key.offset = (u64)-1;
593
594 fs_info = fixup->root->fs_info;
595 srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
596
597 local_root = btrfs_read_fs_root_no_name(fs_info, &key);
598 if (IS_ERR(local_root)) {
599 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
600 return PTR_ERR(local_root);
601 }
602
603 key.type = BTRFS_INODE_ITEM_KEY;
604 key.objectid = inum;
605 key.offset = 0;
606 inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
607 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
608 if (IS_ERR(inode))
609 return PTR_ERR(inode);
610
611 index = offset >> PAGE_CACHE_SHIFT;
612
613 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
614 if (!page) {
615 ret = -ENOMEM;
616 goto out;
617 }
618
619 if (PageUptodate(page)) {
620 if (PageDirty(page)) {
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637 ret = -EIO;
638 goto out;
639 }
640 fs_info = BTRFS_I(inode)->root->fs_info;
641 ret = repair_io_failure(fs_info, offset, PAGE_SIZE,
642 fixup->logical, page,
643 fixup->mirror_num);
644 unlock_page(page);
645 corrected = !ret;
646 } else {
647
648
649
650
651
652 ret = set_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
653 EXTENT_DAMAGED, GFP_NOFS);
654 if (ret) {
655
656 WARN_ON(ret > 0);
657 if (ret > 0)
658 ret = -EFAULT;
659 goto out;
660 }
661
662 ret = extent_read_full_page(&BTRFS_I(inode)->io_tree, page,
663 btrfs_get_extent,
664 fixup->mirror_num);
665 wait_on_page_locked(page);
666
667 corrected = !test_range_bit(&BTRFS_I(inode)->io_tree, offset,
668 end, EXTENT_DAMAGED, 0, NULL);
669 if (!corrected)
670 clear_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
671 EXTENT_DAMAGED, GFP_NOFS);
672 }
673
674out:
675 if (page)
676 put_page(page);
677 if (inode)
678 iput(inode);
679
680 if (ret < 0)
681 return ret;
682
683 if (ret == 0 && corrected) {
684
685
686
687
688 return 1;
689 }
690
691 return -EIO;
692}
693
694static void scrub_fixup_nodatasum(struct btrfs_work *work)
695{
696 int ret;
697 struct scrub_fixup_nodatasum *fixup;
698 struct scrub_ctx *sctx;
699 struct btrfs_trans_handle *trans = NULL;
700 struct btrfs_fs_info *fs_info;
701 struct btrfs_path *path;
702 int uncorrectable = 0;
703
704 fixup = container_of(work, struct scrub_fixup_nodatasum, work);
705 sctx = fixup->sctx;
706 fs_info = fixup->root->fs_info;
707
708 path = btrfs_alloc_path();
709 if (!path) {
710 spin_lock(&sctx->stat_lock);
711 ++sctx->stat.malloc_errors;
712 spin_unlock(&sctx->stat_lock);
713 uncorrectable = 1;
714 goto out;
715 }
716
717 trans = btrfs_join_transaction(fixup->root);
718 if (IS_ERR(trans)) {
719 uncorrectable = 1;
720 goto out;
721 }
722
723
724
725
726
727
728
729
730
731
732 ret = iterate_inodes_from_logical(fixup->logical, fixup->root->fs_info,
733 path, scrub_fixup_readpage,
734 fixup);
735 if (ret < 0) {
736 uncorrectable = 1;
737 goto out;
738 }
739 WARN_ON(ret != 1);
740
741 spin_lock(&sctx->stat_lock);
742 ++sctx->stat.corrected_errors;
743 spin_unlock(&sctx->stat_lock);
744
745out:
746 if (trans && !IS_ERR(trans))
747 btrfs_end_transaction(trans, fixup->root);
748 if (uncorrectable) {
749 spin_lock(&sctx->stat_lock);
750 ++sctx->stat.uncorrectable_errors;
751 spin_unlock(&sctx->stat_lock);
752 btrfs_dev_replace_stats_inc(
753 &sctx->dev_root->fs_info->dev_replace.
754 num_uncorrectable_read_errors);
755 printk_ratelimited_in_rcu(KERN_ERR
756 "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n",
757 (unsigned long long)fixup->logical,
758 rcu_str_deref(fixup->dev->name));
759 }
760
761 btrfs_free_path(path);
762 kfree(fixup);
763
764 scrub_pending_trans_workers_dec(sctx);
765}
766
767
768
769
770
771
772
773
774
775static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
776{
777 struct scrub_ctx *sctx = sblock_to_check->sctx;
778 struct btrfs_device *dev;
779 struct btrfs_fs_info *fs_info;
780 u64 length;
781 u64 logical;
782 u64 generation;
783 unsigned int failed_mirror_index;
784 unsigned int is_metadata;
785 unsigned int have_csum;
786 u8 *csum;
787 struct scrub_block *sblocks_for_recheck;
788 struct scrub_block *sblock_bad;
789 int ret;
790 int mirror_index;
791 int page_num;
792 int success;
793 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
794 DEFAULT_RATELIMIT_BURST);
795
796 BUG_ON(sblock_to_check->page_count < 1);
797 fs_info = sctx->dev_root->fs_info;
798 if (sblock_to_check->pagev[0]->flags & BTRFS_EXTENT_FLAG_SUPER) {
799
800
801
802
803
804 spin_lock(&sctx->stat_lock);
805 ++sctx->stat.super_errors;
806 spin_unlock(&sctx->stat_lock);
807 return 0;
808 }
809 length = sblock_to_check->page_count * PAGE_SIZE;
810 logical = sblock_to_check->pagev[0]->logical;
811 generation = sblock_to_check->pagev[0]->generation;
812 BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1);
813 failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1;
814 is_metadata = !(sblock_to_check->pagev[0]->flags &
815 BTRFS_EXTENT_FLAG_DATA);
816 have_csum = sblock_to_check->pagev[0]->have_csum;
817 csum = sblock_to_check->pagev[0]->csum;
818 dev = sblock_to_check->pagev[0]->dev;
819
820 if (sctx->is_dev_replace && !is_metadata && !have_csum) {
821 sblocks_for_recheck = NULL;
822 goto nodatasum_case;
823 }
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854 sblocks_for_recheck = kzalloc(BTRFS_MAX_MIRRORS *
855 sizeof(*sblocks_for_recheck),
856 GFP_NOFS);
857 if (!sblocks_for_recheck) {
858 spin_lock(&sctx->stat_lock);
859 sctx->stat.malloc_errors++;
860 sctx->stat.read_errors++;
861 sctx->stat.uncorrectable_errors++;
862 spin_unlock(&sctx->stat_lock);
863 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
864 goto out;
865 }
866
867
868 ret = scrub_setup_recheck_block(sctx, fs_info, sblock_to_check, length,
869 logical, sblocks_for_recheck);
870 if (ret) {
871 spin_lock(&sctx->stat_lock);
872 sctx->stat.read_errors++;
873 sctx->stat.uncorrectable_errors++;
874 spin_unlock(&sctx->stat_lock);
875 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
876 goto out;
877 }
878 BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
879 sblock_bad = sblocks_for_recheck + failed_mirror_index;
880
881
882 scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum,
883 csum, generation, sctx->csum_size);
884
885 if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
886 sblock_bad->no_io_error_seen) {
887
888
889
890
891
892
893
894
895 spin_lock(&sctx->stat_lock);
896 sctx->stat.unverified_errors++;
897 spin_unlock(&sctx->stat_lock);
898
899 if (sctx->is_dev_replace)
900 scrub_write_block_to_dev_replace(sblock_bad);
901 goto out;
902 }
903
904 if (!sblock_bad->no_io_error_seen) {
905 spin_lock(&sctx->stat_lock);
906 sctx->stat.read_errors++;
907 spin_unlock(&sctx->stat_lock);
908 if (__ratelimit(&_rs))
909 scrub_print_warning("i/o error", sblock_to_check);
910 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
911 } else if (sblock_bad->checksum_error) {
912 spin_lock(&sctx->stat_lock);
913 sctx->stat.csum_errors++;
914 spin_unlock(&sctx->stat_lock);
915 if (__ratelimit(&_rs))
916 scrub_print_warning("checksum error", sblock_to_check);
917 btrfs_dev_stat_inc_and_print(dev,
918 BTRFS_DEV_STAT_CORRUPTION_ERRS);
919 } else if (sblock_bad->header_error) {
920 spin_lock(&sctx->stat_lock);
921 sctx->stat.verify_errors++;
922 spin_unlock(&sctx->stat_lock);
923 if (__ratelimit(&_rs))
924 scrub_print_warning("checksum/header error",
925 sblock_to_check);
926 if (sblock_bad->generation_error)
927 btrfs_dev_stat_inc_and_print(dev,
928 BTRFS_DEV_STAT_GENERATION_ERRS);
929 else
930 btrfs_dev_stat_inc_and_print(dev,
931 BTRFS_DEV_STAT_CORRUPTION_ERRS);
932 }
933
934 if (sctx->readonly && !sctx->is_dev_replace)
935 goto did_not_correct_error;
936
937 if (!is_metadata && !have_csum) {
938 struct scrub_fixup_nodatasum *fixup_nodatasum;
939
940nodatasum_case:
941 WARN_ON(sctx->is_dev_replace);
942
943
944
945
946
947
948
949
950 fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS);
951 if (!fixup_nodatasum)
952 goto did_not_correct_error;
953 fixup_nodatasum->sctx = sctx;
954 fixup_nodatasum->dev = dev;
955 fixup_nodatasum->logical = logical;
956 fixup_nodatasum->root = fs_info->extent_root;
957 fixup_nodatasum->mirror_num = failed_mirror_index + 1;
958 scrub_pending_trans_workers_inc(sctx);
959 fixup_nodatasum->work.func = scrub_fixup_nodatasum;
960 btrfs_queue_worker(&fs_info->scrub_workers,
961 &fixup_nodatasum->work);
962 goto out;
963 }
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980 for (mirror_index = 0;
981 mirror_index < BTRFS_MAX_MIRRORS &&
982 sblocks_for_recheck[mirror_index].page_count > 0;
983 mirror_index++) {
984 struct scrub_block *sblock_other;
985
986 if (mirror_index == failed_mirror_index)
987 continue;
988 sblock_other = sblocks_for_recheck + mirror_index;
989
990
991 scrub_recheck_block(fs_info, sblock_other, is_metadata,
992 have_csum, csum, generation,
993 sctx->csum_size);
994
995 if (!sblock_other->header_error &&
996 !sblock_other->checksum_error &&
997 sblock_other->no_io_error_seen) {
998 if (sctx->is_dev_replace) {
999 scrub_write_block_to_dev_replace(sblock_other);
1000 } else {
1001 int force_write = is_metadata || have_csum;
1002
1003 ret = scrub_repair_block_from_good_copy(
1004 sblock_bad, sblock_other,
1005 force_write);
1006 }
1007 if (0 == ret)
1008 goto corrected_error;
1009 }
1010 }
1011
1012
1013
1014
1015 if (sctx->is_dev_replace) {
1016 success = 1;
1017 for (page_num = 0; page_num < sblock_bad->page_count;
1018 page_num++) {
1019 int sub_success;
1020
1021 sub_success = 0;
1022 for (mirror_index = 0;
1023 mirror_index < BTRFS_MAX_MIRRORS &&
1024 sblocks_for_recheck[mirror_index].page_count > 0;
1025 mirror_index++) {
1026 struct scrub_block *sblock_other =
1027 sblocks_for_recheck + mirror_index;
1028 struct scrub_page *page_other =
1029 sblock_other->pagev[page_num];
1030
1031 if (!page_other->io_error) {
1032 ret = scrub_write_page_to_dev_replace(
1033 sblock_other, page_num);
1034 if (ret == 0) {
1035
1036 sub_success = 1;
1037 break;
1038 } else {
1039 btrfs_dev_replace_stats_inc(
1040 &sctx->dev_root->
1041 fs_info->dev_replace.
1042 num_write_errors);
1043 }
1044 }
1045 }
1046
1047 if (!sub_success) {
1048
1049
1050
1051
1052
1053
1054
1055 success = 0;
1056 ret = scrub_write_page_to_dev_replace(
1057 sblock_bad, page_num);
1058 if (ret)
1059 btrfs_dev_replace_stats_inc(
1060 &sctx->dev_root->fs_info->
1061 dev_replace.num_write_errors);
1062 }
1063 }
1064
1065 goto out;
1066 }
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095 if (sblock_bad->no_io_error_seen)
1096 goto did_not_correct_error;
1097
1098 success = 1;
1099 for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
1100 struct scrub_page *page_bad = sblock_bad->pagev[page_num];
1101
1102 if (!page_bad->io_error)
1103 continue;
1104
1105 for (mirror_index = 0;
1106 mirror_index < BTRFS_MAX_MIRRORS &&
1107 sblocks_for_recheck[mirror_index].page_count > 0;
1108 mirror_index++) {
1109 struct scrub_block *sblock_other = sblocks_for_recheck +
1110 mirror_index;
1111 struct scrub_page *page_other = sblock_other->pagev[
1112 page_num];
1113
1114 if (!page_other->io_error) {
1115 ret = scrub_repair_page_from_good_copy(
1116 sblock_bad, sblock_other, page_num, 0);
1117 if (0 == ret) {
1118 page_bad->io_error = 0;
1119 break;
1120 }
1121 }
1122 }
1123
1124 if (page_bad->io_error) {
1125
1126 success = 0;
1127 }
1128 }
1129
1130 if (success) {
1131 if (is_metadata || have_csum) {
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141 scrub_recheck_block(fs_info, sblock_bad,
1142 is_metadata, have_csum, csum,
1143 generation, sctx->csum_size);
1144 if (!sblock_bad->header_error &&
1145 !sblock_bad->checksum_error &&
1146 sblock_bad->no_io_error_seen)
1147 goto corrected_error;
1148 else
1149 goto did_not_correct_error;
1150 } else {
1151corrected_error:
1152 spin_lock(&sctx->stat_lock);
1153 sctx->stat.corrected_errors++;
1154 spin_unlock(&sctx->stat_lock);
1155 printk_ratelimited_in_rcu(KERN_ERR
1156 "btrfs: fixed up error at logical %llu on dev %s\n",
1157 (unsigned long long)logical,
1158 rcu_str_deref(dev->name));
1159 }
1160 } else {
1161did_not_correct_error:
1162 spin_lock(&sctx->stat_lock);
1163 sctx->stat.uncorrectable_errors++;
1164 spin_unlock(&sctx->stat_lock);
1165 printk_ratelimited_in_rcu(KERN_ERR
1166 "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n",
1167 (unsigned long long)logical,
1168 rcu_str_deref(dev->name));
1169 }
1170
1171out:
1172 if (sblocks_for_recheck) {
1173 for (mirror_index = 0; mirror_index < BTRFS_MAX_MIRRORS;
1174 mirror_index++) {
1175 struct scrub_block *sblock = sblocks_for_recheck +
1176 mirror_index;
1177 int page_index;
1178
1179 for (page_index = 0; page_index < sblock->page_count;
1180 page_index++) {
1181 sblock->pagev[page_index]->sblock = NULL;
1182 scrub_page_put(sblock->pagev[page_index]);
1183 }
1184 }
1185 kfree(sblocks_for_recheck);
1186 }
1187
1188 return 0;
1189}
1190
1191static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
1192 struct btrfs_fs_info *fs_info,
1193 struct scrub_block *original_sblock,
1194 u64 length, u64 logical,
1195 struct scrub_block *sblocks_for_recheck)
1196{
1197 int page_index;
1198 int mirror_index;
1199 int ret;
1200
1201
1202
1203
1204
1205
1206
1207 page_index = 0;
1208 while (length > 0) {
1209 u64 sublen = min_t(u64, length, PAGE_SIZE);
1210 u64 mapped_length = sublen;
1211 struct btrfs_bio *bbio = NULL;
1212
1213
1214
1215
1216
1217 ret = btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, logical,
1218 &mapped_length, &bbio, 0);
1219 if (ret || !bbio || mapped_length < sublen) {
1220 kfree(bbio);
1221 return -EIO;
1222 }
1223
1224 BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO);
1225 for (mirror_index = 0; mirror_index < (int)bbio->num_stripes;
1226 mirror_index++) {
1227 struct scrub_block *sblock;
1228 struct scrub_page *page;
1229
1230 if (mirror_index >= BTRFS_MAX_MIRRORS)
1231 continue;
1232
1233 sblock = sblocks_for_recheck + mirror_index;
1234 sblock->sctx = sctx;
1235 page = kzalloc(sizeof(*page), GFP_NOFS);
1236 if (!page) {
1237leave_nomem:
1238 spin_lock(&sctx->stat_lock);
1239 sctx->stat.malloc_errors++;
1240 spin_unlock(&sctx->stat_lock);
1241 kfree(bbio);
1242 return -ENOMEM;
1243 }
1244 scrub_page_get(page);
1245 sblock->pagev[page_index] = page;
1246 page->logical = logical;
1247 page->physical = bbio->stripes[mirror_index].physical;
1248 BUG_ON(page_index >= original_sblock->page_count);
1249 page->physical_for_dev_replace =
1250 original_sblock->pagev[page_index]->
1251 physical_for_dev_replace;
1252
1253 page->dev = bbio->stripes[mirror_index].dev;
1254 page->mirror_num = mirror_index + 1;
1255 sblock->page_count++;
1256 page->page = alloc_page(GFP_NOFS);
1257 if (!page->page)
1258 goto leave_nomem;
1259 }
1260 kfree(bbio);
1261 length -= sublen;
1262 logical += sublen;
1263 page_index++;
1264 }
1265
1266 return 0;
1267}
1268
1269
1270
1271
1272
1273
1274
1275
1276static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
1277 struct scrub_block *sblock, int is_metadata,
1278 int have_csum, u8 *csum, u64 generation,
1279 u16 csum_size)
1280{
1281 int page_num;
1282
1283 sblock->no_io_error_seen = 1;
1284 sblock->header_error = 0;
1285 sblock->checksum_error = 0;
1286
1287 for (page_num = 0; page_num < sblock->page_count; page_num++) {
1288 struct bio *bio;
1289 struct scrub_page *page = sblock->pagev[page_num];
1290 DECLARE_COMPLETION_ONSTACK(complete);
1291
1292 if (page->dev->bdev == NULL) {
1293 page->io_error = 1;
1294 sblock->no_io_error_seen = 0;
1295 continue;
1296 }
1297
1298 WARN_ON(!page->page);
1299 bio = bio_alloc(GFP_NOFS, 1);
1300 if (!bio) {
1301 page->io_error = 1;
1302 sblock->no_io_error_seen = 0;
1303 continue;
1304 }
1305 bio->bi_bdev = page->dev->bdev;
1306 bio->bi_sector = page->physical >> 9;
1307 bio->bi_end_io = scrub_complete_bio_end_io;
1308 bio->bi_private = &complete;
1309
1310 bio_add_page(bio, page->page, PAGE_SIZE, 0);
1311 btrfsic_submit_bio(READ, bio);
1312
1313
1314 wait_for_completion(&complete);
1315
1316 page->io_error = !test_bit(BIO_UPTODATE, &bio->bi_flags);
1317 if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
1318 sblock->no_io_error_seen = 0;
1319 bio_put(bio);
1320 }
1321
1322 if (sblock->no_io_error_seen)
1323 scrub_recheck_block_checksum(fs_info, sblock, is_metadata,
1324 have_csum, csum, generation,
1325 csum_size);
1326
1327 return;
1328}
1329
1330static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
1331 struct scrub_block *sblock,
1332 int is_metadata, int have_csum,
1333 const u8 *csum, u64 generation,
1334 u16 csum_size)
1335{
1336 int page_num;
1337 u8 calculated_csum[BTRFS_CSUM_SIZE];
1338 u32 crc = ~(u32)0;
1339 struct btrfs_root *root = fs_info->extent_root;
1340 void *mapped_buffer;
1341
1342 WARN_ON(!sblock->pagev[0]->page);
1343 if (is_metadata) {
1344 struct btrfs_header *h;
1345
1346 mapped_buffer = kmap_atomic(sblock->pagev[0]->page);
1347 h = (struct btrfs_header *)mapped_buffer;
1348
1349 if (sblock->pagev[0]->logical != le64_to_cpu(h->bytenr) ||
1350 memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) ||
1351 memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
1352 BTRFS_UUID_SIZE)) {
1353 sblock->header_error = 1;
1354 } else if (generation != le64_to_cpu(h->generation)) {
1355 sblock->header_error = 1;
1356 sblock->generation_error = 1;
1357 }
1358 csum = h->csum;
1359 } else {
1360 if (!have_csum)
1361 return;
1362
1363 mapped_buffer = kmap_atomic(sblock->pagev[0]->page);
1364 }
1365
1366 for (page_num = 0;;) {
1367 if (page_num == 0 && is_metadata)
1368 crc = btrfs_csum_data(root,
1369 ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE,
1370 crc, PAGE_SIZE - BTRFS_CSUM_SIZE);
1371 else
1372 crc = btrfs_csum_data(root, mapped_buffer, crc,
1373 PAGE_SIZE);
1374
1375 kunmap_atomic(mapped_buffer);
1376 page_num++;
1377 if (page_num >= sblock->page_count)
1378 break;
1379 WARN_ON(!sblock->pagev[page_num]->page);
1380
1381 mapped_buffer = kmap_atomic(sblock->pagev[page_num]->page);
1382 }
1383
1384 btrfs_csum_final(crc, calculated_csum);
1385 if (memcmp(calculated_csum, csum, csum_size))
1386 sblock->checksum_error = 1;
1387}
1388
1389static void scrub_complete_bio_end_io(struct bio *bio, int err)
1390{
1391 complete((struct completion *)bio->bi_private);
1392}
1393
1394static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
1395 struct scrub_block *sblock_good,
1396 int force_write)
1397{
1398 int page_num;
1399 int ret = 0;
1400
1401 for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
1402 int ret_sub;
1403
1404 ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
1405 sblock_good,
1406 page_num,
1407 force_write);
1408 if (ret_sub)
1409 ret = ret_sub;
1410 }
1411
1412 return ret;
1413}
1414
1415static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
1416 struct scrub_block *sblock_good,
1417 int page_num, int force_write)
1418{
1419 struct scrub_page *page_bad = sblock_bad->pagev[page_num];
1420 struct scrub_page *page_good = sblock_good->pagev[page_num];
1421
1422 BUG_ON(page_bad->page == NULL);
1423 BUG_ON(page_good->page == NULL);
1424 if (force_write || sblock_bad->header_error ||
1425 sblock_bad->checksum_error || page_bad->io_error) {
1426 struct bio *bio;
1427 int ret;
1428 DECLARE_COMPLETION_ONSTACK(complete);
1429
1430 if (!page_bad->dev->bdev) {
1431 printk_ratelimited(KERN_WARNING
1432 "btrfs: scrub_repair_page_from_good_copy(bdev == NULL) is unexpected!\n");
1433 return -EIO;
1434 }
1435
1436 bio = bio_alloc(GFP_NOFS, 1);
1437 if (!bio)
1438 return -EIO;
1439 bio->bi_bdev = page_bad->dev->bdev;
1440 bio->bi_sector = page_bad->physical >> 9;
1441 bio->bi_end_io = scrub_complete_bio_end_io;
1442 bio->bi_private = &complete;
1443
1444 ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
1445 if (PAGE_SIZE != ret) {
1446 bio_put(bio);
1447 return -EIO;
1448 }
1449 btrfsic_submit_bio(WRITE, bio);
1450
1451
1452 wait_for_completion(&complete);
1453 if (!bio_flagged(bio, BIO_UPTODATE)) {
1454 btrfs_dev_stat_inc_and_print(page_bad->dev,
1455 BTRFS_DEV_STAT_WRITE_ERRS);
1456 btrfs_dev_replace_stats_inc(
1457 &sblock_bad->sctx->dev_root->fs_info->
1458 dev_replace.num_write_errors);
1459 bio_put(bio);
1460 return -EIO;
1461 }
1462 bio_put(bio);
1463 }
1464
1465 return 0;
1466}
1467
1468static void scrub_write_block_to_dev_replace(struct scrub_block *sblock)
1469{
1470 int page_num;
1471
1472 for (page_num = 0; page_num < sblock->page_count; page_num++) {
1473 int ret;
1474
1475 ret = scrub_write_page_to_dev_replace(sblock, page_num);
1476 if (ret)
1477 btrfs_dev_replace_stats_inc(
1478 &sblock->sctx->dev_root->fs_info->dev_replace.
1479 num_write_errors);
1480 }
1481}
1482
1483static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
1484 int page_num)
1485{
1486 struct scrub_page *spage = sblock->pagev[page_num];
1487
1488 BUG_ON(spage->page == NULL);
1489 if (spage->io_error) {
1490 void *mapped_buffer = kmap_atomic(spage->page);
1491
1492 memset(mapped_buffer, 0, PAGE_CACHE_SIZE);
1493 flush_dcache_page(spage->page);
1494 kunmap_atomic(mapped_buffer);
1495 }
1496 return scrub_add_page_to_wr_bio(sblock->sctx, spage);
1497}
1498
1499static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
1500 struct scrub_page *spage)
1501{
1502 struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx;
1503 struct scrub_bio *sbio;
1504 int ret;
1505
1506 mutex_lock(&wr_ctx->wr_lock);
1507again:
1508 if (!wr_ctx->wr_curr_bio) {
1509 wr_ctx->wr_curr_bio = kzalloc(sizeof(*wr_ctx->wr_curr_bio),
1510 GFP_NOFS);
1511 if (!wr_ctx->wr_curr_bio) {
1512 mutex_unlock(&wr_ctx->wr_lock);
1513 return -ENOMEM;
1514 }
1515 wr_ctx->wr_curr_bio->sctx = sctx;
1516 wr_ctx->wr_curr_bio->page_count = 0;
1517 }
1518 sbio = wr_ctx->wr_curr_bio;
1519 if (sbio->page_count == 0) {
1520 struct bio *bio;
1521
1522 sbio->physical = spage->physical_for_dev_replace;
1523 sbio->logical = spage->logical;
1524 sbio->dev = wr_ctx->tgtdev;
1525 bio = sbio->bio;
1526 if (!bio) {
1527 bio = bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio);
1528 if (!bio) {
1529 mutex_unlock(&wr_ctx->wr_lock);
1530 return -ENOMEM;
1531 }
1532 sbio->bio = bio;
1533 }
1534
1535 bio->bi_private = sbio;
1536 bio->bi_end_io = scrub_wr_bio_end_io;
1537 bio->bi_bdev = sbio->dev->bdev;
1538 bio->bi_sector = sbio->physical >> 9;
1539 sbio->err = 0;
1540 } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
1541 spage->physical_for_dev_replace ||
1542 sbio->logical + sbio->page_count * PAGE_SIZE !=
1543 spage->logical) {
1544 scrub_wr_submit(sctx);
1545 goto again;
1546 }
1547
1548 ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
1549 if (ret != PAGE_SIZE) {
1550 if (sbio->page_count < 1) {
1551 bio_put(sbio->bio);
1552 sbio->bio = NULL;
1553 mutex_unlock(&wr_ctx->wr_lock);
1554 return -EIO;
1555 }
1556 scrub_wr_submit(sctx);
1557 goto again;
1558 }
1559
1560 sbio->pagev[sbio->page_count] = spage;
1561 scrub_page_get(spage);
1562 sbio->page_count++;
1563 if (sbio->page_count == wr_ctx->pages_per_wr_bio)
1564 scrub_wr_submit(sctx);
1565 mutex_unlock(&wr_ctx->wr_lock);
1566
1567 return 0;
1568}
1569
1570static void scrub_wr_submit(struct scrub_ctx *sctx)
1571{
1572 struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx;
1573 struct scrub_bio *sbio;
1574
1575 if (!wr_ctx->wr_curr_bio)
1576 return;
1577
1578 sbio = wr_ctx->wr_curr_bio;
1579 wr_ctx->wr_curr_bio = NULL;
1580 WARN_ON(!sbio->bio->bi_bdev);
1581 scrub_pending_bio_inc(sctx);
1582
1583
1584
1585
1586 btrfsic_submit_bio(WRITE, sbio->bio);
1587}
1588
1589static void scrub_wr_bio_end_io(struct bio *bio, int err)
1590{
1591 struct scrub_bio *sbio = bio->bi_private;
1592 struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info;
1593
1594 sbio->err = err;
1595 sbio->bio = bio;
1596
1597 sbio->work.func = scrub_wr_bio_end_io_worker;
1598 btrfs_queue_worker(&fs_info->scrub_wr_completion_workers, &sbio->work);
1599}
1600
1601static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
1602{
1603 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
1604 struct scrub_ctx *sctx = sbio->sctx;
1605 int i;
1606
1607 WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
1608 if (sbio->err) {
1609 struct btrfs_dev_replace *dev_replace =
1610 &sbio->sctx->dev_root->fs_info->dev_replace;
1611
1612 for (i = 0; i < sbio->page_count; i++) {
1613 struct scrub_page *spage = sbio->pagev[i];
1614
1615 spage->io_error = 1;
1616 btrfs_dev_replace_stats_inc(&dev_replace->
1617 num_write_errors);
1618 }
1619 }
1620
1621 for (i = 0; i < sbio->page_count; i++)
1622 scrub_page_put(sbio->pagev[i]);
1623
1624 bio_put(sbio->bio);
1625 kfree(sbio);
1626 scrub_pending_bio_dec(sctx);
1627}
1628
1629static int scrub_checksum(struct scrub_block *sblock)
1630{
1631 u64 flags;
1632 int ret;
1633
1634 WARN_ON(sblock->page_count < 1);
1635 flags = sblock->pagev[0]->flags;
1636 ret = 0;
1637 if (flags & BTRFS_EXTENT_FLAG_DATA)
1638 ret = scrub_checksum_data(sblock);
1639 else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
1640 ret = scrub_checksum_tree_block(sblock);
1641 else if (flags & BTRFS_EXTENT_FLAG_SUPER)
1642 (void)scrub_checksum_super(sblock);
1643 else
1644 WARN_ON(1);
1645 if (ret)
1646 scrub_handle_errored_block(sblock);
1647
1648 return ret;
1649}
1650
1651static int scrub_checksum_data(struct scrub_block *sblock)
1652{
1653 struct scrub_ctx *sctx = sblock->sctx;
1654 u8 csum[BTRFS_CSUM_SIZE];
1655 u8 *on_disk_csum;
1656 struct page *page;
1657 void *buffer;
1658 u32 crc = ~(u32)0;
1659 int fail = 0;
1660 struct btrfs_root *root = sctx->dev_root;
1661 u64 len;
1662 int index;
1663
1664 BUG_ON(sblock->page_count < 1);
1665 if (!sblock->pagev[0]->have_csum)
1666 return 0;
1667
1668 on_disk_csum = sblock->pagev[0]->csum;
1669 page = sblock->pagev[0]->page;
1670 buffer = kmap_atomic(page);
1671
1672 len = sctx->sectorsize;
1673 index = 0;
1674 for (;;) {
1675 u64 l = min_t(u64, len, PAGE_SIZE);
1676
1677 crc = btrfs_csum_data(root, buffer, crc, l);
1678 kunmap_atomic(buffer);
1679 len -= l;
1680 if (len == 0)
1681 break;
1682 index++;
1683 BUG_ON(index >= sblock->page_count);
1684 BUG_ON(!sblock->pagev[index]->page);
1685 page = sblock->pagev[index]->page;
1686 buffer = kmap_atomic(page);
1687 }
1688
1689 btrfs_csum_final(crc, csum);
1690 if (memcmp(csum, on_disk_csum, sctx->csum_size))
1691 fail = 1;
1692
1693 return fail;
1694}
1695
1696static int scrub_checksum_tree_block(struct scrub_block *sblock)
1697{
1698 struct scrub_ctx *sctx = sblock->sctx;
1699 struct btrfs_header *h;
1700 struct btrfs_root *root = sctx->dev_root;
1701 struct btrfs_fs_info *fs_info = root->fs_info;
1702 u8 calculated_csum[BTRFS_CSUM_SIZE];
1703 u8 on_disk_csum[BTRFS_CSUM_SIZE];
1704 struct page *page;
1705 void *mapped_buffer;
1706 u64 mapped_size;
1707 void *p;
1708 u32 crc = ~(u32)0;
1709 int fail = 0;
1710 int crc_fail = 0;
1711 u64 len;
1712 int index;
1713
1714 BUG_ON(sblock->page_count < 1);
1715 page = sblock->pagev[0]->page;
1716 mapped_buffer = kmap_atomic(page);
1717 h = (struct btrfs_header *)mapped_buffer;
1718 memcpy(on_disk_csum, h->csum, sctx->csum_size);
1719
1720
1721
1722
1723
1724
1725
1726 if (sblock->pagev[0]->logical != le64_to_cpu(h->bytenr))
1727 ++fail;
1728
1729 if (sblock->pagev[0]->generation != le64_to_cpu(h->generation))
1730 ++fail;
1731
1732 if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
1733 ++fail;
1734
1735 if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
1736 BTRFS_UUID_SIZE))
1737 ++fail;
1738
1739 WARN_ON(sctx->nodesize != sctx->leafsize);
1740 len = sctx->nodesize - BTRFS_CSUM_SIZE;
1741 mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
1742 p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
1743 index = 0;
1744 for (;;) {
1745 u64 l = min_t(u64, len, mapped_size);
1746
1747 crc = btrfs_csum_data(root, p, crc, l);
1748 kunmap_atomic(mapped_buffer);
1749 len -= l;
1750 if (len == 0)
1751 break;
1752 index++;
1753 BUG_ON(index >= sblock->page_count);
1754 BUG_ON(!sblock->pagev[index]->page);
1755 page = sblock->pagev[index]->page;
1756 mapped_buffer = kmap_atomic(page);
1757 mapped_size = PAGE_SIZE;
1758 p = mapped_buffer;
1759 }
1760
1761 btrfs_csum_final(crc, calculated_csum);
1762 if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
1763 ++crc_fail;
1764
1765 return fail || crc_fail;
1766}
1767
1768static int scrub_checksum_super(struct scrub_block *sblock)
1769{
1770 struct btrfs_super_block *s;
1771 struct scrub_ctx *sctx = sblock->sctx;
1772 struct btrfs_root *root = sctx->dev_root;
1773 struct btrfs_fs_info *fs_info = root->fs_info;
1774 u8 calculated_csum[BTRFS_CSUM_SIZE];
1775 u8 on_disk_csum[BTRFS_CSUM_SIZE];
1776 struct page *page;
1777 void *mapped_buffer;
1778 u64 mapped_size;
1779 void *p;
1780 u32 crc = ~(u32)0;
1781 int fail_gen = 0;
1782 int fail_cor = 0;
1783 u64 len;
1784 int index;
1785
1786 BUG_ON(sblock->page_count < 1);
1787 page = sblock->pagev[0]->page;
1788 mapped_buffer = kmap_atomic(page);
1789 s = (struct btrfs_super_block *)mapped_buffer;
1790 memcpy(on_disk_csum, s->csum, sctx->csum_size);
1791
1792 if (sblock->pagev[0]->logical != le64_to_cpu(s->bytenr))
1793 ++fail_cor;
1794
1795 if (sblock->pagev[0]->generation != le64_to_cpu(s->generation))
1796 ++fail_gen;
1797
1798 if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
1799 ++fail_cor;
1800
1801 len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE;
1802 mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
1803 p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
1804 index = 0;
1805 for (;;) {
1806 u64 l = min_t(u64, len, mapped_size);
1807
1808 crc = btrfs_csum_data(root, p, crc, l);
1809 kunmap_atomic(mapped_buffer);
1810 len -= l;
1811 if (len == 0)
1812 break;
1813 index++;
1814 BUG_ON(index >= sblock->page_count);
1815 BUG_ON(!sblock->pagev[index]->page);
1816 page = sblock->pagev[index]->page;
1817 mapped_buffer = kmap_atomic(page);
1818 mapped_size = PAGE_SIZE;
1819 p = mapped_buffer;
1820 }
1821
1822 btrfs_csum_final(crc, calculated_csum);
1823 if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
1824 ++fail_cor;
1825
1826 if (fail_cor + fail_gen) {
1827
1828
1829
1830
1831
1832 spin_lock(&sctx->stat_lock);
1833 ++sctx->stat.super_errors;
1834 spin_unlock(&sctx->stat_lock);
1835 if (fail_cor)
1836 btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
1837 BTRFS_DEV_STAT_CORRUPTION_ERRS);
1838 else
1839 btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
1840 BTRFS_DEV_STAT_GENERATION_ERRS);
1841 }
1842
1843 return fail_cor + fail_gen;
1844}
1845
1846static void scrub_block_get(struct scrub_block *sblock)
1847{
1848 atomic_inc(&sblock->ref_count);
1849}
1850
1851static void scrub_block_put(struct scrub_block *sblock)
1852{
1853 if (atomic_dec_and_test(&sblock->ref_count)) {
1854 int i;
1855
1856 for (i = 0; i < sblock->page_count; i++)
1857 scrub_page_put(sblock->pagev[i]);
1858 kfree(sblock);
1859 }
1860}
1861
1862static void scrub_page_get(struct scrub_page *spage)
1863{
1864 atomic_inc(&spage->ref_count);
1865}
1866
1867static void scrub_page_put(struct scrub_page *spage)
1868{
1869 if (atomic_dec_and_test(&spage->ref_count)) {
1870 if (spage->page)
1871 __free_page(spage->page);
1872 kfree(spage);
1873 }
1874}
1875
1876static void scrub_submit(struct scrub_ctx *sctx)
1877{
1878 struct scrub_bio *sbio;
1879
1880 if (sctx->curr == -1)
1881 return;
1882
1883 sbio = sctx->bios[sctx->curr];
1884 sctx->curr = -1;
1885 scrub_pending_bio_inc(sctx);
1886
1887 if (!sbio->bio->bi_bdev) {
1888
1889
1890
1891
1892
1893
1894
1895 printk_ratelimited(KERN_WARNING
1896 "btrfs: scrub_submit(bio bdev == NULL) is unexpected!\n");
1897 bio_endio(sbio->bio, -EIO);
1898 } else {
1899 btrfsic_submit_bio(READ, sbio->bio);
1900 }
1901}
1902
1903static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
1904 struct scrub_page *spage)
1905{
1906 struct scrub_block *sblock = spage->sblock;
1907 struct scrub_bio *sbio;
1908 int ret;
1909
1910again:
1911
1912
1913
1914 while (sctx->curr == -1) {
1915 spin_lock(&sctx->list_lock);
1916 sctx->curr = sctx->first_free;
1917 if (sctx->curr != -1) {
1918 sctx->first_free = sctx->bios[sctx->curr]->next_free;
1919 sctx->bios[sctx->curr]->next_free = -1;
1920 sctx->bios[sctx->curr]->page_count = 0;
1921 spin_unlock(&sctx->list_lock);
1922 } else {
1923 spin_unlock(&sctx->list_lock);
1924 wait_event(sctx->list_wait, sctx->first_free != -1);
1925 }
1926 }
1927 sbio = sctx->bios[sctx->curr];
1928 if (sbio->page_count == 0) {
1929 struct bio *bio;
1930
1931 sbio->physical = spage->physical;
1932 sbio->logical = spage->logical;
1933 sbio->dev = spage->dev;
1934 bio = sbio->bio;
1935 if (!bio) {
1936 bio = bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio);
1937 if (!bio)
1938 return -ENOMEM;
1939 sbio->bio = bio;
1940 }
1941
1942 bio->bi_private = sbio;
1943 bio->bi_end_io = scrub_bio_end_io;
1944 bio->bi_bdev = sbio->dev->bdev;
1945 bio->bi_sector = sbio->physical >> 9;
1946 sbio->err = 0;
1947 } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
1948 spage->physical ||
1949 sbio->logical + sbio->page_count * PAGE_SIZE !=
1950 spage->logical ||
1951 sbio->dev != spage->dev) {
1952 scrub_submit(sctx);
1953 goto again;
1954 }
1955
1956 sbio->pagev[sbio->page_count] = spage;
1957 ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
1958 if (ret != PAGE_SIZE) {
1959 if (sbio->page_count < 1) {
1960 bio_put(sbio->bio);
1961 sbio->bio = NULL;
1962 return -EIO;
1963 }
1964 scrub_submit(sctx);
1965 goto again;
1966 }
1967
1968 scrub_block_get(sblock);
1969 atomic_inc(&sblock->outstanding_pages);
1970 sbio->page_count++;
1971 if (sbio->page_count == sctx->pages_per_rd_bio)
1972 scrub_submit(sctx);
1973
1974 return 0;
1975}
1976
1977static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
1978 u64 physical, struct btrfs_device *dev, u64 flags,
1979 u64 gen, int mirror_num, u8 *csum, int force,
1980 u64 physical_for_dev_replace)
1981{
1982 struct scrub_block *sblock;
1983 int index;
1984
1985 sblock = kzalloc(sizeof(*sblock), GFP_NOFS);
1986 if (!sblock) {
1987 spin_lock(&sctx->stat_lock);
1988 sctx->stat.malloc_errors++;
1989 spin_unlock(&sctx->stat_lock);
1990 return -ENOMEM;
1991 }
1992
1993
1994
1995 atomic_set(&sblock->ref_count, 1);
1996 sblock->sctx = sctx;
1997 sblock->no_io_error_seen = 1;
1998
1999 for (index = 0; len > 0; index++) {
2000 struct scrub_page *spage;
2001 u64 l = min_t(u64, len, PAGE_SIZE);
2002
2003 spage = kzalloc(sizeof(*spage), GFP_NOFS);
2004 if (!spage) {
2005leave_nomem:
2006 spin_lock(&sctx->stat_lock);
2007 sctx->stat.malloc_errors++;
2008 spin_unlock(&sctx->stat_lock);
2009 scrub_block_put(sblock);
2010 return -ENOMEM;
2011 }
2012 BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
2013 scrub_page_get(spage);
2014 sblock->pagev[index] = spage;
2015 spage->sblock = sblock;
2016 spage->dev = dev;
2017 spage->flags = flags;
2018 spage->generation = gen;
2019 spage->logical = logical;
2020 spage->physical = physical;
2021 spage->physical_for_dev_replace = physical_for_dev_replace;
2022 spage->mirror_num = mirror_num;
2023 if (csum) {
2024 spage->have_csum = 1;
2025 memcpy(spage->csum, csum, sctx->csum_size);
2026 } else {
2027 spage->have_csum = 0;
2028 }
2029 sblock->page_count++;
2030 spage->page = alloc_page(GFP_NOFS);
2031 if (!spage->page)
2032 goto leave_nomem;
2033 len -= l;
2034 logical += l;
2035 physical += l;
2036 physical_for_dev_replace += l;
2037 }
2038
2039 WARN_ON(sblock->page_count == 0);
2040 for (index = 0; index < sblock->page_count; index++) {
2041 struct scrub_page *spage = sblock->pagev[index];
2042 int ret;
2043
2044 ret = scrub_add_page_to_rd_bio(sctx, spage);
2045 if (ret) {
2046 scrub_block_put(sblock);
2047 return ret;
2048 }
2049 }
2050
2051 if (force)
2052 scrub_submit(sctx);
2053
2054
2055 scrub_block_put(sblock);
2056 return 0;
2057}
2058
2059static void scrub_bio_end_io(struct bio *bio, int err)
2060{
2061 struct scrub_bio *sbio = bio->bi_private;
2062 struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info;
2063
2064 sbio->err = err;
2065 sbio->bio = bio;
2066
2067 btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work);
2068}
2069
2070static void scrub_bio_end_io_worker(struct btrfs_work *work)
2071{
2072 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
2073 struct scrub_ctx *sctx = sbio->sctx;
2074 int i;
2075
2076 BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
2077 if (sbio->err) {
2078 for (i = 0; i < sbio->page_count; i++) {
2079 struct scrub_page *spage = sbio->pagev[i];
2080
2081 spage->io_error = 1;
2082 spage->sblock->no_io_error_seen = 0;
2083 }
2084 }
2085
2086
2087 for (i = 0; i < sbio->page_count; i++) {
2088 struct scrub_page *spage = sbio->pagev[i];
2089 struct scrub_block *sblock = spage->sblock;
2090
2091 if (atomic_dec_and_test(&sblock->outstanding_pages))
2092 scrub_block_complete(sblock);
2093 scrub_block_put(sblock);
2094 }
2095
2096 bio_put(sbio->bio);
2097 sbio->bio = NULL;
2098 spin_lock(&sctx->list_lock);
2099 sbio->next_free = sctx->first_free;
2100 sctx->first_free = sbio->index;
2101 spin_unlock(&sctx->list_lock);
2102
2103 if (sctx->is_dev_replace &&
2104 atomic_read(&sctx->wr_ctx.flush_all_writes)) {
2105 mutex_lock(&sctx->wr_ctx.wr_lock);
2106 scrub_wr_submit(sctx);
2107 mutex_unlock(&sctx->wr_ctx.wr_lock);
2108 }
2109
2110 scrub_pending_bio_dec(sctx);
2111}
2112
2113static void scrub_block_complete(struct scrub_block *sblock)
2114{
2115 if (!sblock->no_io_error_seen) {
2116 scrub_handle_errored_block(sblock);
2117 } else {
2118
2119
2120
2121
2122
2123 if (!scrub_checksum(sblock) && sblock->sctx->is_dev_replace)
2124 scrub_write_block_to_dev_replace(sblock);
2125 }
2126}
2127
2128static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len,
2129 u8 *csum)
2130{
2131 struct btrfs_ordered_sum *sum = NULL;
2132 int ret = 0;
2133 unsigned long i;
2134 unsigned long num_sectors;
2135
2136 while (!list_empty(&sctx->csum_list)) {
2137 sum = list_first_entry(&sctx->csum_list,
2138 struct btrfs_ordered_sum, list);
2139 if (sum->bytenr > logical)
2140 return 0;
2141 if (sum->bytenr + sum->len > logical)
2142 break;
2143
2144 ++sctx->stat.csum_discards;
2145 list_del(&sum->list);
2146 kfree(sum);
2147 sum = NULL;
2148 }
2149 if (!sum)
2150 return 0;
2151
2152 num_sectors = sum->len / sctx->sectorsize;
2153 for (i = 0; i < num_sectors; ++i) {
2154 if (sum->sums[i].bytenr == logical) {
2155 memcpy(csum, &sum->sums[i].sum, sctx->csum_size);
2156 ret = 1;
2157 break;
2158 }
2159 }
2160 if (ret && i == num_sectors - 1) {
2161 list_del(&sum->list);
2162 kfree(sum);
2163 }
2164 return ret;
2165}
2166
2167
2168static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len,
2169 u64 physical, struct btrfs_device *dev, u64 flags,
2170 u64 gen, int mirror_num, u64 physical_for_dev_replace)
2171{
2172 int ret;
2173 u8 csum[BTRFS_CSUM_SIZE];
2174 u32 blocksize;
2175
2176 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2177 blocksize = sctx->sectorsize;
2178 spin_lock(&sctx->stat_lock);
2179 sctx->stat.data_extents_scrubbed++;
2180 sctx->stat.data_bytes_scrubbed += len;
2181 spin_unlock(&sctx->stat_lock);
2182 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
2183 WARN_ON(sctx->nodesize != sctx->leafsize);
2184 blocksize = sctx->nodesize;
2185 spin_lock(&sctx->stat_lock);
2186 sctx->stat.tree_extents_scrubbed++;
2187 sctx->stat.tree_bytes_scrubbed += len;
2188 spin_unlock(&sctx->stat_lock);
2189 } else {
2190 blocksize = sctx->sectorsize;
2191 WARN_ON(1);
2192 }
2193
2194 while (len) {
2195 u64 l = min_t(u64, len, blocksize);
2196 int have_csum = 0;
2197
2198 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2199
2200 have_csum = scrub_find_csum(sctx, logical, l, csum);
2201 if (have_csum == 0)
2202 ++sctx->stat.no_csum;
2203 if (sctx->is_dev_replace && !have_csum) {
2204 ret = copy_nocow_pages(sctx, logical, l,
2205 mirror_num,
2206 physical_for_dev_replace);
2207 goto behind_scrub_pages;
2208 }
2209 }
2210 ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen,
2211 mirror_num, have_csum ? csum : NULL, 0,
2212 physical_for_dev_replace);
2213behind_scrub_pages:
2214 if (ret)
2215 return ret;
2216 len -= l;
2217 logical += l;
2218 physical += l;
2219 physical_for_dev_replace += l;
2220 }
2221 return 0;
2222}
2223
2224static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2225 struct map_lookup *map,
2226 struct btrfs_device *scrub_dev,
2227 int num, u64 base, u64 length,
2228 int is_dev_replace)
2229{
2230 struct btrfs_path *path;
2231 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
2232 struct btrfs_root *root = fs_info->extent_root;
2233 struct btrfs_root *csum_root = fs_info->csum_root;
2234 struct btrfs_extent_item *extent;
2235 struct blk_plug plug;
2236 u64 flags;
2237 int ret;
2238 int slot;
2239 int i;
2240 u64 nstripes;
2241 struct extent_buffer *l;
2242 struct btrfs_key key;
2243 u64 physical;
2244 u64 logical;
2245 u64 generation;
2246 int mirror_num;
2247 struct reada_control *reada1;
2248 struct reada_control *reada2;
2249 struct btrfs_key key_start;
2250 struct btrfs_key key_end;
2251 u64 increment = map->stripe_len;
2252 u64 offset;
2253 u64 extent_logical;
2254 u64 extent_physical;
2255 u64 extent_len;
2256 struct btrfs_device *extent_dev;
2257 int extent_mirror_num;
2258
2259 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
2260 BTRFS_BLOCK_GROUP_RAID6)) {
2261 if (num >= nr_data_stripes(map)) {
2262 return 0;
2263 }
2264 }
2265
2266 nstripes = length;
2267 offset = 0;
2268 do_div(nstripes, map->stripe_len);
2269 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
2270 offset = map->stripe_len * num;
2271 increment = map->stripe_len * map->num_stripes;
2272 mirror_num = 1;
2273 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
2274 int factor = map->num_stripes / map->sub_stripes;
2275 offset = map->stripe_len * (num / map->sub_stripes);
2276 increment = map->stripe_len * factor;
2277 mirror_num = num % map->sub_stripes + 1;
2278 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
2279 increment = map->stripe_len;
2280 mirror_num = num % map->num_stripes + 1;
2281 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
2282 increment = map->stripe_len;
2283 mirror_num = num % map->num_stripes + 1;
2284 } else {
2285 increment = map->stripe_len;
2286 mirror_num = 1;
2287 }
2288
2289 path = btrfs_alloc_path();
2290 if (!path)
2291 return -ENOMEM;
2292
	/*
	 * work on the commit root: the blocks it references stay static as
	 * long as COW is in effect, so it is safe to read them (and to
	 * rewrite them for repair) without taking tree locks
	 */
2298 path->search_commit_root = 1;
2299 path->skip_locking = 1;
2300
	/*
	 * trigger readahead for the extent tree and the csum tree and wait
	 * for completion. While the readahead runs, the scrub counts as
	 * paused so that it does not hold off transaction commits.
	 */
2306 logical = base + offset;
2307
2308 wait_event(sctx->list_wait,
2309 atomic_read(&sctx->bios_in_flight) == 0);
2310 atomic_inc(&fs_info->scrubs_paused);
2311 wake_up(&fs_info->scrub_pause_wait);
2312
	/* queue readahead for the extent tree and the csum tree over this range */
2314 key_start.objectid = logical;
2315 key_start.type = BTRFS_EXTENT_ITEM_KEY;
2316 key_start.offset = (u64)0;
2317 key_end.objectid = base + offset + nstripes * increment;
2318 key_end.type = BTRFS_EXTENT_ITEM_KEY;
2319 key_end.offset = (u64)0;
2320 reada1 = btrfs_reada_add(root, &key_start, &key_end);
2321
2322 key_start.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
2323 key_start.type = BTRFS_EXTENT_CSUM_KEY;
2324 key_start.offset = logical;
2325 key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
2326 key_end.type = BTRFS_EXTENT_CSUM_KEY;
2327 key_end.offset = base + offset + nstripes * increment;
2328 reada2 = btrfs_reada_add(csum_root, &key_start, &key_end);
2329
2330 if (!IS_ERR(reada1))
2331 btrfs_reada_wait(reada1);
2332 if (!IS_ERR(reada2))
2333 btrfs_reada_wait(reada2);
2334
2335 mutex_lock(&fs_info->scrub_lock);
2336 while (atomic_read(&fs_info->scrub_pause_req)) {
2337 mutex_unlock(&fs_info->scrub_lock);
2338 wait_event(fs_info->scrub_pause_wait,
2339 atomic_read(&fs_info->scrub_pause_req) == 0);
2340 mutex_lock(&fs_info->scrub_lock);
2341 }
2342 atomic_dec(&fs_info->scrubs_paused);
2343 mutex_unlock(&fs_info->scrub_lock);
2344 wake_up(&fs_info->scrub_pause_wait);
2345
	/*
	 * plug the block device so that the read bios generated while walking
	 * one stripe can be merged before they are submitted
	 */
2350 blk_start_plug(&plug);
2351
	/*
	 * now find all extents for each stripe and scrub them
	 */
2355 logical = base + offset;
2356 physical = map->stripes[num].physical;
2357 ret = 0;
2358 for (i = 0; i < nstripes; ++i) {
		/*
		 * canceled?
		 */
2362 if (atomic_read(&fs_info->scrub_cancel_req) ||
2363 atomic_read(&sctx->cancel_req)) {
2364 ret = -ECANCELED;
2365 goto out;
2366 }
2367
		/* check to see if we have to pause */
2370 if (atomic_read(&fs_info->scrub_pause_req)) {
			/* push queued extents */
2372 atomic_set(&sctx->wr_ctx.flush_all_writes, 1);
2373 scrub_submit(sctx);
2374 mutex_lock(&sctx->wr_ctx.wr_lock);
2375 scrub_wr_submit(sctx);
2376 mutex_unlock(&sctx->wr_ctx.wr_lock);
2377 wait_event(sctx->list_wait,
2378 atomic_read(&sctx->bios_in_flight) == 0);
2379 atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
2380 atomic_inc(&fs_info->scrubs_paused);
2381 wake_up(&fs_info->scrub_pause_wait);
2382 mutex_lock(&fs_info->scrub_lock);
2383 while (atomic_read(&fs_info->scrub_pause_req)) {
2384 mutex_unlock(&fs_info->scrub_lock);
2385 wait_event(fs_info->scrub_pause_wait,
2386 atomic_read(&fs_info->scrub_pause_req) == 0);
2387 mutex_lock(&fs_info->scrub_lock);
2388 }
2389 atomic_dec(&fs_info->scrubs_paused);
2390 mutex_unlock(&fs_info->scrub_lock);
2391 wake_up(&fs_info->scrub_pause_wait);
2392 }
2393
2394 ret = btrfs_lookup_csums_range(csum_root, logical,
2395 logical + map->stripe_len - 1,
2396 &sctx->csum_list, 1);
2397 if (ret)
2398 goto out;
2399
2400 key.objectid = logical;
2401 key.type = BTRFS_EXTENT_ITEM_KEY;
2402 key.offset = (u64)0;
2403
2404 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2405 if (ret < 0)
2406 goto out;
2407 if (ret > 0) {
2408 ret = btrfs_previous_item(root, path, 0,
2409 BTRFS_EXTENT_ITEM_KEY);
2410 if (ret < 0)
2411 goto out;
2412 if (ret > 0) {
				/* there's no smaller item, so stick with the
				 * larger one */
2415 btrfs_release_path(path);
2416 ret = btrfs_search_slot(NULL, root, &key,
2417 path, 0, 0);
2418 if (ret < 0)
2419 goto out;
2420 }
2421 }
2422
2423 while (1) {
2424 l = path->nodes[0];
2425 slot = path->slots[0];
2426 if (slot >= btrfs_header_nritems(l)) {
2427 ret = btrfs_next_leaf(root, path);
2428 if (ret == 0)
2429 continue;
2430 if (ret < 0)
2431 goto out;
2432
2433 break;
2434 }
2435 btrfs_item_key_to_cpu(l, &key, slot);
2436
2437 if (key.objectid + key.offset <= logical)
2438 goto next;
2439
2440 if (key.objectid >= logical + map->stripe_len)
2441 break;
2442
2443 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
2444 goto next;
2445
2446 extent = btrfs_item_ptr(l, slot,
2447 struct btrfs_extent_item);
2448 flags = btrfs_extent_flags(l, extent);
2449 generation = btrfs_extent_generation(l, extent);
2450
2451 if (key.objectid < logical &&
2452 (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
2453 printk(KERN_ERR
2454 "btrfs scrub: tree block %llu spanning "
2455 "stripes, ignored. logical=%llu\n",
2456 (unsigned long long)key.objectid,
2457 (unsigned long long)logical);
2458 goto next;
2459 }
2460
			/*
			 * trim the extent to this stripe
			 */
2464 if (key.objectid < logical) {
2465 key.offset -= logical - key.objectid;
2466 key.objectid = logical;
2467 }
2468 if (key.objectid + key.offset >
2469 logical + map->stripe_len) {
2470 key.offset = logical + map->stripe_len -
2471 key.objectid;
2472 }
2473
2474 extent_logical = key.objectid;
2475 extent_physical = key.objectid - logical + physical;
2476 extent_len = key.offset;
2477 extent_dev = scrub_dev;
2478 extent_mirror_num = mirror_num;
2479 if (is_dev_replace)
2480 scrub_remap_extent(fs_info, extent_logical,
2481 extent_len, &extent_physical,
2482 &extent_dev,
2483 &extent_mirror_num);
2484 ret = scrub_extent(sctx, extent_logical, extent_len,
2485 extent_physical, extent_dev, flags,
2486 generation, extent_mirror_num,
2487 key.objectid - logical + physical);
2488 if (ret)
2489 goto out;
2490
2491next:
2492 path->slots[0]++;
2493 }
2494 btrfs_release_path(path);
2495 logical += increment;
2496 physical += map->stripe_len;
2497 spin_lock(&sctx->stat_lock);
2498 sctx->stat.last_physical = physical;
2499 spin_unlock(&sctx->stat_lock);
2500 }
2501out:
	/* push queued extents */
2503 scrub_submit(sctx);
2504 mutex_lock(&sctx->wr_ctx.wr_lock);
2505 scrub_wr_submit(sctx);
2506 mutex_unlock(&sctx->wr_ctx.wr_lock);
2507
2508 blk_finish_plug(&plug);
2509 btrfs_free_path(path);
2510 return ret < 0 ? ret : 0;
2511}
2512
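/*
 * look up the chunk mapping for this dev extent and scrub the stripe(s) of
 * the chunk that live on scrub_dev at dev_offset
 */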
2513static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
2514 struct btrfs_device *scrub_dev,
2515 u64 chunk_tree, u64 chunk_objectid,
2516 u64 chunk_offset, u64 length,
2517 u64 dev_offset, int is_dev_replace)
2518{
2519 struct btrfs_mapping_tree *map_tree =
2520 &sctx->dev_root->fs_info->mapping_tree;
2521 struct map_lookup *map;
2522 struct extent_map *em;
2523 int i;
2524 int ret = 0;
2525
2526 read_lock(&map_tree->map_tree.lock);
2527 em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
2528 read_unlock(&map_tree->map_tree.lock);
2529
2530 if (!em)
2531 return -EINVAL;
2532
2533 map = (struct map_lookup *)em->bdev;
2534 if (em->start != chunk_offset)
2535 goto out;
2536
2537 if (em->len < length)
2538 goto out;
2539
2540 for (i = 0; i < map->num_stripes; ++i) {
2541 if (map->stripes[i].dev->bdev == scrub_dev->bdev &&
2542 map->stripes[i].physical == dev_offset) {
2543 ret = scrub_stripe(sctx, map, scrub_dev, i,
2544 chunk_offset, length,
2545 is_dev_replace);
2546 if (ret)
2547 goto out;
2548 }
2549 }
2550out:
2551 free_extent_map(em);
2552
2553 return ret;
2554}
2555
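/*
 * walk the dev extent items of scrub_dev in the device tree and scrub the
 * chunk behind every dev extent that overlaps the requested range
 */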
2556static noinline_for_stack
2557int scrub_enumerate_chunks(struct scrub_ctx *sctx,
2558 struct btrfs_device *scrub_dev, u64 start, u64 end,
2559 int is_dev_replace)
2560{
2561 struct btrfs_dev_extent *dev_extent = NULL;
2562 struct btrfs_path *path;
2563 struct btrfs_root *root = sctx->dev_root;
2564 struct btrfs_fs_info *fs_info = root->fs_info;
2565 u64 length;
2566 u64 chunk_tree;
2567 u64 chunk_objectid;
2568 u64 chunk_offset;
2569 int ret;
2570 int slot;
2571 struct extent_buffer *l;
2572 struct btrfs_key key;
2573 struct btrfs_key found_key;
2574 struct btrfs_block_group_cache *cache;
2575 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
2576
2577 path = btrfs_alloc_path();
2578 if (!path)
2579 return -ENOMEM;
2580
2581 path->reada = 2;
2582 path->search_commit_root = 1;
2583 path->skip_locking = 1;
2584
2585 key.objectid = scrub_dev->devid;
2586 key.offset = 0ull;
2587 key.type = BTRFS_DEV_EXTENT_KEY;
2588
2589 while (1) {
2590 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2591 if (ret < 0)
2592 break;
2593 if (ret > 0) {
2594 if (path->slots[0] >=
2595 btrfs_header_nritems(path->nodes[0])) {
2596 ret = btrfs_next_leaf(root, path);
2597 if (ret)
2598 break;
2599 }
2600 }
2601
2602 l = path->nodes[0];
2603 slot = path->slots[0];
2604
2605 btrfs_item_key_to_cpu(l, &found_key, slot);
2606
2607 if (found_key.objectid != scrub_dev->devid)
2608 break;
2609
2610 if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY)
2611 break;
2612
2613 if (found_key.offset >= end)
2614 break;
2615
2616 if (found_key.offset < key.offset)
2617 break;
2618
2619 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
2620 length = btrfs_dev_extent_length(l, dev_extent);
2621
2622 if (found_key.offset + length <= start) {
2623 key.offset = found_key.offset + length;
2624 btrfs_release_path(path);
2625 continue;
2626 }
2627
2628 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
2629 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
2630 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
2631
		/*
		 * get a reference on the corresponding block group to prevent
		 * the chunk from going away while we scrub it
		 */
2636 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
2637 if (!cache) {
2638 ret = -ENOENT;
2639 break;
2640 }
2641 dev_replace->cursor_right = found_key.offset + length;
2642 dev_replace->cursor_left = found_key.offset;
2643 dev_replace->item_needs_writeback = 1;
2644 ret = scrub_chunk(sctx, scrub_dev, chunk_tree, chunk_objectid,
2645 chunk_offset, length, found_key.offset,
2646 is_dev_replace);
2647
		/*
		 * flush and submit all pending read and write bios, then wait
		 * for them.
		 * Note that in the dev-replace case, a read request causes
		 * write requests that are submitted in the read completion
		 * worker. Therefore all write requests must be flushed here
		 * so that all read and write requests are really completed
		 * when bios_in_flight changes to 0.
		 */
2658 atomic_set(&sctx->wr_ctx.flush_all_writes, 1);
2659 scrub_submit(sctx);
2660 mutex_lock(&sctx->wr_ctx.wr_lock);
2661 scrub_wr_submit(sctx);
2662 mutex_unlock(&sctx->wr_ctx.wr_lock);
2663
2664 wait_event(sctx->list_wait,
2665 atomic_read(&sctx->bios_in_flight) == 0);
2666 atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
2667 atomic_inc(&fs_info->scrubs_paused);
2668 wake_up(&fs_info->scrub_pause_wait);
2669 wait_event(sctx->list_wait,
2670 atomic_read(&sctx->workers_pending) == 0);
2671
2672 mutex_lock(&fs_info->scrub_lock);
2673 while (atomic_read(&fs_info->scrub_pause_req)) {
2674 mutex_unlock(&fs_info->scrub_lock);
2675 wait_event(fs_info->scrub_pause_wait,
2676 atomic_read(&fs_info->scrub_pause_req) == 0);
2677 mutex_lock(&fs_info->scrub_lock);
2678 }
2679 atomic_dec(&fs_info->scrubs_paused);
2680 mutex_unlock(&fs_info->scrub_lock);
2681 wake_up(&fs_info->scrub_pause_wait);
2682
2683 dev_replace->cursor_left = dev_replace->cursor_right;
2684 dev_replace->item_needs_writeback = 1;
2685 btrfs_put_block_group(cache);
2686 if (ret)
2687 break;
2688 if (is_dev_replace &&
2689 atomic64_read(&dev_replace->num_write_errors) > 0) {
2690 ret = -EIO;
2691 break;
2692 }
2693 if (sctx->stat.malloc_errors > 0) {
2694 ret = -ENOMEM;
2695 break;
2696 }
2697
2698 key.offset = found_key.offset + length;
2699 btrfs_release_path(path);
2700 }
2701
2702 btrfs_free_path(path);
2703
	/*
	 * ret can still be 1 from search_slot or next_leaf,
	 * that's not an error
	 */
2708 return ret < 0 ? ret : 0;
2709}
2710
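/*
 * scrub all superblock copies that fit on the device
 */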
2711static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
2712 struct btrfs_device *scrub_dev)
2713{
2714 int i;
2715 u64 bytenr;
2716 u64 gen;
2717 int ret;
2718 struct btrfs_root *root = sctx->dev_root;
2719
2720 if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
2721 return -EIO;
2722
2723 gen = root->fs_info->last_trans_committed;
2724
2725 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
2726 bytenr = btrfs_sb_offset(i);
2727 if (bytenr + BTRFS_SUPER_INFO_SIZE > scrub_dev->total_bytes)
2728 break;
2729
2730 ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
2731 scrub_dev, BTRFS_EXTENT_FLAG_SUPER, gen, i,
2732 NULL, 1, bytenr);
2733 if (ret)
2734 return ret;
2735 }
2736 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
2737
2738 return 0;
2739}
2740
/*
 * get a reference count on fs_info->scrub_workers. start workers if necessary
 */
2744static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
2745 int is_dev_replace)
2746{
2747 int ret = 0;
2748
2749 mutex_lock(&fs_info->scrub_lock);
2750 if (fs_info->scrub_workers_refcnt == 0) {
2751 if (is_dev_replace)
2752 btrfs_init_workers(&fs_info->scrub_workers, "scrub", 1,
2753 &fs_info->generic_worker);
2754 else
2755 btrfs_init_workers(&fs_info->scrub_workers, "scrub",
2756 fs_info->thread_pool_size,
2757 &fs_info->generic_worker);
2758 fs_info->scrub_workers.idle_thresh = 4;
2759 ret = btrfs_start_workers(&fs_info->scrub_workers);
2760 if (ret)
2761 goto out;
2762 btrfs_init_workers(&fs_info->scrub_wr_completion_workers,
2763 "scrubwrc",
2764 fs_info->thread_pool_size,
2765 &fs_info->generic_worker);
2766 fs_info->scrub_wr_completion_workers.idle_thresh = 2;
2767 ret = btrfs_start_workers(
2768 &fs_info->scrub_wr_completion_workers);
2769 if (ret)
2770 goto out;
2771 btrfs_init_workers(&fs_info->scrub_nocow_workers, "scrubnc", 1,
2772 &fs_info->generic_worker);
2773 ret = btrfs_start_workers(&fs_info->scrub_nocow_workers);
2774 if (ret)
2775 goto out;
2776 }
2777 ++fs_info->scrub_workers_refcnt;
2778out:
2779 mutex_unlock(&fs_info->scrub_lock);
2780
2781 return ret;
2782}
2783
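/*
 * drop a reference on fs_info->scrub_workers and stop the workers when the
 * last reference is gone
 */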
2784static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
2785{
2786 mutex_lock(&fs_info->scrub_lock);
2787 if (--fs_info->scrub_workers_refcnt == 0) {
2788 btrfs_stop_workers(&fs_info->scrub_workers);
2789 btrfs_stop_workers(&fs_info->scrub_wr_completion_workers);
2790 btrfs_stop_workers(&fs_info->scrub_nocow_workers);
2791 }
2792 WARN_ON(fs_info->scrub_workers_refcnt < 0);
2793 mutex_unlock(&fs_info->scrub_lock);
2794}
2795
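/*
 * main entry point for scrub and dev-replace: verify the size assumptions,
 * set up a scrub context for the device, scrub the superblocks (plain scrub
 * only) and then all chunks within [start, end]
 */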
2796int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
2797 u64 end, struct btrfs_scrub_progress *progress,
2798 int readonly, int is_dev_replace)
2799{
2800 struct scrub_ctx *sctx;
2801 int ret;
2802 struct btrfs_device *dev;
2803
2804 if (btrfs_fs_closing(fs_info))
2805 return -EINVAL;
2806
	/*
	 * check the size assumptions that the scrub code relies on
	 */
2810 if (fs_info->chunk_root->nodesize != fs_info->chunk_root->leafsize) {
2811 printk(KERN_ERR
2812 "btrfs_scrub: size assumption nodesize == leafsize (%d == %d) fails\n",
2813 fs_info->chunk_root->nodesize,
2814 fs_info->chunk_root->leafsize);
2815 return -EINVAL;
2816 }
2817
2818 if (fs_info->chunk_root->nodesize > BTRFS_STRIPE_LEN) {
		/*
		 * in this case scrub is unable to calculate the checksum
		 * the way it is implemented. Do not handle this situation
		 * at all because it won't ever happen.
		 */
2824 printk(KERN_ERR
2825 "btrfs_scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails\n",
2826 fs_info->chunk_root->nodesize, BTRFS_STRIPE_LEN);
2827 return -EINVAL;
2828 }
2829
2830 if (fs_info->chunk_root->sectorsize != PAGE_SIZE) {
		/* not supported for data w/o checksums */
2832 printk(KERN_ERR
		       "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %llu) fails\n",
2834 fs_info->chunk_root->sectorsize,
2835 (unsigned long long)PAGE_SIZE);
2836 return -EINVAL;
2837 }
2838
2839 if (fs_info->chunk_root->nodesize >
2840 PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK ||
2841 fs_info->chunk_root->sectorsize >
2842 PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK) {
		/*
		 * this would exhaust the array bounds of the pagev member in
		 * struct scrub_block
		 */
2847 pr_err("btrfs_scrub: size assumption nodesize and sectorsize <= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails\n",
2848 fs_info->chunk_root->nodesize,
2849 SCRUB_MAX_PAGES_PER_BLOCK,
2850 fs_info->chunk_root->sectorsize,
2851 SCRUB_MAX_PAGES_PER_BLOCK);
2852 return -EINVAL;
2853 }
2854
2855 ret = scrub_workers_get(fs_info, is_dev_replace);
2856 if (ret)
2857 return ret;
2858
2859 mutex_lock(&fs_info->fs_devices->device_list_mutex);
2860 dev = btrfs_find_device(fs_info, devid, NULL, NULL);
2861 if (!dev || (dev->missing && !is_dev_replace)) {
2862 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2863 scrub_workers_put(fs_info);
2864 return -ENODEV;
2865 }
2866 mutex_lock(&fs_info->scrub_lock);
2867
2868 if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) {
2869 mutex_unlock(&fs_info->scrub_lock);
2870 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2871 scrub_workers_put(fs_info);
2872 return -EIO;
2873 }
2874
2875 btrfs_dev_replace_lock(&fs_info->dev_replace);
2876 if (dev->scrub_device ||
2877 (!is_dev_replace &&
2878 btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
2879 btrfs_dev_replace_unlock(&fs_info->dev_replace);
2880 mutex_unlock(&fs_info->scrub_lock);
2881 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2882 scrub_workers_put(fs_info);
2883 return -EINPROGRESS;
2884 }
2885 btrfs_dev_replace_unlock(&fs_info->dev_replace);
2886 sctx = scrub_setup_ctx(dev, is_dev_replace);
2887 if (IS_ERR(sctx)) {
2888 mutex_unlock(&fs_info->scrub_lock);
2889 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2890 scrub_workers_put(fs_info);
2891 return PTR_ERR(sctx);
2892 }
2893 sctx->readonly = readonly;
2894 dev->scrub_device = sctx;
2895
2896 atomic_inc(&fs_info->scrubs_running);
2897 mutex_unlock(&fs_info->scrub_lock);
2898 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2899
2900 if (!is_dev_replace) {
2901 down_read(&fs_info->scrub_super_lock);
2902 ret = scrub_supers(sctx, dev);
2903 up_read(&fs_info->scrub_super_lock);
2904 }
2905
2906 if (!ret)
2907 ret = scrub_enumerate_chunks(sctx, dev, start, end,
2908 is_dev_replace);
2909
2910 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
2911 atomic_dec(&fs_info->scrubs_running);
2912 wake_up(&fs_info->scrub_pause_wait);
2913
2914 wait_event(sctx->list_wait, atomic_read(&sctx->workers_pending) == 0);
2915
2916 if (progress)
2917 memcpy(progress, &sctx->stat, sizeof(*progress));
2918
2919 mutex_lock(&fs_info->scrub_lock);
2920 dev->scrub_device = NULL;
2921 mutex_unlock(&fs_info->scrub_lock);
2922
2923 scrub_free_ctx(sctx);
2924 scrub_workers_put(fs_info);
2925
2926 return ret;
2927}
2928
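/*
 * ask all running scrubs to pause and wait until every one of them has
 * reached the paused state
 */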
2929void btrfs_scrub_pause(struct btrfs_root *root)
2930{
2931 struct btrfs_fs_info *fs_info = root->fs_info;
2932
2933 mutex_lock(&fs_info->scrub_lock);
2934 atomic_inc(&fs_info->scrub_pause_req);
2935 while (atomic_read(&fs_info->scrubs_paused) !=
2936 atomic_read(&fs_info->scrubs_running)) {
2937 mutex_unlock(&fs_info->scrub_lock);
2938 wait_event(fs_info->scrub_pause_wait,
2939 atomic_read(&fs_info->scrubs_paused) ==
2940 atomic_read(&fs_info->scrubs_running));
2941 mutex_lock(&fs_info->scrub_lock);
2942 }
2943 mutex_unlock(&fs_info->scrub_lock);
2944}
2945
2946void btrfs_scrub_continue(struct btrfs_root *root)
2947{
2948 struct btrfs_fs_info *fs_info = root->fs_info;
2949
2950 atomic_dec(&fs_info->scrub_pause_req);
2951 wake_up(&fs_info->scrub_pause_wait);
2952}
2953
2954void btrfs_scrub_pause_super(struct btrfs_root *root)
2955{
2956 down_write(&root->fs_info->scrub_super_lock);
2957}
2958
2959void btrfs_scrub_continue_super(struct btrfs_root *root)
2960{
2961 up_write(&root->fs_info->scrub_super_lock);
2962}
2963
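/*
 * cancel all scrubs running on this filesystem and wait for them to finish
 */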
2964int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
2965{
2966 mutex_lock(&fs_info->scrub_lock);
2967 if (!atomic_read(&fs_info->scrubs_running)) {
2968 mutex_unlock(&fs_info->scrub_lock);
2969 return -ENOTCONN;
2970 }
2971
2972 atomic_inc(&fs_info->scrub_cancel_req);
2973 while (atomic_read(&fs_info->scrubs_running)) {
2974 mutex_unlock(&fs_info->scrub_lock);
2975 wait_event(fs_info->scrub_pause_wait,
2976 atomic_read(&fs_info->scrubs_running) == 0);
2977 mutex_lock(&fs_info->scrub_lock);
2978 }
2979 atomic_dec(&fs_info->scrub_cancel_req);
2980 mutex_unlock(&fs_info->scrub_lock);
2981
2982 return 0;
2983}
2984
2985int btrfs_scrub_cancel_dev(struct btrfs_fs_info *fs_info,
2986 struct btrfs_device *dev)
2987{
2988 struct scrub_ctx *sctx;
2989
2990 mutex_lock(&fs_info->scrub_lock);
2991 sctx = dev->scrub_device;
2992 if (!sctx) {
2993 mutex_unlock(&fs_info->scrub_lock);
2994 return -ENOTCONN;
2995 }
2996 atomic_inc(&sctx->cancel_req);
2997 while (dev->scrub_device) {
2998 mutex_unlock(&fs_info->scrub_lock);
2999 wait_event(fs_info->scrub_pause_wait,
3000 dev->scrub_device == NULL);
3001 mutex_lock(&fs_info->scrub_lock);
3002 }
3003 mutex_unlock(&fs_info->scrub_lock);
3004
3005 return 0;
3006}
3007
3008int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid)
3009{
3010 struct btrfs_fs_info *fs_info = root->fs_info;
3011 struct btrfs_device *dev;
3012 int ret;
3013
	/*
	 * hold the device_list_mutex so that the device cannot go away
	 * while btrfs_scrub_cancel_dev() is running
	 */
3018 mutex_lock(&fs_info->fs_devices->device_list_mutex);
3019 dev = btrfs_find_device(fs_info, devid, NULL, NULL);
3020 if (!dev) {
3021 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3022 return -ENODEV;
3023 }
3024 ret = btrfs_scrub_cancel_dev(fs_info, dev);
3025 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3026
3027 return ret;
3028}
3029
3030int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
3031 struct btrfs_scrub_progress *progress)
3032{
3033 struct btrfs_device *dev;
3034 struct scrub_ctx *sctx = NULL;
3035
3036 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
3037 dev = btrfs_find_device(root->fs_info, devid, NULL, NULL);
3038 if (dev)
3039 sctx = dev->scrub_device;
3040 if (sctx)
3041 memcpy(progress, &sctx->stat, sizeof(*progress));
3042 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
3043
3044 return dev ? (sctx ? 0 : -ENOTCONN) : -ENODEV;
3045}
3046
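/*
 * map the extent through btrfs_map_block() and return the device, physical
 * offset and mirror number of the first stripe, so that the data is read
 * from there during dev-replace
 */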
3047static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
3048 u64 extent_logical, u64 extent_len,
3049 u64 *extent_physical,
3050 struct btrfs_device **extent_dev,
3051 int *extent_mirror_num)
3052{
3053 u64 mapped_length;
3054 struct btrfs_bio *bbio = NULL;
3055 int ret;
3056
3057 mapped_length = extent_len;
3058 ret = btrfs_map_block(fs_info, READ, extent_logical,
3059 &mapped_length, &bbio, 0);
3060 if (ret || !bbio || mapped_length < extent_len ||
3061 !bbio->stripes[0].dev->bdev) {
3062 kfree(bbio);
3063 return;
3064 }
3065
3066 *extent_physical = bbio->stripes[0].physical;
3067 *extent_mirror_num = bbio->mirror_num;
3068 *extent_dev = bbio->stripes[0].dev;
3069 kfree(bbio);
3070}
3071
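/*
 * initialize the write context that is used to write to the dev-replace
 * target device; this is a no-op unless a replace operation is running
 */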
3072static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
3073 struct scrub_wr_ctx *wr_ctx,
3074 struct btrfs_fs_info *fs_info,
3075 struct btrfs_device *dev,
3076 int is_dev_replace)
3077{
3078 WARN_ON(wr_ctx->wr_curr_bio != NULL);
3079
3080 mutex_init(&wr_ctx->wr_lock);
3081 wr_ctx->wr_curr_bio = NULL;
3082 if (!is_dev_replace)
3083 return 0;
3084
3085 WARN_ON(!dev->bdev);
3086 wr_ctx->pages_per_wr_bio = min_t(int, SCRUB_PAGES_PER_WR_BIO,
3087 bio_get_nr_vecs(dev->bdev));
3088 wr_ctx->tgtdev = dev;
3089 atomic_set(&wr_ctx->flush_all_writes, 0);
3090 return 0;
3091}
3092
3093static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx)
3094{
3095 mutex_lock(&wr_ctx->wr_lock);
3096 kfree(wr_ctx->wr_curr_bio);
3097 wr_ctx->wr_curr_bio = NULL;
3098 mutex_unlock(&wr_ctx->wr_lock);
3099}
3100
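/*
 * for nodatasum data during dev-replace: queue a worker that copies the
 * pages of this logical range to the target device
 */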
3101static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
3102 int mirror_num, u64 physical_for_dev_replace)
3103{
3104 struct scrub_copy_nocow_ctx *nocow_ctx;
3105 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
3106
3107 nocow_ctx = kzalloc(sizeof(*nocow_ctx), GFP_NOFS);
3108 if (!nocow_ctx) {
3109 spin_lock(&sctx->stat_lock);
3110 sctx->stat.malloc_errors++;
3111 spin_unlock(&sctx->stat_lock);
3112 return -ENOMEM;
3113 }
3114
3115 scrub_pending_trans_workers_inc(sctx);
3116
3117 nocow_ctx->sctx = sctx;
3118 nocow_ctx->logical = logical;
3119 nocow_ctx->len = len;
3120 nocow_ctx->mirror_num = mirror_num;
3121 nocow_ctx->physical_for_dev_replace = physical_for_dev_replace;
3122 nocow_ctx->work.func = copy_nocow_pages_worker;
3123 btrfs_queue_worker(&fs_info->scrub_nocow_workers,
3124 &nocow_ctx->work);
3125
3126 return 0;
3127}
3128
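/*
 * worker: find all inodes that reference the logical range and copy their
 * pages to the dev-replace target; if nothing could be written, account an
 * uncorrectable read error
 */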
3129static void copy_nocow_pages_worker(struct btrfs_work *work)
3130{
3131 struct scrub_copy_nocow_ctx *nocow_ctx =
3132 container_of(work, struct scrub_copy_nocow_ctx, work);
3133 struct scrub_ctx *sctx = nocow_ctx->sctx;
3134 u64 logical = nocow_ctx->logical;
3135 u64 len = nocow_ctx->len;
3136 int mirror_num = nocow_ctx->mirror_num;
3137 u64 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
3138 int ret;
3139 struct btrfs_trans_handle *trans = NULL;
3140 struct btrfs_fs_info *fs_info;
3141 struct btrfs_path *path;
3142 struct btrfs_root *root;
3143 int not_written = 0;
3144
3145 fs_info = sctx->dev_root->fs_info;
3146 root = fs_info->extent_root;
3147
3148 path = btrfs_alloc_path();
3149 if (!path) {
3150 spin_lock(&sctx->stat_lock);
3151 sctx->stat.malloc_errors++;
3152 spin_unlock(&sctx->stat_lock);
3153 not_written = 1;
3154 goto out;
3155 }
3156
3157 trans = btrfs_join_transaction(root);
3158 if (IS_ERR(trans)) {
3159 not_written = 1;
3160 goto out;
3161 }
3162
3163 ret = iterate_inodes_from_logical(logical, fs_info, path,
3164 copy_nocow_pages_for_inode,
3165 nocow_ctx);
3166 if (ret != 0 && ret != -ENOENT) {
3167 pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %llu, ret %d\n",
3168 (unsigned long long)logical,
3169 (unsigned long long)physical_for_dev_replace,
3170 (unsigned long long)len,
3171 (unsigned long long)mirror_num, ret);
3172 not_written = 1;
3173 goto out;
3174 }
3175
3176out:
3177 if (trans && !IS_ERR(trans))
3178 btrfs_end_transaction(trans, root);
3179 if (not_written)
3180 btrfs_dev_replace_stats_inc(&fs_info->dev_replace.
3181 num_uncorrectable_read_errors);
3182
3183 btrfs_free_path(path);
3184 kfree(nocow_ctx);
3185
3186 scrub_pending_trans_workers_dec(sctx);
3187}
3188
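/*
 * called for each (inode, offset, root) that references the range: read the
 * pages through the inode's page cache and write each one to the target
 * device at the corresponding physical offset
 */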
3189static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx)
3190{
3191 unsigned long index;
3192 struct scrub_copy_nocow_ctx *nocow_ctx = ctx;
3193 int ret = 0;
3194 struct btrfs_key key;
3195 struct inode *inode = NULL;
3196 struct btrfs_root *local_root;
3197 u64 physical_for_dev_replace;
3198 u64 len;
3199 struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info;
3200 int srcu_index;
3201
3202 key.objectid = root;
3203 key.type = BTRFS_ROOT_ITEM_KEY;
3204 key.offset = (u64)-1;
3205
3206 srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
3207
3208 local_root = btrfs_read_fs_root_no_name(fs_info, &key);
3209 if (IS_ERR(local_root)) {
3210 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
3211 return PTR_ERR(local_root);
3212 }
3213
3214 key.type = BTRFS_INODE_ITEM_KEY;
3215 key.objectid = inum;
3216 key.offset = 0;
3217 inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
3218 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
3219 if (IS_ERR(inode))
3220 return PTR_ERR(inode);
3221
3222 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
3223 len = nocow_ctx->len;
3224 while (len >= PAGE_CACHE_SIZE) {
3225 struct page *page = NULL;
3226 int ret_sub;
3227
3228 index = offset >> PAGE_CACHE_SHIFT;
3229
3230 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
3231 if (!page) {
3232 pr_err("find_or_create_page() failed\n");
3233 ret = -ENOMEM;
3234 goto next_page;
3235 }
3236
3237 if (PageUptodate(page)) {
3238 if (PageDirty(page))
3239 goto next_page;
3240 } else {
3241 ClearPageError(page);
3242 ret_sub = extent_read_full_page(&BTRFS_I(inode)->
3243 io_tree,
3244 page, btrfs_get_extent,
3245 nocow_ctx->mirror_num);
3246 if (ret_sub) {
3247 ret = ret_sub;
3248 goto next_page;
3249 }
3250 wait_on_page_locked(page);
3251 if (!PageUptodate(page)) {
3252 ret = -EIO;
3253 goto next_page;
3254 }
3255 }
3256 ret_sub = write_page_nocow(nocow_ctx->sctx,
3257 physical_for_dev_replace, page);
3258 if (ret_sub) {
3259 ret = ret_sub;
3260 goto next_page;
3261 }
3262
3263next_page:
3264 if (page) {
3265 unlock_page(page);
3266 put_page(page);
3267 }
3268 offset += PAGE_CACHE_SIZE;
3269 physical_for_dev_replace += PAGE_CACHE_SIZE;
3270 len -= PAGE_CACHE_SIZE;
3271 }
3272
3273 if (inode)
3274 iput(inode);
3275 return ret;
3276}
3277
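/*
 * synchronously write a single page to the dev-replace target device at the
 * given physical offset
 */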
3278static int write_page_nocow(struct scrub_ctx *sctx,
3279 u64 physical_for_dev_replace, struct page *page)
3280{
3281 struct bio *bio;
3282 struct btrfs_device *dev;
3283 int ret;
3284 DECLARE_COMPLETION_ONSTACK(compl);
3285
3286 dev = sctx->wr_ctx.tgtdev;
3287 if (!dev)
3288 return -EIO;
3289 if (!dev->bdev) {
3290 printk_ratelimited(KERN_WARNING
3291 "btrfs: scrub write_page_nocow(bdev == NULL) is unexpected!\n");
3292 return -EIO;
3293 }
3294 bio = bio_alloc(GFP_NOFS, 1);
3295 if (!bio) {
3296 spin_lock(&sctx->stat_lock);
3297 sctx->stat.malloc_errors++;
3298 spin_unlock(&sctx->stat_lock);
3299 return -ENOMEM;
3300 }
3301 bio->bi_private = &compl;
3302 bio->bi_end_io = scrub_complete_bio_end_io;
3303 bio->bi_size = 0;
3304 bio->bi_sector = physical_for_dev_replace >> 9;
3305 bio->bi_bdev = dev->bdev;
3306 ret = bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
3307 if (ret != PAGE_CACHE_SIZE) {
3308leave_with_eio:
3309 bio_put(bio);
3310 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
3311 return -EIO;
3312 }
3313 btrfsic_submit_bio(WRITE_SYNC, bio);
3314 wait_for_completion(&compl);
3315
3316 if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
3317 goto leave_with_eio;
3318
3319 bio_put(bio);
3320 return 0;
3321}
3322