#include <linux/blkdev.h>
#include <linux/ratelimit.h>
#include "ctree.h"
#include "volumes.h"
#include "disk-io.h"
#include "ordered-data.h"
#include "transaction.h"
#include "backref.h"
#include "extent_io.h"
#include "dev-replace.h"
#include "check-integrity.h"
#include "rcu-string.h"
#include "raid56.h"
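/*
 * Scrub walks all allocated extents and the super blocks of one device,
 * reads the data and verifies checksums and tree block headers. When a bad
 * copy is found, it is repaired from a good mirror where possible. In
 * device-replace mode the verified data is also written to the target device.
 */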
struct scrub_block;
struct scrub_ctx;
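/*
 * The following values only affect performance: how many pages are packed
 * into a single read or write bio, and how many scrub bios are kept in the
 * per-context pool.
 */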
#define SCRUB_PAGES_PER_RD_BIO	32
#define SCRUB_PAGES_PER_WR_BIO	32
#define SCRUB_BIOS_PER_SCTX	64
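/*
 * SCRUB_MAX_PAGES_PER_BLOCK * PAGE_SIZE must be at least as large as the
 * biggest node, leaf or sector size that has to be scrubbed as one block.
 */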
#define SCRUB_MAX_PAGES_PER_BLOCK	16
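/*
 * One page of data at a known logical/physical position on one device,
 * together with the expected checksum (if any) and its I/O status.
 */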
struct scrub_page {
	struct scrub_block *sblock;
	struct page *page;
	struct btrfs_device *dev;
	u64 flags;
	u64 generation;
	u64 logical;
	u64 physical;
	u64 physical_for_dev_replace;
	atomic_t ref_count;
	struct {
		unsigned int mirror_num:8;
		unsigned int have_csum:1;
		unsigned int io_error:1;
	};
	u8 csum[BTRFS_CSUM_SIZE];
};
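/*
 * A read or write bio being assembled; it collects physically contiguous
 * scrub_pages until it is full or a discontiguous page forces submission.
 */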
struct scrub_bio {
	int index;
	struct scrub_ctx *sctx;
	struct btrfs_device *dev;
	struct bio *bio;
	int err;
	u64 logical;
	u64 physical;
#if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
	struct scrub_page *pagev[SCRUB_PAGES_PER_WR_BIO];
#else
	struct scrub_page *pagev[SCRUB_PAGES_PER_RD_BIO];
#endif
	int page_count;
	int next_free;
	struct btrfs_work work;
};
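/*
 * One logical block (data sector, tree block or super block) made up of
 * one or more pages, plus the verification results for the whole block.
 */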
struct scrub_block {
	struct scrub_page *pagev[SCRUB_MAX_PAGES_PER_BLOCK];
	int page_count;
	atomic_t outstanding_pages;
	atomic_t ref_count;
	struct scrub_ctx *sctx;
	struct {
		unsigned int header_error:1;
		unsigned int checksum_error:1;
		unsigned int no_io_error_seen:1;
		unsigned int generation_error:1;
	};
};
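/* Write-side state used during device replace to fill the target device. */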
struct scrub_wr_ctx {
	struct scrub_bio *wr_curr_bio;
	struct btrfs_device *tgtdev;
	int pages_per_wr_bio;
	atomic_t flush_all_writes;
	struct mutex wr_lock;
};
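/*
 * Per-scrub state for one device: the pool of scrub bios, in-flight
 * accounting, the list of checksums to verify against, filesystem geometry
 * and the accumulated statistics.
 */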
struct scrub_ctx {
	struct scrub_bio *bios[SCRUB_BIOS_PER_SCTX];
	struct btrfs_root *dev_root;
	int first_free;
	int curr;
	atomic_t bios_in_flight;
	atomic_t workers_pending;
	spinlock_t list_lock;
	wait_queue_head_t list_wait;
	u16 csum_size;
	struct list_head csum_list;
	atomic_t cancel_req;
	int readonly;
	int pages_per_rd_bio;
	u32 sectorsize;
	u32 nodesize;
	u32 leafsize;

	int is_dev_replace;
	struct scrub_wr_ctx wr_ctx;

	struct btrfs_scrub_progress stat;
	spinlock_t stat_lock;
};

struct scrub_fixup_nodatasum {
	struct scrub_ctx *sctx;
	struct btrfs_device *dev;
	u64 logical;
	struct btrfs_root *root;
	struct btrfs_work work;
	int mirror_num;
};

struct scrub_copy_nocow_ctx {
	struct scrub_ctx *sctx;
	u64 logical;
	u64 len;
	int mirror_num;
	u64 physical_for_dev_replace;
	struct btrfs_work work;
};

struct scrub_warning {
	struct btrfs_path *path;
	u64 extent_item_size;
	char *scratch_buf;
	char *msg_buf;
	const char *errstr;
	sector_t sector;
	u64 logical;
	struct btrfs_device *dev;
	int msg_bufsize;
	int scratch_bufsize;
};

static void scrub_pending_bio_inc(struct scrub_ctx *sctx);
static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx);
static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx);
static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
				     struct btrfs_fs_info *fs_info,
				     struct scrub_block *original_sblock,
				     u64 length, u64 logical,
				     struct scrub_block *sblocks_for_recheck);
static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
				struct scrub_block *sblock, int is_metadata,
				int have_csum, u8 *csum, u64 generation,
				u16 csum_size);
static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
					 struct scrub_block *sblock,
					 int is_metadata, int have_csum,
					 const u8 *csum, u64 generation,
					 u16 csum_size);
static void scrub_complete_bio_end_io(struct bio *bio, int err);
static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
					     struct scrub_block *sblock_good,
					     int force_write);
static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
					    struct scrub_block *sblock_good,
					    int page_num, int force_write);
static void scrub_write_block_to_dev_replace(struct scrub_block *sblock);
static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
					   int page_num);
static int scrub_checksum_data(struct scrub_block *sblock);
static int scrub_checksum_tree_block(struct scrub_block *sblock);
static int scrub_checksum_super(struct scrub_block *sblock);
static void scrub_block_get(struct scrub_block *sblock);
static void scrub_block_put(struct scrub_block *sblock);
static void scrub_page_get(struct scrub_page *spage);
static void scrub_page_put(struct scrub_page *spage);
static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage);
static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
		       u64 physical, struct btrfs_device *dev, u64 flags,
		       u64 gen, int mirror_num, u8 *csum, int force,
		       u64 physical_for_dev_replace);
static void scrub_bio_end_io(struct bio *bio, int err);
static void scrub_bio_end_io_worker(struct btrfs_work *work);
static void scrub_block_complete(struct scrub_block *sblock);
static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
			       u64 extent_logical, u64 extent_len,
			       u64 *extent_physical,
			       struct btrfs_device **extent_dev,
			       int *extent_mirror_num);
static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
			      struct scrub_wr_ctx *wr_ctx,
			      struct btrfs_fs_info *fs_info,
			      struct btrfs_device *dev,
			      int is_dev_replace);
static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx);
static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage);
static void scrub_wr_submit(struct scrub_ctx *sctx);
static void scrub_wr_bio_end_io(struct bio *bio, int err);
static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
static int write_page_nocow(struct scrub_ctx *sctx,
			    u64 physical_for_dev_replace, struct page *page);
static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
				      void *ctx);
static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
			    int mirror_num, u64 physical_for_dev_replace);
static void copy_nocow_pages_worker(struct btrfs_work *work);

static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
{
	atomic_inc(&sctx->bios_in_flight);
}

static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
{
	atomic_dec(&sctx->bios_in_flight);
	wake_up(&sctx->list_wait);
}
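/*
 * The nodatasum fixup workers may need to join a transaction. While such a
 * worker is pending, the scrub is accounted as both running and paused so
 * that a pause request issued for a transaction commit does not deadlock
 * against these workers.
 */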
static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx)
{
	struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;

	mutex_lock(&fs_info->scrub_lock);
	atomic_inc(&fs_info->scrubs_running);
	atomic_inc(&fs_info->scrubs_paused);
	mutex_unlock(&fs_info->scrub_lock);
	atomic_inc(&sctx->workers_pending);
}
static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx)
{
	struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;

	mutex_lock(&fs_info->scrub_lock);
	atomic_dec(&fs_info->scrubs_running);
	atomic_dec(&fs_info->scrubs_paused);
	mutex_unlock(&fs_info->scrub_lock);
	atomic_dec(&sctx->workers_pending);
	wake_up(&fs_info->scrub_pause_wait);
	wake_up(&sctx->list_wait);
}
static void scrub_free_csums(struct scrub_ctx *sctx)
{
	while (!list_empty(&sctx->csum_list)) {
		struct btrfs_ordered_sum *sum;
		sum = list_first_entry(&sctx->csum_list,
				       struct btrfs_ordered_sum, list);
		list_del(&sum->list);
		kfree(sum);
	}
}

static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
{
	int i;

	if (!sctx)
		return;

	scrub_free_wr_ctx(&sctx->wr_ctx);

	if (sctx->curr != -1) {
		struct scrub_bio *sbio = sctx->bios[sctx->curr];

		for (i = 0; i < sbio->page_count; i++) {
			WARN_ON(!sbio->pagev[i]->page);
			scrub_block_put(sbio->pagev[i]->sblock);
		}
		bio_put(sbio->bio);
	}

	for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
		struct scrub_bio *sbio = sctx->bios[i];

		if (!sbio)
			break;
		kfree(sbio);
	}

	scrub_free_csums(sctx);
	kfree(sctx);
}
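/*
 * Allocate and initialize the per-device scrub context: the pool of scrub
 * bios, the checksum list and, for device replace, the write context that
 * targets the replacement device.
 */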
static noinline_for_stack
struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
{
	struct scrub_ctx *sctx;
	int i;
	struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
	int pages_per_rd_bio;
	int ret;

	if (dev->bdev)
		pages_per_rd_bio = min_t(int, SCRUB_PAGES_PER_RD_BIO,
					 bio_get_nr_vecs(dev->bdev));
	else
		pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
	sctx = kzalloc(sizeof(*sctx), GFP_NOFS);
	if (!sctx)
		goto nomem;
	sctx->is_dev_replace = is_dev_replace;
	sctx->pages_per_rd_bio = pages_per_rd_bio;
	sctx->curr = -1;
	sctx->dev_root = dev->dev_root;
	for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
		struct scrub_bio *sbio;

		sbio = kzalloc(sizeof(*sbio), GFP_NOFS);
		if (!sbio)
			goto nomem;
		sctx->bios[i] = sbio;

		sbio->index = i;
		sbio->sctx = sctx;
		sbio->page_count = 0;
		sbio->work.func = scrub_bio_end_io_worker;

		if (i != SCRUB_BIOS_PER_SCTX - 1)
			sctx->bios[i]->next_free = i + 1;
		else
			sctx->bios[i]->next_free = -1;
	}
	sctx->first_free = 0;
	sctx->nodesize = dev->dev_root->nodesize;
	sctx->leafsize = dev->dev_root->leafsize;
	sctx->sectorsize = dev->dev_root->sectorsize;
	atomic_set(&sctx->bios_in_flight, 0);
	atomic_set(&sctx->workers_pending, 0);
	atomic_set(&sctx->cancel_req, 0);
	sctx->csum_size = btrfs_super_csum_size(fs_info->super_copy);
	INIT_LIST_HEAD(&sctx->csum_list);

	spin_lock_init(&sctx->list_lock);
	spin_lock_init(&sctx->stat_lock);
	init_waitqueue_head(&sctx->list_wait);

	ret = scrub_setup_wr_ctx(sctx, &sctx->wr_ctx, fs_info,
				 fs_info->dev_replace.tgtdev, is_dev_replace);
	if (ret) {
		scrub_free_ctx(sctx);
		return ERR_PTR(ret);
	}
	return sctx;

nomem:
	scrub_free_ctx(sctx);
	return ERR_PTR(-ENOMEM);
}
422
423static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
424 void *warn_ctx)
425{
426 u64 isize;
427 u32 nlink;
428 int ret;
429 int i;
430 struct extent_buffer *eb;
431 struct btrfs_inode_item *inode_item;
432 struct scrub_warning *swarn = warn_ctx;
433 struct btrfs_fs_info *fs_info = swarn->dev->dev_root->fs_info;
434 struct inode_fs_paths *ipath = NULL;
435 struct btrfs_root *local_root;
436 struct btrfs_key root_key;
437
438 root_key.objectid = root;
439 root_key.type = BTRFS_ROOT_ITEM_KEY;
440 root_key.offset = (u64)-1;
441 local_root = btrfs_read_fs_root_no_name(fs_info, &root_key);
442 if (IS_ERR(local_root)) {
443 ret = PTR_ERR(local_root);
444 goto err;
445 }
446
447 ret = inode_item_info(inum, 0, local_root, swarn->path);
448 if (ret) {
449 btrfs_release_path(swarn->path);
450 goto err;
451 }
452
453 eb = swarn->path->nodes[0];
454 inode_item = btrfs_item_ptr(eb, swarn->path->slots[0],
455 struct btrfs_inode_item);
456 isize = btrfs_inode_size(eb, inode_item);
457 nlink = btrfs_inode_nlink(eb, inode_item);
458 btrfs_release_path(swarn->path);
459
460 ipath = init_ipath(4096, local_root, swarn->path);
461 if (IS_ERR(ipath)) {
462 ret = PTR_ERR(ipath);
463 ipath = NULL;
464 goto err;
465 }
466 ret = paths_from_inode(inum, ipath);
467
468 if (ret < 0)
469 goto err;
475 for (i = 0; i < ipath->fspath->elem_cnt; ++i)
476 printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "
477 "%s, sector %llu, root %llu, inode %llu, offset %llu, "
478 "length %llu, links %u (path: %s)\n", swarn->errstr,
479 swarn->logical, rcu_str_deref(swarn->dev->name),
480 (unsigned long long)swarn->sector, root, inum, offset,
481 min(isize - offset, (u64)PAGE_SIZE), nlink,
482 (char *)(unsigned long)ipath->fspath->val[i]);
483
484 free_ipath(ipath);
485 return 0;
486
487err:
488 printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "
489 "%s, sector %llu, root %llu, inode %llu, offset %llu: path "
490 "resolving failed with ret=%d\n", swarn->errstr,
491 swarn->logical, rcu_str_deref(swarn->dev->name),
492 (unsigned long long)swarn->sector, root, inum, offset, ret);
493
494 free_ipath(ipath);
495 return 0;
496}
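/*
 * Report a corrupted block in human readable form: for tree blocks the
 * owning tree and level, for data extents the referencing inodes and file
 * paths resolved through backrefs.
 */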
498static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
499{
500 struct btrfs_device *dev;
501 struct btrfs_fs_info *fs_info;
502 struct btrfs_path *path;
503 struct btrfs_key found_key;
504 struct extent_buffer *eb;
505 struct btrfs_extent_item *ei;
506 struct scrub_warning swarn;
507 unsigned long ptr = 0;
508 u64 extent_item_pos;
509 u64 flags = 0;
510 u64 ref_root;
511 u32 item_size;
512 u8 ref_level;
513 const int bufsize = 4096;
514 int ret;
515
516 WARN_ON(sblock->page_count < 1);
517 dev = sblock->pagev[0]->dev;
518 fs_info = sblock->sctx->dev_root->fs_info;
519
520 path = btrfs_alloc_path();
521
522 swarn.scratch_buf = kmalloc(bufsize, GFP_NOFS);
523 swarn.msg_buf = kmalloc(bufsize, GFP_NOFS);
524 swarn.sector = (sblock->pagev[0]->physical) >> 9;
525 swarn.logical = sblock->pagev[0]->logical;
526 swarn.errstr = errstr;
527 swarn.dev = NULL;
528 swarn.msg_bufsize = bufsize;
529 swarn.scratch_bufsize = bufsize;
530
531 if (!path || !swarn.scratch_buf || !swarn.msg_buf)
532 goto out;
533
534 ret = extent_from_logical(fs_info, swarn.logical, path, &found_key,
535 &flags);
536 if (ret < 0)
537 goto out;
538
539 extent_item_pos = swarn.logical - found_key.objectid;
540 swarn.extent_item_size = found_key.offset;
541
542 eb = path->nodes[0];
543 ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
544 item_size = btrfs_item_size_nr(eb, path->slots[0]);
545
546 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
547 do {
548 ret = tree_backref_for_extent(&ptr, eb, ei, item_size,
549 &ref_root, &ref_level);
550 printk_in_rcu(KERN_WARNING
551 "btrfs: %s at logical %llu on dev %s, "
552 "sector %llu: metadata %s (level %d) in tree "
553 "%llu\n", errstr, swarn.logical,
554 rcu_str_deref(dev->name),
555 (unsigned long long)swarn.sector,
556 ref_level ? "node" : "leaf",
557 ret < 0 ? -1 : ref_level,
558 ret < 0 ? -1 : ref_root);
559 } while (ret != 1);
560 btrfs_release_path(path);
561 } else {
562 btrfs_release_path(path);
563 swarn.path = path;
564 swarn.dev = dev;
565 iterate_extent_inodes(fs_info, found_key.objectid,
566 extent_item_pos, 1,
567 scrub_print_warning_inode, &swarn);
568 }
569
570out:
571 btrfs_free_path(path);
572 kfree(swarn.scratch_buf);
573 kfree(swarn.msg_buf);
574}
575
576static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
577{
578 struct page *page = NULL;
579 unsigned long index;
580 struct scrub_fixup_nodatasum *fixup = fixup_ctx;
581 int ret;
582 int corrected = 0;
583 struct btrfs_key key;
584 struct inode *inode = NULL;
585 struct btrfs_fs_info *fs_info;
586 u64 end = offset + PAGE_SIZE - 1;
587 struct btrfs_root *local_root;
588 int srcu_index;
589
590 key.objectid = root;
591 key.type = BTRFS_ROOT_ITEM_KEY;
592 key.offset = (u64)-1;
593
594 fs_info = fixup->root->fs_info;
595 srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
596
597 local_root = btrfs_read_fs_root_no_name(fs_info, &key);
598 if (IS_ERR(local_root)) {
599 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
600 return PTR_ERR(local_root);
601 }
602
603 key.type = BTRFS_INODE_ITEM_KEY;
604 key.objectid = inum;
605 key.offset = 0;
606 inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
607 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
608 if (IS_ERR(inode))
609 return PTR_ERR(inode);
610
611 index = offset >> PAGE_CACHE_SHIFT;
612
613 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
614 if (!page) {
615 ret = -ENOMEM;
616 goto out;
617 }
618
619 if (PageUptodate(page)) {
620 if (PageDirty(page)) {
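			/*
			 * The data that belonged in the bad sector is no
			 * longer in memory because the page has been
			 * modified; writing the modified page back would be
			 * wrong, so treat the error as uncorrectable here.
			 */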
637 ret = -EIO;
638 goto out;
639 }
640 fs_info = BTRFS_I(inode)->root->fs_info;
641 ret = repair_io_failure(fs_info, offset, PAGE_SIZE,
642 fixup->logical, page,
643 fixup->mirror_num);
644 unlock_page(page);
645 corrected = !ret;
646 } else {
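		/*
		 * Trigger an ordinary read of the page; the read path reports
		 * and, where possible, repairs I/O errors itself. The
		 * EXTENT_DAMAGED bit set here is checked afterwards to tell
		 * whether the data was actually corrected.
		 */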
652 ret = set_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
653 EXTENT_DAMAGED, GFP_NOFS);
654 if (ret) {
655
656 WARN_ON(ret > 0);
657 if (ret > 0)
658 ret = -EFAULT;
659 goto out;
660 }
661
662 ret = extent_read_full_page(&BTRFS_I(inode)->io_tree, page,
663 btrfs_get_extent,
664 fixup->mirror_num);
665 wait_on_page_locked(page);
666
667 corrected = !test_range_bit(&BTRFS_I(inode)->io_tree, offset,
668 end, EXTENT_DAMAGED, 0, NULL);
669 if (!corrected)
670 clear_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
671 EXTENT_DAMAGED, GFP_NOFS);
672 }
673
674out:
675 if (page)
676 put_page(page);
677 if (inode)
678 iput(inode);
679
680 if (ret < 0)
681 return ret;
682
683 if (ret == 0 && corrected) {
688 return 1;
689 }
690
691 return -EIO;
692}
693
694static void scrub_fixup_nodatasum(struct btrfs_work *work)
695{
696 int ret;
697 struct scrub_fixup_nodatasum *fixup;
698 struct scrub_ctx *sctx;
699 struct btrfs_trans_handle *trans = NULL;
700 struct btrfs_fs_info *fs_info;
701 struct btrfs_path *path;
702 int uncorrectable = 0;
703
704 fixup = container_of(work, struct scrub_fixup_nodatasum, work);
705 sctx = fixup->sctx;
706 fs_info = fixup->root->fs_info;
707
708 path = btrfs_alloc_path();
709 if (!path) {
710 spin_lock(&sctx->stat_lock);
711 ++sctx->stat.malloc_errors;
712 spin_unlock(&sctx->stat_lock);
713 uncorrectable = 1;
714 goto out;
715 }
716
717 trans = btrfs_join_transaction(fixup->root);
718 if (IS_ERR(trans)) {
719 uncorrectable = 1;
720 goto out;
721 }
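	/*
	 * For each inode that references the bad extent, read the page
	 * through the page cache; an already up-to-date copy is written back
	 * over the bad sector, otherwise the regular read path repairs it
	 * from a good mirror.
	 */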
732 ret = iterate_inodes_from_logical(fixup->logical, fixup->root->fs_info,
733 path, scrub_fixup_readpage,
734 fixup);
735 if (ret < 0) {
736 uncorrectable = 1;
737 goto out;
738 }
739 WARN_ON(ret != 1);
740
741 spin_lock(&sctx->stat_lock);
742 ++sctx->stat.corrected_errors;
743 spin_unlock(&sctx->stat_lock);
744
745out:
746 if (trans && !IS_ERR(trans))
747 btrfs_end_transaction(trans, fixup->root);
748 if (uncorrectable) {
749 spin_lock(&sctx->stat_lock);
750 ++sctx->stat.uncorrectable_errors;
751 spin_unlock(&sctx->stat_lock);
752 btrfs_dev_replace_stats_inc(
753 &sctx->dev_root->fs_info->dev_replace.
754 num_uncorrectable_read_errors);
755 printk_ratelimited_in_rcu(KERN_ERR
756 "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n",
757 (unsigned long long)fixup->logical,
758 rcu_str_deref(fixup->dev->name));
759 }
760
761 btrfs_free_path(path);
762 kfree(fixup);
763
764 scrub_pending_trans_workers_dec(sctx);
765}
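/*
 * Called for a block whose read, checksum or header verification failed.
 * All mirrors are re-read page by page, the block is repaired from good
 * copies where possible, and the statistics and per-device error counters
 * are updated accordingly.
 */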
775static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
776{
777 struct scrub_ctx *sctx = sblock_to_check->sctx;
778 struct btrfs_device *dev;
779 struct btrfs_fs_info *fs_info;
780 u64 length;
781 u64 logical;
782 u64 generation;
783 unsigned int failed_mirror_index;
784 unsigned int is_metadata;
785 unsigned int have_csum;
786 u8 *csum;
787 struct scrub_block *sblocks_for_recheck;
788 struct scrub_block *sblock_bad;
789 int ret;
790 int mirror_index;
791 int page_num;
792 int success;
793 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
794 DEFAULT_RATELIMIT_BURST);
795
796 BUG_ON(sblock_to_check->page_count < 1);
797 fs_info = sctx->dev_root->fs_info;
798 if (sblock_to_check->pagev[0]->flags & BTRFS_EXTENT_FLAG_SUPER) {
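		/*
		 * Errors in super blocks are only counted here; the super
		 * blocks are rewritten with the next transaction commit
		 * anyway.
		 */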
804 spin_lock(&sctx->stat_lock);
805 ++sctx->stat.super_errors;
806 spin_unlock(&sctx->stat_lock);
807 return 0;
808 }
809 length = sblock_to_check->page_count * PAGE_SIZE;
810 logical = sblock_to_check->pagev[0]->logical;
811 generation = sblock_to_check->pagev[0]->generation;
812 BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1);
813 failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1;
814 is_metadata = !(sblock_to_check->pagev[0]->flags &
815 BTRFS_EXTENT_FLAG_DATA);
816 have_csum = sblock_to_check->pagev[0]->have_csum;
817 csum = sblock_to_check->pagev[0]->csum;
818 dev = sblock_to_check->pagev[0]->dev;
819
820 if (sctx->is_dev_replace && !is_metadata && !have_csum) {
821 sblocks_for_recheck = NULL;
822 goto nodatasum_case;
823 }
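	/*
	 * Read all mirrors of this block, one page at a time, so that pages
	 * can be repaired individually; a good block can sometimes be pieced
	 * together even if every mirror has an error somewhere.
	 */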
854 sblocks_for_recheck = kzalloc(BTRFS_MAX_MIRRORS *
855 sizeof(*sblocks_for_recheck),
856 GFP_NOFS);
857 if (!sblocks_for_recheck) {
858 spin_lock(&sctx->stat_lock);
859 sctx->stat.malloc_errors++;
860 sctx->stat.read_errors++;
861 sctx->stat.uncorrectable_errors++;
862 spin_unlock(&sctx->stat_lock);
863 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
864 goto out;
865 }
866
867
868 ret = scrub_setup_recheck_block(sctx, fs_info, sblock_to_check, length,
869 logical, sblocks_for_recheck);
870 if (ret) {
871 spin_lock(&sctx->stat_lock);
872 sctx->stat.read_errors++;
873 sctx->stat.uncorrectable_errors++;
874 spin_unlock(&sctx->stat_lock);
875 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
876 goto out;
877 }
878 BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
879 sblock_bad = sblocks_for_recheck + failed_mirror_index;
880
881
882 scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum,
883 csum, generation, sctx->csum_size);
884
885 if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
886 sblock_bad->no_io_error_seen) {
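		/*
		 * The data is not actually wrong: rereading page by page
		 * succeeded, so the earlier error was only seen for the
		 * larger bio. Count it as an unverified error.
		 */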
895 spin_lock(&sctx->stat_lock);
896 sctx->stat.unverified_errors++;
897 spin_unlock(&sctx->stat_lock);
898
899 if (sctx->is_dev_replace)
900 scrub_write_block_to_dev_replace(sblock_bad);
901 goto out;
902 }
903
904 if (!sblock_bad->no_io_error_seen) {
905 spin_lock(&sctx->stat_lock);
906 sctx->stat.read_errors++;
907 spin_unlock(&sctx->stat_lock);
908 if (__ratelimit(&_rs))
909 scrub_print_warning("i/o error", sblock_to_check);
910 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
911 } else if (sblock_bad->checksum_error) {
912 spin_lock(&sctx->stat_lock);
913 sctx->stat.csum_errors++;
914 spin_unlock(&sctx->stat_lock);
915 if (__ratelimit(&_rs))
916 scrub_print_warning("checksum error", sblock_to_check);
917 btrfs_dev_stat_inc_and_print(dev,
918 BTRFS_DEV_STAT_CORRUPTION_ERRS);
919 } else if (sblock_bad->header_error) {
920 spin_lock(&sctx->stat_lock);
921 sctx->stat.verify_errors++;
922 spin_unlock(&sctx->stat_lock);
923 if (__ratelimit(&_rs))
924 scrub_print_warning("checksum/header error",
925 sblock_to_check);
926 if (sblock_bad->generation_error)
927 btrfs_dev_stat_inc_and_print(dev,
928 BTRFS_DEV_STAT_GENERATION_ERRS);
929 else
930 btrfs_dev_stat_inc_and_print(dev,
931 BTRFS_DEV_STAT_CORRUPTION_ERRS);
932 }
933
934 if (sctx->readonly && !sctx->is_dev_replace)
935 goto did_not_correct_error;
936
937 if (!is_metadata && !have_csum) {
938 struct scrub_fixup_nodatasum *fixup_nodatasum;
939
940nodatasum_case:
941 WARN_ON(sctx->is_dev_replace);
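		/*
		 * Data without a checksum is nodatasum data that may be
		 * modified concurrently, so it cannot be verified or repaired
		 * from the commit root here. Hand it to a worker that goes
		 * through the referencing inodes' page cache instead.
		 */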
950 fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS);
951 if (!fixup_nodatasum)
952 goto did_not_correct_error;
953 fixup_nodatasum->sctx = sctx;
954 fixup_nodatasum->dev = dev;
955 fixup_nodatasum->logical = logical;
956 fixup_nodatasum->root = fs_info->extent_root;
957 fixup_nodatasum->mirror_num = failed_mirror_index + 1;
958 scrub_pending_trans_workers_inc(sctx);
959 fixup_nodatasum->work.func = scrub_fixup_nodatasum;
960 btrfs_queue_worker(&fs_info->scrub_workers,
961 &fixup_nodatasum->work);
962 goto out;
963 }
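	/*
	 * Look for a mirror whose copy of the whole block has no I/O,
	 * checksum or header errors; such a copy is written to the replace
	 * target or used to repair the bad mirror.
	 */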
980 for (mirror_index = 0;
981 mirror_index < BTRFS_MAX_MIRRORS &&
982 sblocks_for_recheck[mirror_index].page_count > 0;
983 mirror_index++) {
984 struct scrub_block *sblock_other;
985
986 if (mirror_index == failed_mirror_index)
987 continue;
988 sblock_other = sblocks_for_recheck + mirror_index;
989
990
991 scrub_recheck_block(fs_info, sblock_other, is_metadata,
992 have_csum, csum, generation,
993 sctx->csum_size);
994
995 if (!sblock_other->header_error &&
996 !sblock_other->checksum_error &&
997 sblock_other->no_io_error_seen) {
998 if (sctx->is_dev_replace) {
999 scrub_write_block_to_dev_replace(sblock_other);
1000 } else {
1001 int force_write = is_metadata || have_csum;
1002
1003 ret = scrub_repair_block_from_good_copy(
1004 sblock_bad, sblock_other,
1005 force_write);
1006 }
1007 if (0 == ret)
1008 goto corrected_error;
1009 }
1010 }
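	/*
	 * In dev-replace mode, assemble the block for the target device page
	 * by page from whichever mirror has a readable copy of each page;
	 * pages for which no mirror has a good copy are written from the bad
	 * block anyway (zero-filled if they could not be read at all).
	 */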
1015 if (sctx->is_dev_replace) {
1016 success = 1;
1017 for (page_num = 0; page_num < sblock_bad->page_count;
1018 page_num++) {
1019 int sub_success;
1020
1021 sub_success = 0;
1022 for (mirror_index = 0;
1023 mirror_index < BTRFS_MAX_MIRRORS &&
1024 sblocks_for_recheck[mirror_index].page_count > 0;
1025 mirror_index++) {
1026 struct scrub_block *sblock_other =
1027 sblocks_for_recheck + mirror_index;
1028 struct scrub_page *page_other =
1029 sblock_other->pagev[page_num];
1030
1031 if (!page_other->io_error) {
1032 ret = scrub_write_page_to_dev_replace(
1033 sblock_other, page_num);
1034 if (ret == 0) {
1035
1036 sub_success = 1;
1037 break;
1038 } else {
1039 btrfs_dev_replace_stats_inc(
1040 &sctx->dev_root->
1041 fs_info->dev_replace.
1042 num_write_errors);
1043 }
1044 }
1045 }
1046
1047 if (!sub_success) {
1055 success = 0;
1056 ret = scrub_write_page_to_dev_replace(
1057 sblock_bad, page_num);
1058 if (ret)
1059 btrfs_dev_replace_stats_inc(
1060 &sctx->dev_root->fs_info->
1061 dev_replace.num_write_errors);
1062 }
1063 }
1064
1065 goto out;
1066 }
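	/*
	 * A block that read without I/O errors but still fails verification
	 * cannot be fixed page by page. Otherwise, rewrite every page that
	 * had an I/O error from the first mirror holding a good copy of that
	 * page.
	 */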
1095 if (sblock_bad->no_io_error_seen)
1096 goto did_not_correct_error;
1097
1098 success = 1;
1099 for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
1100 struct scrub_page *page_bad = sblock_bad->pagev[page_num];
1101
1102 if (!page_bad->io_error)
1103 continue;
1104
1105 for (mirror_index = 0;
1106 mirror_index < BTRFS_MAX_MIRRORS &&
1107 sblocks_for_recheck[mirror_index].page_count > 0;
1108 mirror_index++) {
1109 struct scrub_block *sblock_other = sblocks_for_recheck +
1110 mirror_index;
1111 struct scrub_page *page_other = sblock_other->pagev[
1112 page_num];
1113
1114 if (!page_other->io_error) {
1115 ret = scrub_repair_page_from_good_copy(
1116 sblock_bad, sblock_other, page_num, 0);
1117 if (0 == ret) {
1118 page_bad->io_error = 0;
1119 break;
1120 }
1121 }
1122 }
1123
1124 if (page_bad->io_error) {
1125
1126 success = 0;
1127 }
1128 }
1129
1130 if (success) {
1131 if (is_metadata || have_csum) {
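			/*
			 * Re-read and re-verify the repaired block before
			 * declaring the error corrected.
			 */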
1141 scrub_recheck_block(fs_info, sblock_bad,
1142 is_metadata, have_csum, csum,
1143 generation, sctx->csum_size);
1144 if (!sblock_bad->header_error &&
1145 !sblock_bad->checksum_error &&
1146 sblock_bad->no_io_error_seen)
1147 goto corrected_error;
1148 else
1149 goto did_not_correct_error;
1150 } else {
1151corrected_error:
1152 spin_lock(&sctx->stat_lock);
1153 sctx->stat.corrected_errors++;
1154 spin_unlock(&sctx->stat_lock);
1155 printk_ratelimited_in_rcu(KERN_ERR
1156 "btrfs: fixed up error at logical %llu on dev %s\n",
1157 (unsigned long long)logical,
1158 rcu_str_deref(dev->name));
1159 }
1160 } else {
1161did_not_correct_error:
1162 spin_lock(&sctx->stat_lock);
1163 sctx->stat.uncorrectable_errors++;
1164 spin_unlock(&sctx->stat_lock);
1165 printk_ratelimited_in_rcu(KERN_ERR
1166 "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n",
1167 (unsigned long long)logical,
1168 rcu_str_deref(dev->name));
1169 }
1170
1171out:
1172 if (sblocks_for_recheck) {
1173 for (mirror_index = 0; mirror_index < BTRFS_MAX_MIRRORS;
1174 mirror_index++) {
1175 struct scrub_block *sblock = sblocks_for_recheck +
1176 mirror_index;
1177 int page_index;
1178
1179 for (page_index = 0; page_index < sblock->page_count;
1180 page_index++) {
1181 sblock->pagev[page_index]->sblock = NULL;
1182 scrub_page_put(sblock->pagev[page_index]);
1183 }
1184 }
1185 kfree(sblocks_for_recheck);
1186 }
1187
1188 return 0;
1189}
1190
1191static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
1192 struct btrfs_fs_info *fs_info,
1193 struct scrub_block *original_sblock,
1194 u64 length, u64 logical,
1195 struct scrub_block *sblocks_for_recheck)
1196{
1197 int page_index;
1198 int mirror_index;
1199 int ret;
1207 page_index = 0;
1208 while (length > 0) {
1209 u64 sublen = min_t(u64, length, PAGE_SIZE);
1210 u64 mapped_length = sublen;
1211 struct btrfs_bio *bbio = NULL;
1217 ret = btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, logical,
1218 &mapped_length, &bbio, 0);
1219 if (ret || !bbio || mapped_length < sublen) {
1220 kfree(bbio);
1221 return -EIO;
1222 }
1223
1224 BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO);
1225 for (mirror_index = 0; mirror_index < (int)bbio->num_stripes;
1226 mirror_index++) {
1227 struct scrub_block *sblock;
1228 struct scrub_page *page;
1229
1230 if (mirror_index >= BTRFS_MAX_MIRRORS)
1231 continue;
1232
1233 sblock = sblocks_for_recheck + mirror_index;
1234 sblock->sctx = sctx;
1235 page = kzalloc(sizeof(*page), GFP_NOFS);
1236 if (!page) {
1237leave_nomem:
1238 spin_lock(&sctx->stat_lock);
1239 sctx->stat.malloc_errors++;
1240 spin_unlock(&sctx->stat_lock);
1241 kfree(bbio);
1242 return -ENOMEM;
1243 }
1244 scrub_page_get(page);
1245 sblock->pagev[page_index] = page;
1246 page->logical = logical;
1247 page->physical = bbio->stripes[mirror_index].physical;
1248 BUG_ON(page_index >= original_sblock->page_count);
1249 page->physical_for_dev_replace =
1250 original_sblock->pagev[page_index]->
1251 physical_for_dev_replace;
1252
1253 page->dev = bbio->stripes[mirror_index].dev;
1254 page->mirror_num = mirror_index + 1;
1255 sblock->page_count++;
1256 page->page = alloc_page(GFP_NOFS);
1257 if (!page->page)
1258 goto leave_nomem;
1259 }
1260 kfree(bbio);
1261 length -= sublen;
1262 logical += sublen;
1263 page_index++;
1264 }
1265
1266 return 0;
1267}
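/*
 * Re-read every page of the block, one bio per page, and flag pages that
 * fail with io_error. If all reads succeed, verify the checksum and, for
 * metadata, the header fields.
 */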
1276static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
1277 struct scrub_block *sblock, int is_metadata,
1278 int have_csum, u8 *csum, u64 generation,
1279 u16 csum_size)
1280{
1281 int page_num;
1282
1283 sblock->no_io_error_seen = 1;
1284 sblock->header_error = 0;
1285 sblock->checksum_error = 0;
1286
1287 for (page_num = 0; page_num < sblock->page_count; page_num++) {
1288 struct bio *bio;
1289 struct scrub_page *page = sblock->pagev[page_num];
1290 DECLARE_COMPLETION_ONSTACK(complete);
1291
1292 if (page->dev->bdev == NULL) {
1293 page->io_error = 1;
1294 sblock->no_io_error_seen = 0;
1295 continue;
1296 }
1297
1298 WARN_ON(!page->page);
1299 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
1300 if (!bio) {
1301 page->io_error = 1;
1302 sblock->no_io_error_seen = 0;
1303 continue;
1304 }
1305 bio->bi_bdev = page->dev->bdev;
1306 bio->bi_sector = page->physical >> 9;
1307 bio->bi_end_io = scrub_complete_bio_end_io;
1308 bio->bi_private = &complete;
1309
1310 bio_add_page(bio, page->page, PAGE_SIZE, 0);
1311 btrfsic_submit_bio(READ, bio);
1312
1313
1314 wait_for_completion(&complete);
1315
1316 page->io_error = !test_bit(BIO_UPTODATE, &bio->bi_flags);
1317 if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
1318 sblock->no_io_error_seen = 0;
1319 bio_put(bio);
1320 }
1321
1322 if (sblock->no_io_error_seen)
1323 scrub_recheck_block_checksum(fs_info, sblock, is_metadata,
1324 have_csum, csum, generation,
1325 csum_size);
1326
1327 return;
1328}
1329
1330static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
1331 struct scrub_block *sblock,
1332 int is_metadata, int have_csum,
1333 const u8 *csum, u64 generation,
1334 u16 csum_size)
1335{
1336 int page_num;
1337 u8 calculated_csum[BTRFS_CSUM_SIZE];
1338 u32 crc = ~(u32)0;
1339 void *mapped_buffer;
1340
1341 WARN_ON(!sblock->pagev[0]->page);
1342 if (is_metadata) {
1343 struct btrfs_header *h;
1344
1345 mapped_buffer = kmap_atomic(sblock->pagev[0]->page);
1346 h = (struct btrfs_header *)mapped_buffer;
1347
1348 if (sblock->pagev[0]->logical != le64_to_cpu(h->bytenr) ||
1349 memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) ||
1350 memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
1351 BTRFS_UUID_SIZE)) {
1352 sblock->header_error = 1;
1353 } else if (generation != le64_to_cpu(h->generation)) {
1354 sblock->header_error = 1;
1355 sblock->generation_error = 1;
1356 }
1357 csum = h->csum;
1358 } else {
1359 if (!have_csum)
1360 return;
1361
1362 mapped_buffer = kmap_atomic(sblock->pagev[0]->page);
1363 }
1364
1365 for (page_num = 0;;) {
1366 if (page_num == 0 && is_metadata)
1367 crc = btrfs_csum_data(
1368 ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE,
1369 crc, PAGE_SIZE - BTRFS_CSUM_SIZE);
1370 else
1371 crc = btrfs_csum_data(mapped_buffer, crc, PAGE_SIZE);
1372
1373 kunmap_atomic(mapped_buffer);
1374 page_num++;
1375 if (page_num >= sblock->page_count)
1376 break;
1377 WARN_ON(!sblock->pagev[page_num]->page);
1378
1379 mapped_buffer = kmap_atomic(sblock->pagev[page_num]->page);
1380 }
1381
1382 btrfs_csum_final(crc, calculated_csum);
1383 if (memcmp(calculated_csum, csum, csum_size))
1384 sblock->checksum_error = 1;
1385}
1386
1387static void scrub_complete_bio_end_io(struct bio *bio, int err)
1388{
1389 complete((struct completion *)bio->bi_private);
1390}
1391
1392static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
1393 struct scrub_block *sblock_good,
1394 int force_write)
1395{
1396 int page_num;
1397 int ret = 0;
1398
1399 for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
1400 int ret_sub;
1401
1402 ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
1403 sblock_good,
1404 page_num,
1405 force_write);
1406 if (ret_sub)
1407 ret = ret_sub;
1408 }
1409
1410 return ret;
1411}
1412
1413static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
1414 struct scrub_block *sblock_good,
1415 int page_num, int force_write)
1416{
1417 struct scrub_page *page_bad = sblock_bad->pagev[page_num];
1418 struct scrub_page *page_good = sblock_good->pagev[page_num];
1419
1420 BUG_ON(page_bad->page == NULL);
1421 BUG_ON(page_good->page == NULL);
1422 if (force_write || sblock_bad->header_error ||
1423 sblock_bad->checksum_error || page_bad->io_error) {
1424 struct bio *bio;
1425 int ret;
1426 DECLARE_COMPLETION_ONSTACK(complete);
1427
1428 if (!page_bad->dev->bdev) {
1429 printk_ratelimited(KERN_WARNING
1430 "btrfs: scrub_repair_page_from_good_copy(bdev == NULL) is unexpected!\n");
1431 return -EIO;
1432 }
1433
1434 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
1435 if (!bio)
1436 return -EIO;
1437 bio->bi_bdev = page_bad->dev->bdev;
1438 bio->bi_sector = page_bad->physical >> 9;
1439 bio->bi_end_io = scrub_complete_bio_end_io;
1440 bio->bi_private = &complete;
1441
1442 ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
1443 if (PAGE_SIZE != ret) {
1444 bio_put(bio);
1445 return -EIO;
1446 }
1447 btrfsic_submit_bio(WRITE, bio);
1448
1449
1450 wait_for_completion(&complete);
1451 if (!bio_flagged(bio, BIO_UPTODATE)) {
1452 btrfs_dev_stat_inc_and_print(page_bad->dev,
1453 BTRFS_DEV_STAT_WRITE_ERRS);
1454 btrfs_dev_replace_stats_inc(
1455 &sblock_bad->sctx->dev_root->fs_info->
1456 dev_replace.num_write_errors);
1457 bio_put(bio);
1458 return -EIO;
1459 }
1460 bio_put(bio);
1461 }
1462
1463 return 0;
1464}
1465
1466static void scrub_write_block_to_dev_replace(struct scrub_block *sblock)
1467{
1468 int page_num;
1469
1470 for (page_num = 0; page_num < sblock->page_count; page_num++) {
1471 int ret;
1472
1473 ret = scrub_write_page_to_dev_replace(sblock, page_num);
1474 if (ret)
1475 btrfs_dev_replace_stats_inc(
1476 &sblock->sctx->dev_root->fs_info->dev_replace.
1477 num_write_errors);
1478 }
1479}
1480
1481static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
1482 int page_num)
1483{
1484 struct scrub_page *spage = sblock->pagev[page_num];
1485
1486 BUG_ON(spage->page == NULL);
1487 if (spage->io_error) {
1488 void *mapped_buffer = kmap_atomic(spage->page);
1489
1490 memset(mapped_buffer, 0, PAGE_CACHE_SIZE);
1491 flush_dcache_page(spage->page);
1492 kunmap_atomic(mapped_buffer);
1493 }
1494 return scrub_add_page_to_wr_bio(sblock->sctx, spage);
1495}
1496
1497static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
1498 struct scrub_page *spage)
1499{
1500 struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx;
1501 struct scrub_bio *sbio;
1502 int ret;
1503
1504 mutex_lock(&wr_ctx->wr_lock);
1505again:
1506 if (!wr_ctx->wr_curr_bio) {
1507 wr_ctx->wr_curr_bio = kzalloc(sizeof(*wr_ctx->wr_curr_bio),
1508 GFP_NOFS);
1509 if (!wr_ctx->wr_curr_bio) {
1510 mutex_unlock(&wr_ctx->wr_lock);
1511 return -ENOMEM;
1512 }
1513 wr_ctx->wr_curr_bio->sctx = sctx;
1514 wr_ctx->wr_curr_bio->page_count = 0;
1515 }
1516 sbio = wr_ctx->wr_curr_bio;
1517 if (sbio->page_count == 0) {
1518 struct bio *bio;
1519
1520 sbio->physical = spage->physical_for_dev_replace;
1521 sbio->logical = spage->logical;
1522 sbio->dev = wr_ctx->tgtdev;
1523 bio = sbio->bio;
1524 if (!bio) {
1525 bio = btrfs_io_bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio);
1526 if (!bio) {
1527 mutex_unlock(&wr_ctx->wr_lock);
1528 return -ENOMEM;
1529 }
1530 sbio->bio = bio;
1531 }
1532
1533 bio->bi_private = sbio;
1534 bio->bi_end_io = scrub_wr_bio_end_io;
1535 bio->bi_bdev = sbio->dev->bdev;
1536 bio->bi_sector = sbio->physical >> 9;
1537 sbio->err = 0;
1538 } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
1539 spage->physical_for_dev_replace ||
1540 sbio->logical + sbio->page_count * PAGE_SIZE !=
1541 spage->logical) {
1542 scrub_wr_submit(sctx);
1543 goto again;
1544 }
1545
1546 ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
1547 if (ret != PAGE_SIZE) {
1548 if (sbio->page_count < 1) {
1549 bio_put(sbio->bio);
1550 sbio->bio = NULL;
1551 mutex_unlock(&wr_ctx->wr_lock);
1552 return -EIO;
1553 }
1554 scrub_wr_submit(sctx);
1555 goto again;
1556 }
1557
1558 sbio->pagev[sbio->page_count] = spage;
1559 scrub_page_get(spage);
1560 sbio->page_count++;
1561 if (sbio->page_count == wr_ctx->pages_per_wr_bio)
1562 scrub_wr_submit(sctx);
1563 mutex_unlock(&wr_ctx->wr_lock);
1564
1565 return 0;
1566}
1567
1568static void scrub_wr_submit(struct scrub_ctx *sctx)
1569{
1570 struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx;
1571 struct scrub_bio *sbio;
1572
1573 if (!wr_ctx->wr_curr_bio)
1574 return;
1575
1576 sbio = wr_ctx->wr_curr_bio;
1577 wr_ctx->wr_curr_bio = NULL;
1578 WARN_ON(!sbio->bio->bi_bdev);
1579 scrub_pending_bio_inc(sctx);
1580
1584 btrfsic_submit_bio(WRITE, sbio->bio);
1585}
1586
1587static void scrub_wr_bio_end_io(struct bio *bio, int err)
1588{
1589 struct scrub_bio *sbio = bio->bi_private;
1590 struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info;
1591
1592 sbio->err = err;
1593 sbio->bio = bio;
1594
1595 sbio->work.func = scrub_wr_bio_end_io_worker;
1596 btrfs_queue_worker(&fs_info->scrub_wr_completion_workers, &sbio->work);
1597}
1598
1599static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
1600{
1601 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
1602 struct scrub_ctx *sctx = sbio->sctx;
1603 int i;
1604
1605 WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
1606 if (sbio->err) {
1607 struct btrfs_dev_replace *dev_replace =
1608 &sbio->sctx->dev_root->fs_info->dev_replace;
1609
1610 for (i = 0; i < sbio->page_count; i++) {
1611 struct scrub_page *spage = sbio->pagev[i];
1612
1613 spage->io_error = 1;
1614 btrfs_dev_replace_stats_inc(&dev_replace->
1615 num_write_errors);
1616 }
1617 }
1618
1619 for (i = 0; i < sbio->page_count; i++)
1620 scrub_page_put(sbio->pagev[i]);
1621
1622 bio_put(sbio->bio);
1623 kfree(sbio);
1624 scrub_pending_bio_dec(sctx);
1625}
1626
1627static int scrub_checksum(struct scrub_block *sblock)
1628{
1629 u64 flags;
1630 int ret;
1631
1632 WARN_ON(sblock->page_count < 1);
1633 flags = sblock->pagev[0]->flags;
1634 ret = 0;
1635 if (flags & BTRFS_EXTENT_FLAG_DATA)
1636 ret = scrub_checksum_data(sblock);
1637 else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
1638 ret = scrub_checksum_tree_block(sblock);
1639 else if (flags & BTRFS_EXTENT_FLAG_SUPER)
1640 (void)scrub_checksum_super(sblock);
1641 else
1642 WARN_ON(1);
1643 if (ret)
1644 scrub_handle_errored_block(sblock);
1645
1646 return ret;
1647}
1648
1649static int scrub_checksum_data(struct scrub_block *sblock)
1650{
1651 struct scrub_ctx *sctx = sblock->sctx;
1652 u8 csum[BTRFS_CSUM_SIZE];
1653 u8 *on_disk_csum;
1654 struct page *page;
1655 void *buffer;
1656 u32 crc = ~(u32)0;
1657 int fail = 0;
1658 u64 len;
1659 int index;
1660
1661 BUG_ON(sblock->page_count < 1);
1662 if (!sblock->pagev[0]->have_csum)
1663 return 0;
1664
1665 on_disk_csum = sblock->pagev[0]->csum;
1666 page = sblock->pagev[0]->page;
1667 buffer = kmap_atomic(page);
1668
1669 len = sctx->sectorsize;
1670 index = 0;
1671 for (;;) {
1672 u64 l = min_t(u64, len, PAGE_SIZE);
1673
1674 crc = btrfs_csum_data(buffer, crc, l);
1675 kunmap_atomic(buffer);
1676 len -= l;
1677 if (len == 0)
1678 break;
1679 index++;
1680 BUG_ON(index >= sblock->page_count);
1681 BUG_ON(!sblock->pagev[index]->page);
1682 page = sblock->pagev[index]->page;
1683 buffer = kmap_atomic(page);
1684 }
1685
1686 btrfs_csum_final(crc, csum);
1687 if (memcmp(csum, on_disk_csum, sctx->csum_size))
1688 fail = 1;
1689
1690 return fail;
1691}
1692
1693static int scrub_checksum_tree_block(struct scrub_block *sblock)
1694{
1695 struct scrub_ctx *sctx = sblock->sctx;
1696 struct btrfs_header *h;
1697 struct btrfs_root *root = sctx->dev_root;
1698 struct btrfs_fs_info *fs_info = root->fs_info;
1699 u8 calculated_csum[BTRFS_CSUM_SIZE];
1700 u8 on_disk_csum[BTRFS_CSUM_SIZE];
1701 struct page *page;
1702 void *mapped_buffer;
1703 u64 mapped_size;
1704 void *p;
1705 u32 crc = ~(u32)0;
1706 int fail = 0;
1707 int crc_fail = 0;
1708 u64 len;
1709 int index;
1710
1711 BUG_ON(sblock->page_count < 1);
1712 page = sblock->pagev[0]->page;
1713 mapped_buffer = kmap_atomic(page);
1714 h = (struct btrfs_header *)mapped_buffer;
1715 memcpy(on_disk_csum, h->csum, sctx->csum_size);
1723 if (sblock->pagev[0]->logical != le64_to_cpu(h->bytenr))
1724 ++fail;
1725
1726 if (sblock->pagev[0]->generation != le64_to_cpu(h->generation))
1727 ++fail;
1728
1729 if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
1730 ++fail;
1731
1732 if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
1733 BTRFS_UUID_SIZE))
1734 ++fail;
1735
1736 WARN_ON(sctx->nodesize != sctx->leafsize);
1737 len = sctx->nodesize - BTRFS_CSUM_SIZE;
1738 mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
1739 p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
1740 index = 0;
1741 for (;;) {
1742 u64 l = min_t(u64, len, mapped_size);
1743
1744 crc = btrfs_csum_data(p, crc, l);
1745 kunmap_atomic(mapped_buffer);
1746 len -= l;
1747 if (len == 0)
1748 break;
1749 index++;
1750 BUG_ON(index >= sblock->page_count);
1751 BUG_ON(!sblock->pagev[index]->page);
1752 page = sblock->pagev[index]->page;
1753 mapped_buffer = kmap_atomic(page);
1754 mapped_size = PAGE_SIZE;
1755 p = mapped_buffer;
1756 }
1757
1758 btrfs_csum_final(crc, calculated_csum);
1759 if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
1760 ++crc_fail;
1761
1762 return fail || crc_fail;
1763}
1764
1765static int scrub_checksum_super(struct scrub_block *sblock)
1766{
1767 struct btrfs_super_block *s;
1768 struct scrub_ctx *sctx = sblock->sctx;
1769 struct btrfs_root *root = sctx->dev_root;
1770 struct btrfs_fs_info *fs_info = root->fs_info;
1771 u8 calculated_csum[BTRFS_CSUM_SIZE];
1772 u8 on_disk_csum[BTRFS_CSUM_SIZE];
1773 struct page *page;
1774 void *mapped_buffer;
1775 u64 mapped_size;
1776 void *p;
1777 u32 crc = ~(u32)0;
1778 int fail_gen = 0;
1779 int fail_cor = 0;
1780 u64 len;
1781 int index;
1782
1783 BUG_ON(sblock->page_count < 1);
1784 page = sblock->pagev[0]->page;
1785 mapped_buffer = kmap_atomic(page);
1786 s = (struct btrfs_super_block *)mapped_buffer;
1787 memcpy(on_disk_csum, s->csum, sctx->csum_size);
1788
1789 if (sblock->pagev[0]->logical != le64_to_cpu(s->bytenr))
1790 ++fail_cor;
1791
1792 if (sblock->pagev[0]->generation != le64_to_cpu(s->generation))
1793 ++fail_gen;
1794
1795 if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
1796 ++fail_cor;
1797
1798 len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE;
1799 mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
1800 p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
1801 index = 0;
1802 for (;;) {
1803 u64 l = min_t(u64, len, mapped_size);
1804
1805 crc = btrfs_csum_data(p, crc, l);
1806 kunmap_atomic(mapped_buffer);
1807 len -= l;
1808 if (len == 0)
1809 break;
1810 index++;
1811 BUG_ON(index >= sblock->page_count);
1812 BUG_ON(!sblock->pagev[index]->page);
1813 page = sblock->pagev[index]->page;
1814 mapped_buffer = kmap_atomic(page);
1815 mapped_size = PAGE_SIZE;
1816 p = mapped_buffer;
1817 }
1818
1819 btrfs_csum_final(crc, calculated_csum);
1820 if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
1821 ++fail_cor;
1822
1823 if (fail_cor + fail_gen) {
1829 spin_lock(&sctx->stat_lock);
1830 ++sctx->stat.super_errors;
1831 spin_unlock(&sctx->stat_lock);
1832 if (fail_cor)
1833 btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
1834 BTRFS_DEV_STAT_CORRUPTION_ERRS);
1835 else
1836 btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
1837 BTRFS_DEV_STAT_GENERATION_ERRS);
1838 }
1839
1840 return fail_cor + fail_gen;
1841}
1842
1843static void scrub_block_get(struct scrub_block *sblock)
1844{
1845 atomic_inc(&sblock->ref_count);
1846}
1847
1848static void scrub_block_put(struct scrub_block *sblock)
1849{
1850 if (atomic_dec_and_test(&sblock->ref_count)) {
1851 int i;
1852
1853 for (i = 0; i < sblock->page_count; i++)
1854 scrub_page_put(sblock->pagev[i]);
1855 kfree(sblock);
1856 }
1857}
1858
1859static void scrub_page_get(struct scrub_page *spage)
1860{
1861 atomic_inc(&spage->ref_count);
1862}
1863
1864static void scrub_page_put(struct scrub_page *spage)
1865{
1866 if (atomic_dec_and_test(&spage->ref_count)) {
1867 if (spage->page)
1868 __free_page(spage->page);
1869 kfree(spage);
1870 }
1871}
1872
1873static void scrub_submit(struct scrub_ctx *sctx)
1874{
1875 struct scrub_bio *sbio;
1876
1877 if (sctx->curr == -1)
1878 return;
1879
1880 sbio = sctx->bios[sctx->curr];
1881 sctx->curr = -1;
1882 scrub_pending_bio_inc(sctx);
1883
1884 if (!sbio->bio->bi_bdev) {
1892 printk_ratelimited(KERN_WARNING
1893 "btrfs: scrub_submit(bio bdev == NULL) is unexpected!\n");
1894 bio_endio(sbio->bio, -EIO);
1895 } else {
1896 btrfsic_submit_bio(READ, sbio->bio);
1897 }
1898}
1899
1900static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
1901 struct scrub_page *spage)
1902{
1903 struct scrub_block *sblock = spage->sblock;
1904 struct scrub_bio *sbio;
1905 int ret;
1906
1907again:
1911 while (sctx->curr == -1) {
1912 spin_lock(&sctx->list_lock);
1913 sctx->curr = sctx->first_free;
1914 if (sctx->curr != -1) {
1915 sctx->first_free = sctx->bios[sctx->curr]->next_free;
1916 sctx->bios[sctx->curr]->next_free = -1;
1917 sctx->bios[sctx->curr]->page_count = 0;
1918 spin_unlock(&sctx->list_lock);
1919 } else {
1920 spin_unlock(&sctx->list_lock);
1921 wait_event(sctx->list_wait, sctx->first_free != -1);
1922 }
1923 }
1924 sbio = sctx->bios[sctx->curr];
1925 if (sbio->page_count == 0) {
1926 struct bio *bio;
1927
1928 sbio->physical = spage->physical;
1929 sbio->logical = spage->logical;
1930 sbio->dev = spage->dev;
1931 bio = sbio->bio;
1932 if (!bio) {
1933 bio = btrfs_io_bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio);
1934 if (!bio)
1935 return -ENOMEM;
1936 sbio->bio = bio;
1937 }
1938
1939 bio->bi_private = sbio;
1940 bio->bi_end_io = scrub_bio_end_io;
1941 bio->bi_bdev = sbio->dev->bdev;
1942 bio->bi_sector = sbio->physical >> 9;
1943 sbio->err = 0;
1944 } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
1945 spage->physical ||
1946 sbio->logical + sbio->page_count * PAGE_SIZE !=
1947 spage->logical ||
1948 sbio->dev != spage->dev) {
1949 scrub_submit(sctx);
1950 goto again;
1951 }
1952
1953 sbio->pagev[sbio->page_count] = spage;
1954 ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
1955 if (ret != PAGE_SIZE) {
1956 if (sbio->page_count < 1) {
1957 bio_put(sbio->bio);
1958 sbio->bio = NULL;
1959 return -EIO;
1960 }
1961 scrub_submit(sctx);
1962 goto again;
1963 }
1964
1965 scrub_block_get(sblock);
1966 atomic_inc(&sblock->outstanding_pages);
1967 sbio->page_count++;
1968 if (sbio->page_count == sctx->pages_per_rd_bio)
1969 scrub_submit(sctx);
1970
1971 return 0;
1972}
1973
1974static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
1975 u64 physical, struct btrfs_device *dev, u64 flags,
1976 u64 gen, int mirror_num, u8 *csum, int force,
1977 u64 physical_for_dev_replace)
1978{
1979 struct scrub_block *sblock;
1980 int index;
1981
1982 sblock = kzalloc(sizeof(*sblock), GFP_NOFS);
1983 if (!sblock) {
1984 spin_lock(&sctx->stat_lock);
1985 sctx->stat.malloc_errors++;
1986 spin_unlock(&sctx->stat_lock);
1987 return -ENOMEM;
1988 }
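	/* one ref for this function; each page added to a bio takes another */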
1992 atomic_set(&sblock->ref_count, 1);
1993 sblock->sctx = sctx;
1994 sblock->no_io_error_seen = 1;
1995
1996 for (index = 0; len > 0; index++) {
1997 struct scrub_page *spage;
1998 u64 l = min_t(u64, len, PAGE_SIZE);
1999
2000 spage = kzalloc(sizeof(*spage), GFP_NOFS);
2001 if (!spage) {
2002leave_nomem:
2003 spin_lock(&sctx->stat_lock);
2004 sctx->stat.malloc_errors++;
2005 spin_unlock(&sctx->stat_lock);
2006 scrub_block_put(sblock);
2007 return -ENOMEM;
2008 }
2009 BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
2010 scrub_page_get(spage);
2011 sblock->pagev[index] = spage;
2012 spage->sblock = sblock;
2013 spage->dev = dev;
2014 spage->flags = flags;
2015 spage->generation = gen;
2016 spage->logical = logical;
2017 spage->physical = physical;
2018 spage->physical_for_dev_replace = physical_for_dev_replace;
2019 spage->mirror_num = mirror_num;
2020 if (csum) {
2021 spage->have_csum = 1;
2022 memcpy(spage->csum, csum, sctx->csum_size);
2023 } else {
2024 spage->have_csum = 0;
2025 }
2026 sblock->page_count++;
2027 spage->page = alloc_page(GFP_NOFS);
2028 if (!spage->page)
2029 goto leave_nomem;
2030 len -= l;
2031 logical += l;
2032 physical += l;
2033 physical_for_dev_replace += l;
2034 }
2035
2036 WARN_ON(sblock->page_count == 0);
2037 for (index = 0; index < sblock->page_count; index++) {
2038 struct scrub_page *spage = sblock->pagev[index];
2039 int ret;
2040
2041 ret = scrub_add_page_to_rd_bio(sctx, spage);
2042 if (ret) {
2043 scrub_block_put(sblock);
2044 return ret;
2045 }
2046 }
2047
2048 if (force)
2049 scrub_submit(sctx);
2052 scrub_block_put(sblock);
2053 return 0;
2054}
2055
2056static void scrub_bio_end_io(struct bio *bio, int err)
2057{
2058 struct scrub_bio *sbio = bio->bi_private;
2059 struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info;
2060
2061 sbio->err = err;
2062 sbio->bio = bio;
2063
2064 btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work);
2065}
2066
2067static void scrub_bio_end_io_worker(struct btrfs_work *work)
2068{
2069 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
2070 struct scrub_ctx *sctx = sbio->sctx;
2071 int i;
2072
2073 BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
2074 if (sbio->err) {
2075 for (i = 0; i < sbio->page_count; i++) {
2076 struct scrub_page *spage = sbio->pagev[i];
2077
2078 spage->io_error = 1;
2079 spage->sblock->no_io_error_seen = 0;
2080 }
2081 }
2082
2083
2084 for (i = 0; i < sbio->page_count; i++) {
2085 struct scrub_page *spage = sbio->pagev[i];
2086 struct scrub_block *sblock = spage->sblock;
2087
2088 if (atomic_dec_and_test(&sblock->outstanding_pages))
2089 scrub_block_complete(sblock);
2090 scrub_block_put(sblock);
2091 }
2092
2093 bio_put(sbio->bio);
2094 sbio->bio = NULL;
2095 spin_lock(&sctx->list_lock);
2096 sbio->next_free = sctx->first_free;
2097 sctx->first_free = sbio->index;
2098 spin_unlock(&sctx->list_lock);
2099
2100 if (sctx->is_dev_replace &&
2101 atomic_read(&sctx->wr_ctx.flush_all_writes)) {
2102 mutex_lock(&sctx->wr_ctx.wr_lock);
2103 scrub_wr_submit(sctx);
2104 mutex_unlock(&sctx->wr_ctx.wr_lock);
2105 }
2106
2107 scrub_pending_bio_dec(sctx);
2108}
2109
2110static void scrub_block_complete(struct scrub_block *sblock)
2111{
2112 if (!sblock->no_io_error_seen) {
2113 scrub_handle_errored_block(sblock);
2114 } else {
2120 if (!scrub_checksum(sblock) && sblock->sctx->is_dev_replace)
2121 scrub_write_block_to_dev_replace(sblock);
2122 }
2123}
2124
2125static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len,
2126 u8 *csum)
2127{
2128 struct btrfs_ordered_sum *sum = NULL;
2129 unsigned long index;
2130 unsigned long num_sectors;
2131
2132 while (!list_empty(&sctx->csum_list)) {
2133 sum = list_first_entry(&sctx->csum_list,
2134 struct btrfs_ordered_sum, list);
2135 if (sum->bytenr > logical)
2136 return 0;
2137 if (sum->bytenr + sum->len > logical)
2138 break;
2139
2140 ++sctx->stat.csum_discards;
2141 list_del(&sum->list);
2142 kfree(sum);
2143 sum = NULL;
2144 }
2145 if (!sum)
2146 return 0;
2147
2148 index = ((u32)(logical - sum->bytenr)) / sctx->sectorsize;
2149 num_sectors = sum->len / sctx->sectorsize;
2150 memcpy(csum, sum->sums + index, sctx->csum_size);
2151 if (index == num_sectors - 1) {
2152 list_del(&sum->list);
2153 kfree(sum);
2154 }
2155 return 1;
2156}
2157
2158
2159static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len,
2160 u64 physical, struct btrfs_device *dev, u64 flags,
2161 u64 gen, int mirror_num, u64 physical_for_dev_replace)
2162{
2163 int ret;
2164 u8 csum[BTRFS_CSUM_SIZE];
2165 u32 blocksize;
2166
2167 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2168 blocksize = sctx->sectorsize;
2169 spin_lock(&sctx->stat_lock);
2170 sctx->stat.data_extents_scrubbed++;
2171 sctx->stat.data_bytes_scrubbed += len;
2172 spin_unlock(&sctx->stat_lock);
2173 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
2174 WARN_ON(sctx->nodesize != sctx->leafsize);
2175 blocksize = sctx->nodesize;
2176 spin_lock(&sctx->stat_lock);
2177 sctx->stat.tree_extents_scrubbed++;
2178 sctx->stat.tree_bytes_scrubbed += len;
2179 spin_unlock(&sctx->stat_lock);
2180 } else {
2181 blocksize = sctx->sectorsize;
2182 WARN_ON(1);
2183 }
2184
2185 while (len) {
2186 u64 l = min_t(u64, len, blocksize);
2187 int have_csum = 0;
2188
2189 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2190
2191 have_csum = scrub_find_csum(sctx, logical, l, csum);
2192 if (have_csum == 0)
2193 ++sctx->stat.no_csum;
2194 if (sctx->is_dev_replace && !have_csum) {
2195 ret = copy_nocow_pages(sctx, logical, l,
2196 mirror_num,
2197 physical_for_dev_replace);
2198 goto behind_scrub_pages;
2199 }
2200 }
2201 ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen,
2202 mirror_num, have_csum ? csum : NULL, 0,
2203 physical_for_dev_replace);
2204behind_scrub_pages:
2205 if (ret)
2206 return ret;
2207 len -= l;
2208 logical += l;
2209 physical += l;
2210 physical_for_dev_replace += l;
2211 }
2212 return 0;
2213}
2214
2215static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2216 struct map_lookup *map,
2217 struct btrfs_device *scrub_dev,
2218 int num, u64 base, u64 length,
2219 int is_dev_replace)
2220{
2221 struct btrfs_path *path;
2222 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
2223 struct btrfs_root *root = fs_info->extent_root;
2224 struct btrfs_root *csum_root = fs_info->csum_root;
2225 struct btrfs_extent_item *extent;
2226 struct blk_plug plug;
2227 u64 flags;
2228 int ret;
2229 int slot;
2230 u64 nstripes;
2231 struct extent_buffer *l;
2232 struct btrfs_key key;
2233 u64 physical;
2234 u64 logical;
2235 u64 logic_end;
2236 u64 generation;
2237 int mirror_num;
2238 struct reada_control *reada1;
2239 struct reada_control *reada2;
2240 struct btrfs_key key_start;
2241 struct btrfs_key key_end;
2242 u64 increment = map->stripe_len;
2243 u64 offset;
2244 u64 extent_logical;
2245 u64 extent_physical;
2246 u64 extent_len;
2247 struct btrfs_device *extent_dev;
2248 int extent_mirror_num;
2249 int stop_loop;
2250
2251 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
2252 BTRFS_BLOCK_GROUP_RAID6)) {
2253 if (num >= nr_data_stripes(map)) {
2254 return 0;
2255 }
2256 }
2257
2258 nstripes = length;
2259 offset = 0;
2260 do_div(nstripes, map->stripe_len);
2261 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
2262 offset = map->stripe_len * num;
2263 increment = map->stripe_len * map->num_stripes;
2264 mirror_num = 1;
2265 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
2266 int factor = map->num_stripes / map->sub_stripes;
2267 offset = map->stripe_len * (num / map->sub_stripes);
2268 increment = map->stripe_len * factor;
2269 mirror_num = num % map->sub_stripes + 1;
2270 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
2271 increment = map->stripe_len;
2272 mirror_num = num % map->num_stripes + 1;
2273 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
2274 increment = map->stripe_len;
2275 mirror_num = num % map->num_stripes + 1;
2276 } else {
2277 increment = map->stripe_len;
2278 mirror_num = 1;
2279 }
2280
2281 path = btrfs_alloc_path();
2282 if (!path)
2283 return -ENOMEM;
2284
	/*
	 * Work on the commit root: the blocks it references are static as
	 * long as COW is applied, so no tree locking is needed while
	 * scrubbing.
	 */
2290 path->search_commit_root = 1;
2291 path->skip_locking = 1;
2292
	/*
	 * Trigger readahead for the extent tree and csum tree ranges of
	 * this stripe and wait for it to complete.  The scrub is counted
	 * as paused meanwhile so that it does not hold off transaction
	 * commits.
	 */
2298 logical = base + offset;
2299
2300 wait_event(sctx->list_wait,
2301 atomic_read(&sctx->bios_in_flight) == 0);
2302 atomic_inc(&fs_info->scrubs_paused);
2303 wake_up(&fs_info->scrub_pause_wait);
2304
2305
2306 key_start.objectid = logical;
2307 key_start.type = BTRFS_EXTENT_ITEM_KEY;
2308 key_start.offset = (u64)0;
2309 key_end.objectid = base + offset + nstripes * increment;
2310 key_end.type = BTRFS_METADATA_ITEM_KEY;
2311 key_end.offset = (u64)-1;
2312 reada1 = btrfs_reada_add(root, &key_start, &key_end);
2313
2314 key_start.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
2315 key_start.type = BTRFS_EXTENT_CSUM_KEY;
2316 key_start.offset = logical;
2317 key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
2318 key_end.type = BTRFS_EXTENT_CSUM_KEY;
2319 key_end.offset = base + offset + nstripes * increment;
2320 reada2 = btrfs_reada_add(csum_root, &key_start, &key_end);
2321
2322 if (!IS_ERR(reada1))
2323 btrfs_reada_wait(reada1);
2324 if (!IS_ERR(reada2))
2325 btrfs_reada_wait(reada2);
2326
2327 mutex_lock(&fs_info->scrub_lock);
2328 while (atomic_read(&fs_info->scrub_pause_req)) {
2329 mutex_unlock(&fs_info->scrub_lock);
2330 wait_event(fs_info->scrub_pause_wait,
2331 atomic_read(&fs_info->scrub_pause_req) == 0);
2332 mutex_lock(&fs_info->scrub_lock);
2333 }
2334 atomic_dec(&fs_info->scrubs_paused);
2335 mutex_unlock(&fs_info->scrub_lock);
2336 wake_up(&fs_info->scrub_pause_wait);
2337
	/*
	 * Plug the block layer so the many small reads issued below can be
	 * merged before they are sent to the device.
	 */
2342 blk_start_plug(&plug);
2343
	/*
	 * Now find all extents that fall into each stripe and scrub them.
	 */
2347 logical = base + offset;
2348 physical = map->stripes[num].physical;
2349 logic_end = logical + increment * nstripes;
2350 ret = 0;
2351 while (logical < logic_end) {
		/* canceled? */
2355 if (atomic_read(&fs_info->scrub_cancel_req) ||
2356 atomic_read(&sctx->cancel_req)) {
2357 ret = -ECANCELED;
2358 goto out;
2359 }
2360
		/* check whether we have to pause */
2363 if (atomic_read(&fs_info->scrub_pause_req)) {
			/* submit any queued read and write bios */
2365 atomic_set(&sctx->wr_ctx.flush_all_writes, 1);
2366 scrub_submit(sctx);
2367 mutex_lock(&sctx->wr_ctx.wr_lock);
2368 scrub_wr_submit(sctx);
2369 mutex_unlock(&sctx->wr_ctx.wr_lock);
2370 wait_event(sctx->list_wait,
2371 atomic_read(&sctx->bios_in_flight) == 0);
2372 atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
2373 atomic_inc(&fs_info->scrubs_paused);
2374 wake_up(&fs_info->scrub_pause_wait);
2375 mutex_lock(&fs_info->scrub_lock);
2376 while (atomic_read(&fs_info->scrub_pause_req)) {
2377 mutex_unlock(&fs_info->scrub_lock);
2378 wait_event(fs_info->scrub_pause_wait,
2379 atomic_read(&fs_info->scrub_pause_req) == 0);
2380 mutex_lock(&fs_info->scrub_lock);
2381 }
2382 atomic_dec(&fs_info->scrubs_paused);
2383 mutex_unlock(&fs_info->scrub_lock);
2384 wake_up(&fs_info->scrub_pause_wait);
2385 }
2386
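		/*
		 * Find the extent item at or just before 'logical' and walk
		 * forward from there.
		 */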
2387 key.objectid = logical;
2388 key.type = BTRFS_EXTENT_ITEM_KEY;
2389 key.offset = (u64)-1;
2390
2391 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2392 if (ret < 0)
2393 goto out;
2394
2395 if (ret > 0) {
2396 ret = btrfs_previous_item(root, path, 0,
2397 BTRFS_EXTENT_ITEM_KEY);
2398 if (ret < 0)
2399 goto out;
2400 if (ret > 0) {
				/* there is no smaller item, so stick with
				 * the larger one */
2403 btrfs_release_path(path);
2404 ret = btrfs_search_slot(NULL, root, &key,
2405 path, 0, 0);
2406 if (ret < 0)
2407 goto out;
2408 }
2409 }
2410
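		/*
		 * Walk all extent items overlapping this stripe and scrub
		 * the parts that fall inside it.
		 */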
2411 stop_loop = 0;
2412 while (1) {
2413 u64 bytes;
2414
2415 l = path->nodes[0];
2416 slot = path->slots[0];
2417 if (slot >= btrfs_header_nritems(l)) {
2418 ret = btrfs_next_leaf(root, path);
2419 if (ret == 0)
2420 continue;
2421 if (ret < 0)
2422 goto out;
2423
2424 stop_loop = 1;
2425 break;
2426 }
2427 btrfs_item_key_to_cpu(l, &key, slot);
2428
2429 if (key.type == BTRFS_METADATA_ITEM_KEY)
2430 bytes = root->leafsize;
2431 else
2432 bytes = key.offset;
2433
2434 if (key.objectid + bytes <= logical)
2435 goto next;
2436
2437 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
2438 key.type != BTRFS_METADATA_ITEM_KEY)
2439 goto next;
2440
2441 if (key.objectid >= logical + map->stripe_len) {
				/* beyond the end of this stripe */
2443 if (key.objectid >= logic_end)
2444 stop_loop = 1;
2445 break;
2446 }
2447
2448 extent = btrfs_item_ptr(l, slot,
2449 struct btrfs_extent_item);
2450 flags = btrfs_extent_flags(l, extent);
2451 generation = btrfs_extent_generation(l, extent);
2452
2453 if (key.objectid < logical &&
2454 (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
2455 printk(KERN_ERR
2456 "btrfs scrub: tree block %llu spanning "
2457 "stripes, ignored. logical=%llu\n",
2458 (unsigned long long)key.objectid,
2459 (unsigned long long)logical);
2460 goto next;
2461 }
2462
2463again:
2464 extent_logical = key.objectid;
2465 extent_len = bytes;
2466
			/*
			 * trim the extent to the part that lies within this
			 * stripe
			 */
2470 if (extent_logical < logical) {
2471 extent_len -= logical - extent_logical;
2472 extent_logical = logical;
2473 }
2474 if (extent_logical + extent_len >
2475 logical + map->stripe_len) {
2476 extent_len = logical + map->stripe_len -
2477 extent_logical;
2478 }
2479
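			/*
			 * Map the trimmed extent to its physical location on
			 * this device; in dev-replace mode, look up which
			 * device and mirror to read it from instead.
			 */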
2480 extent_physical = extent_logical - logical + physical;
2481 extent_dev = scrub_dev;
2482 extent_mirror_num = mirror_num;
2483 if (is_dev_replace)
2484 scrub_remap_extent(fs_info, extent_logical,
2485 extent_len, &extent_physical,
2486 &extent_dev,
2487 &extent_mirror_num);
2488
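			/* load the data checksums for this stripe range */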
2489 ret = btrfs_lookup_csums_range(csum_root, logical,
2490 logical + map->stripe_len - 1,
2491 &sctx->csum_list, 1);
2492 if (ret)
2493 goto out;
2494
2495 ret = scrub_extent(sctx, extent_logical, extent_len,
2496 extent_physical, extent_dev, flags,
2497 generation, extent_mirror_num,
2498 extent_logical - logical + physical);
2499 if (ret)
2500 goto out;
2501
2502 scrub_free_csums(sctx);
2503 if (extent_logical + extent_len <
2504 key.objectid + bytes) {
2505 logical += increment;
2506 physical += map->stripe_len;
2507
2508 if (logical < key.objectid + bytes) {
2509 cond_resched();
2510 goto again;
2511 }
2512
2513 if (logical >= logic_end) {
2514 stop_loop = 1;
2515 break;
2516 }
2517 }
2518next:
2519 path->slots[0]++;
2520 }
2521 btrfs_release_path(path);
2522 logical += increment;
2523 physical += map->stripe_len;
2524 spin_lock(&sctx->stat_lock);
2525 if (stop_loop)
2526 sctx->stat.last_physical = map->stripes[num].physical +
2527 length;
2528 else
2529 sctx->stat.last_physical = physical;
2530 spin_unlock(&sctx->stat_lock);
2531 if (stop_loop)
2532 break;
2533 }
2534out:
	/* submit any queued read and write bios */
2536 scrub_submit(sctx);
2537 mutex_lock(&sctx->wr_ctx.wr_lock);
2538 scrub_wr_submit(sctx);
2539 mutex_unlock(&sctx->wr_ctx.wr_lock);
2540
2541 blk_finish_plug(&plug);
2542 btrfs_free_path(path);
2543 return ret < 0 ? ret : 0;
2544}
2545
2546static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
2547 struct btrfs_device *scrub_dev,
2548 u64 chunk_tree, u64 chunk_objectid,
2549 u64 chunk_offset, u64 length,
2550 u64 dev_offset, int is_dev_replace)
2551{
2552 struct btrfs_mapping_tree *map_tree =
2553 &sctx->dev_root->fs_info->mapping_tree;
2554 struct map_lookup *map;
2555 struct extent_map *em;
2556 int i;
2557 int ret = 0;
2558
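	/*
	 * Look up the chunk mapping so we can tell which of its stripes
	 * lives at dev_offset on the device being scrubbed.
	 */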
2559 read_lock(&map_tree->map_tree.lock);
2560 em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
2561 read_unlock(&map_tree->map_tree.lock);
2562
2563 if (!em)
2564 return -EINVAL;
2565
2566 map = (struct map_lookup *)em->bdev;
2567 if (em->start != chunk_offset)
2568 goto out;
2569
2570 if (em->len < length)
2571 goto out;
2572
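	/* scrub every stripe of this chunk that is stored on scrub_dev */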
2573 for (i = 0; i < map->num_stripes; ++i) {
2574 if (map->stripes[i].dev->bdev == scrub_dev->bdev &&
2575 map->stripes[i].physical == dev_offset) {
2576 ret = scrub_stripe(sctx, map, scrub_dev, i,
2577 chunk_offset, length,
2578 is_dev_replace);
2579 if (ret)
2580 goto out;
2581 }
2582 }
2583out:
2584 free_extent_map(em);
2585
2586 return ret;
2587}
2588
2589static noinline_for_stack
2590int scrub_enumerate_chunks(struct scrub_ctx *sctx,
2591 struct btrfs_device *scrub_dev, u64 start, u64 end,
2592 int is_dev_replace)
2593{
2594 struct btrfs_dev_extent *dev_extent = NULL;
2595 struct btrfs_path *path;
2596 struct btrfs_root *root = sctx->dev_root;
2597 struct btrfs_fs_info *fs_info = root->fs_info;
2598 u64 length;
2599 u64 chunk_tree;
2600 u64 chunk_objectid;
2601 u64 chunk_offset;
2602 int ret;
2603 int slot;
2604 struct extent_buffer *l;
2605 struct btrfs_key key;
2606 struct btrfs_key found_key;
2607 struct btrfs_block_group_cache *cache;
2608 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
2609
2610 path = btrfs_alloc_path();
2611 if (!path)
2612 return -ENOMEM;
2613
2614 path->reada = 2;
2615 path->search_commit_root = 1;
2616 path->skip_locking = 1;
2617
2618 key.objectid = scrub_dev->devid;
2619 key.offset = 0ull;
2620 key.type = BTRFS_DEV_EXTENT_KEY;
2621
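	/*
	 * Walk all dev extents of the device and scrub the chunk that each
	 * of them belongs to.
	 */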
2622 while (1) {
2623 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2624 if (ret < 0)
2625 break;
2626 if (ret > 0) {
2627 if (path->slots[0] >=
2628 btrfs_header_nritems(path->nodes[0])) {
2629 ret = btrfs_next_leaf(root, path);
2630 if (ret)
2631 break;
2632 }
2633 }
2634
2635 l = path->nodes[0];
2636 slot = path->slots[0];
2637
2638 btrfs_item_key_to_cpu(l, &found_key, slot);
2639
2640 if (found_key.objectid != scrub_dev->devid)
2641 break;
2642
2643 if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY)
2644 break;
2645
2646 if (found_key.offset >= end)
2647 break;
2648
2649 if (found_key.offset < key.offset)
2650 break;
2651
2652 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
2653 length = btrfs_dev_extent_length(l, dev_extent);
2654
2655 if (found_key.offset + length <= start) {
2656 key.offset = found_key.offset + length;
2657 btrfs_release_path(path);
2658 continue;
2659 }
2660
2661 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
2662 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
2663 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
2664
		/*
		 * Get a reference on the corresponding block group to
		 * prevent the chunk from going away while we scrub it.
		 */
2669 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
2670 if (!cache) {
2671 ret = -ENOENT;
2672 break;
2673 }
2674 dev_replace->cursor_right = found_key.offset + length;
2675 dev_replace->cursor_left = found_key.offset;
2676 dev_replace->item_needs_writeback = 1;
2677 ret = scrub_chunk(sctx, scrub_dev, chunk_tree, chunk_objectid,
2678 chunk_offset, length, found_key.offset,
2679 is_dev_replace);
2680
		/*
		 * Flush and submit all pending read and write bios and wait
		 * for them before releasing the block group and moving the
		 * dev-replace cursor.  In the dev-replace case, read
		 * completions queue further write requests, so all writes
		 * must be flushed for bios_in_flight to reliably drop to 0.
		 */
2691 atomic_set(&sctx->wr_ctx.flush_all_writes, 1);
2692 scrub_submit(sctx);
2693 mutex_lock(&sctx->wr_ctx.wr_lock);
2694 scrub_wr_submit(sctx);
2695 mutex_unlock(&sctx->wr_ctx.wr_lock);
2696
2697 wait_event(sctx->list_wait,
2698 atomic_read(&sctx->bios_in_flight) == 0);
2699 atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
2700 atomic_inc(&fs_info->scrubs_paused);
2701 wake_up(&fs_info->scrub_pause_wait);
2702 wait_event(sctx->list_wait,
2703 atomic_read(&sctx->workers_pending) == 0);
2704
2705 mutex_lock(&fs_info->scrub_lock);
2706 while (atomic_read(&fs_info->scrub_pause_req)) {
2707 mutex_unlock(&fs_info->scrub_lock);
2708 wait_event(fs_info->scrub_pause_wait,
2709 atomic_read(&fs_info->scrub_pause_req) == 0);
2710 mutex_lock(&fs_info->scrub_lock);
2711 }
2712 atomic_dec(&fs_info->scrubs_paused);
2713 mutex_unlock(&fs_info->scrub_lock);
2714 wake_up(&fs_info->scrub_pause_wait);
2715
2716 dev_replace->cursor_left = dev_replace->cursor_right;
2717 dev_replace->item_needs_writeback = 1;
2718 btrfs_put_block_group(cache);
2719 if (ret)
2720 break;
2721 if (is_dev_replace &&
2722 atomic64_read(&dev_replace->num_write_errors) > 0) {
2723 ret = -EIO;
2724 break;
2725 }
2726 if (sctx->stat.malloc_errors > 0) {
2727 ret = -ENOMEM;
2728 break;
2729 }
2730
2731 key.offset = found_key.offset + length;
2732 btrfs_release_path(path);
2733 }
2734
2735 btrfs_free_path(path);
2736
	/*
	 * ret can still be 1 from btrfs_search_slot or btrfs_next_leaf;
	 * that is not an error.
	 */
2741 return ret < 0 ? ret : 0;
2742}
2743
2744static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
2745 struct btrfs_device *scrub_dev)
2746{
2747 int i;
2748 u64 bytenr;
2749 u64 gen;
2750 int ret;
2751 struct btrfs_root *root = sctx->dev_root;
2752
2753 if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
2754 return -EIO;
2755
2756 gen = root->fs_info->last_trans_committed;
2757
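	/* read and verify each superblock copy that fits on the device */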
2758 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
2759 bytenr = btrfs_sb_offset(i);
2760 if (bytenr + BTRFS_SUPER_INFO_SIZE > scrub_dev->total_bytes)
2761 break;
2762
2763 ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
2764 scrub_dev, BTRFS_EXTENT_FLAG_SUPER, gen, i,
2765 NULL, 1, bytenr);
2766 if (ret)
2767 return ret;
2768 }
2769 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
2770
2771 return 0;
2772}
2773
/*
 * Get a reference on fs_info->scrub_workers; the worker threads are started
 * on the first reference.
 */
2777static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
2778 int is_dev_replace)
2779{
2780 int ret = 0;
2781
2782 mutex_lock(&fs_info->scrub_lock);
2783 if (fs_info->scrub_workers_refcnt == 0) {
2784 if (is_dev_replace)
2785 btrfs_init_workers(&fs_info->scrub_workers, "scrub", 1,
2786 &fs_info->generic_worker);
2787 else
2788 btrfs_init_workers(&fs_info->scrub_workers, "scrub",
2789 fs_info->thread_pool_size,
2790 &fs_info->generic_worker);
2791 fs_info->scrub_workers.idle_thresh = 4;
2792 ret = btrfs_start_workers(&fs_info->scrub_workers);
2793 if (ret)
2794 goto out;
2795 btrfs_init_workers(&fs_info->scrub_wr_completion_workers,
2796 "scrubwrc",
2797 fs_info->thread_pool_size,
2798 &fs_info->generic_worker);
2799 fs_info->scrub_wr_completion_workers.idle_thresh = 2;
2800 ret = btrfs_start_workers(
2801 &fs_info->scrub_wr_completion_workers);
2802 if (ret)
2803 goto out;
2804 btrfs_init_workers(&fs_info->scrub_nocow_workers, "scrubnc", 1,
2805 &fs_info->generic_worker);
2806 ret = btrfs_start_workers(&fs_info->scrub_nocow_workers);
2807 if (ret)
2808 goto out;
2809 }
2810 ++fs_info->scrub_workers_refcnt;
2811out:
2812 mutex_unlock(&fs_info->scrub_lock);
2813
2814 return ret;
2815}
2816
2817static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
2818{
2819 mutex_lock(&fs_info->scrub_lock);
2820 if (--fs_info->scrub_workers_refcnt == 0) {
2821 btrfs_stop_workers(&fs_info->scrub_workers);
2822 btrfs_stop_workers(&fs_info->scrub_wr_completion_workers);
2823 btrfs_stop_workers(&fs_info->scrub_nocow_workers);
2824 }
2825 WARN_ON(fs_info->scrub_workers_refcnt < 0);
2826 mutex_unlock(&fs_info->scrub_lock);
2827}
2828
2829int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
2830 u64 end, struct btrfs_scrub_progress *progress,
2831 int readonly, int is_dev_replace)
2832{
2833 struct scrub_ctx *sctx;
2834 int ret;
2835 struct btrfs_device *dev;
2836
2837 if (btrfs_fs_closing(fs_info))
2838 return -EINVAL;
2839
	/*
	 * Check some size assumptions that the scrub code relies on.
	 */
2843 if (fs_info->chunk_root->nodesize != fs_info->chunk_root->leafsize) {
2844 printk(KERN_ERR
2845 "btrfs_scrub: size assumption nodesize == leafsize (%d == %d) fails\n",
2846 fs_info->chunk_root->nodesize,
2847 fs_info->chunk_root->leafsize);
2848 return -EINVAL;
2849 }
2850
2851 if (fs_info->chunk_root->nodesize > BTRFS_STRIPE_LEN) {
		/*
		 * Tree blocks larger than BTRFS_STRIPE_LEN could cross
		 * stripe boundaries, which the per-stripe scrub code does
		 * not handle.
		 */
2857 printk(KERN_ERR
2858 "btrfs_scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails\n",
2859 fs_info->chunk_root->nodesize, BTRFS_STRIPE_LEN);
2860 return -EINVAL;
2861 }
2862
2863 if (fs_info->chunk_root->sectorsize != PAGE_SIZE) {
		/* the scrub code assumes exactly one sector per page */
2865 printk(KERN_ERR
2866 "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lld) fails\n",
2867 fs_info->chunk_root->sectorsize,
2868 (unsigned long long)PAGE_SIZE);
2869 return -EINVAL;
2870 }
2871
2872 if (fs_info->chunk_root->nodesize >
2873 PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK ||
2874 fs_info->chunk_root->sectorsize >
2875 PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK) {
		/*
		 * Larger blocks would exhaust the bounds of the pagev[]
		 * array in struct scrub_block.
		 */
2880 pr_err("btrfs_scrub: size assumption nodesize and sectorsize <= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails\n",
2881 fs_info->chunk_root->nodesize,
2882 SCRUB_MAX_PAGES_PER_BLOCK,
2883 fs_info->chunk_root->sectorsize,
2884 SCRUB_MAX_PAGES_PER_BLOCK);
2885 return -EINVAL;
2886 }
2887
2888 ret = scrub_workers_get(fs_info, is_dev_replace);
2889 if (ret)
2890 return ret;
2891
2892 mutex_lock(&fs_info->fs_devices->device_list_mutex);
2893 dev = btrfs_find_device(fs_info, devid, NULL, NULL);
2894 if (!dev || (dev->missing && !is_dev_replace)) {
2895 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2896 scrub_workers_put(fs_info);
2897 return -ENODEV;
2898 }
2899 mutex_lock(&fs_info->scrub_lock);
2900
2901 if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) {
2902 mutex_unlock(&fs_info->scrub_lock);
2903 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2904 scrub_workers_put(fs_info);
2905 return -EIO;
2906 }
2907
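	/*
	 * Allow only one scrub per device, and no plain scrub while a
	 * device replace operation is running.
	 */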
2908 btrfs_dev_replace_lock(&fs_info->dev_replace);
2909 if (dev->scrub_device ||
2910 (!is_dev_replace &&
2911 btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
2912 btrfs_dev_replace_unlock(&fs_info->dev_replace);
2913 mutex_unlock(&fs_info->scrub_lock);
2914 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2915 scrub_workers_put(fs_info);
2916 return -EINPROGRESS;
2917 }
2918 btrfs_dev_replace_unlock(&fs_info->dev_replace);
2919 sctx = scrub_setup_ctx(dev, is_dev_replace);
2920 if (IS_ERR(sctx)) {
2921 mutex_unlock(&fs_info->scrub_lock);
2922 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2923 scrub_workers_put(fs_info);
2924 return PTR_ERR(sctx);
2925 }
2926 sctx->readonly = readonly;
2927 dev->scrub_device = sctx;
2928
2929 atomic_inc(&fs_info->scrubs_running);
2930 mutex_unlock(&fs_info->scrub_lock);
2931 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2932
2933 if (!is_dev_replace) {
2934 down_read(&fs_info->scrub_super_lock);
2935 ret = scrub_supers(sctx, dev);
2936 up_read(&fs_info->scrub_super_lock);
2937 }
2938
2939 if (!ret)
2940 ret = scrub_enumerate_chunks(sctx, dev, start, end,
2941 is_dev_replace);
2942
2943 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
2944 atomic_dec(&fs_info->scrubs_running);
2945 wake_up(&fs_info->scrub_pause_wait);
2946
2947 wait_event(sctx->list_wait, atomic_read(&sctx->workers_pending) == 0);
2948
2949 if (progress)
2950 memcpy(progress, &sctx->stat, sizeof(*progress));
2951
2952 mutex_lock(&fs_info->scrub_lock);
2953 dev->scrub_device = NULL;
2954 mutex_unlock(&fs_info->scrub_lock);
2955
2956 scrub_free_ctx(sctx);
2957 scrub_workers_put(fs_info);
2958
2959 return ret;
2960}
2961
2962void btrfs_scrub_pause(struct btrfs_root *root)
2963{
2964 struct btrfs_fs_info *fs_info = root->fs_info;
2965
2966 mutex_lock(&fs_info->scrub_lock);
2967 atomic_inc(&fs_info->scrub_pause_req);
2968 while (atomic_read(&fs_info->scrubs_paused) !=
2969 atomic_read(&fs_info->scrubs_running)) {
2970 mutex_unlock(&fs_info->scrub_lock);
2971 wait_event(fs_info->scrub_pause_wait,
2972 atomic_read(&fs_info->scrubs_paused) ==
2973 atomic_read(&fs_info->scrubs_running));
2974 mutex_lock(&fs_info->scrub_lock);
2975 }
2976 mutex_unlock(&fs_info->scrub_lock);
2977}
2978
2979void btrfs_scrub_continue(struct btrfs_root *root)
2980{
2981 struct btrfs_fs_info *fs_info = root->fs_info;
2982
2983 atomic_dec(&fs_info->scrub_pause_req);
2984 wake_up(&fs_info->scrub_pause_wait);
2985}
2986
2987void btrfs_scrub_pause_super(struct btrfs_root *root)
2988{
2989 down_write(&root->fs_info->scrub_super_lock);
2990}
2991
2992void btrfs_scrub_continue_super(struct btrfs_root *root)
2993{
2994 up_write(&root->fs_info->scrub_super_lock);
2995}
2996
2997int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
2998{
2999 mutex_lock(&fs_info->scrub_lock);
3000 if (!atomic_read(&fs_info->scrubs_running)) {
3001 mutex_unlock(&fs_info->scrub_lock);
3002 return -ENOTCONN;
3003 }
3004
3005 atomic_inc(&fs_info->scrub_cancel_req);
3006 while (atomic_read(&fs_info->scrubs_running)) {
3007 mutex_unlock(&fs_info->scrub_lock);
3008 wait_event(fs_info->scrub_pause_wait,
3009 atomic_read(&fs_info->scrubs_running) == 0);
3010 mutex_lock(&fs_info->scrub_lock);
3011 }
3012 atomic_dec(&fs_info->scrub_cancel_req);
3013 mutex_unlock(&fs_info->scrub_lock);
3014
3015 return 0;
3016}
3017
3018int btrfs_scrub_cancel_dev(struct btrfs_fs_info *fs_info,
3019 struct btrfs_device *dev)
3020{
3021 struct scrub_ctx *sctx;
3022
3023 mutex_lock(&fs_info->scrub_lock);
3024 sctx = dev->scrub_device;
3025 if (!sctx) {
3026 mutex_unlock(&fs_info->scrub_lock);
3027 return -ENOTCONN;
3028 }
3029 atomic_inc(&sctx->cancel_req);
3030 while (dev->scrub_device) {
3031 mutex_unlock(&fs_info->scrub_lock);
3032 wait_event(fs_info->scrub_pause_wait,
3033 dev->scrub_device == NULL);
3034 mutex_lock(&fs_info->scrub_lock);
3035 }
3036 mutex_unlock(&fs_info->scrub_lock);
3037
3038 return 0;
3039}
3040
3041int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
3042 struct btrfs_scrub_progress *progress)
3043{
3044 struct btrfs_device *dev;
3045 struct scrub_ctx *sctx = NULL;
3046
3047 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
3048 dev = btrfs_find_device(root->fs_info, devid, NULL, NULL);
3049 if (dev)
3050 sctx = dev->scrub_device;
3051 if (sctx)
3052 memcpy(progress, &sctx->stat, sizeof(*progress));
3053 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
3054
3055 return dev ? (sctx ? 0 : -ENOTCONN) : -ENODEV;
3056}
3057
3058static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
3059 u64 extent_logical, u64 extent_len,
3060 u64 *extent_physical,
3061 struct btrfs_device **extent_dev,
3062 int *extent_mirror_num)
3063{
3064 u64 mapped_length;
3065 struct btrfs_bio *bbio = NULL;
3066 int ret;
3067
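	/*
	 * Map the logical extent through btrfs_map_block() and return the
	 * first stripe it resolves to.
	 */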
3068 mapped_length = extent_len;
3069 ret = btrfs_map_block(fs_info, READ, extent_logical,
3070 &mapped_length, &bbio, 0);
3071 if (ret || !bbio || mapped_length < extent_len ||
3072 !bbio->stripes[0].dev->bdev) {
3073 kfree(bbio);
3074 return;
3075 }
3076
3077 *extent_physical = bbio->stripes[0].physical;
3078 *extent_mirror_num = bbio->mirror_num;
3079 *extent_dev = bbio->stripes[0].dev;
3080 kfree(bbio);
3081}
3082
3083static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
3084 struct scrub_wr_ctx *wr_ctx,
3085 struct btrfs_fs_info *fs_info,
3086 struct btrfs_device *dev,
3087 int is_dev_replace)
3088{
3089 WARN_ON(wr_ctx->wr_curr_bio != NULL);
3090
3091 mutex_init(&wr_ctx->wr_lock);
3092 wr_ctx->wr_curr_bio = NULL;
3093 if (!is_dev_replace)
3094 return 0;
3095
3096 WARN_ON(!dev->bdev);
3097 wr_ctx->pages_per_wr_bio = min_t(int, SCRUB_PAGES_PER_WR_BIO,
3098 bio_get_nr_vecs(dev->bdev));
3099 wr_ctx->tgtdev = dev;
3100 atomic_set(&wr_ctx->flush_all_writes, 0);
3101 return 0;
3102}
3103
3104static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx)
3105{
3106 mutex_lock(&wr_ctx->wr_lock);
3107 kfree(wr_ctx->wr_curr_bio);
3108 wr_ctx->wr_curr_bio = NULL;
3109 mutex_unlock(&wr_ctx->wr_lock);
3110}
3111
3112static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
3113 int mirror_num, u64 physical_for_dev_replace)
3114{
3115 struct scrub_copy_nocow_ctx *nocow_ctx;
3116 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
3117
3118 nocow_ctx = kzalloc(sizeof(*nocow_ctx), GFP_NOFS);
3119 if (!nocow_ctx) {
3120 spin_lock(&sctx->stat_lock);
3121 sctx->stat.malloc_errors++;
3122 spin_unlock(&sctx->stat_lock);
3123 return -ENOMEM;
3124 }
3125
3126 scrub_pending_trans_workers_inc(sctx);
3127
3128 nocow_ctx->sctx = sctx;
3129 nocow_ctx->logical = logical;
3130 nocow_ctx->len = len;
3131 nocow_ctx->mirror_num = mirror_num;
3132 nocow_ctx->physical_for_dev_replace = physical_for_dev_replace;
3133 nocow_ctx->work.func = copy_nocow_pages_worker;
3134 btrfs_queue_worker(&fs_info->scrub_nocow_workers,
3135 &nocow_ctx->work);
3136
3137 return 0;
3138}
3139
3140static void copy_nocow_pages_worker(struct btrfs_work *work)
3141{
3142 struct scrub_copy_nocow_ctx *nocow_ctx =
3143 container_of(work, struct scrub_copy_nocow_ctx, work);
3144 struct scrub_ctx *sctx = nocow_ctx->sctx;
3145 u64 logical = nocow_ctx->logical;
3146 u64 len = nocow_ctx->len;
3147 int mirror_num = nocow_ctx->mirror_num;
3148 u64 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
3149 int ret;
3150 struct btrfs_trans_handle *trans = NULL;
3151 struct btrfs_fs_info *fs_info;
3152 struct btrfs_path *path;
3153 struct btrfs_root *root;
3154 int not_written = 0;
3155
3156 fs_info = sctx->dev_root->fs_info;
3157 root = fs_info->extent_root;
3158
3159 path = btrfs_alloc_path();
3160 if (!path) {
3161 spin_lock(&sctx->stat_lock);
3162 sctx->stat.malloc_errors++;
3163 spin_unlock(&sctx->stat_lock);
3164 not_written = 1;
3165 goto out;
3166 }
3167
3168 trans = btrfs_join_transaction(root);
3169 if (IS_ERR(trans)) {
3170 not_written = 1;
3171 goto out;
3172 }
3173
3174 ret = iterate_inodes_from_logical(logical, fs_info, path,
3175 copy_nocow_pages_for_inode,
3176 nocow_ctx);
3177 if (ret != 0 && ret != -ENOENT) {
3178 pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %llu, ret %d\n",
3179 (unsigned long long)logical,
3180 (unsigned long long)physical_for_dev_replace,
3181 (unsigned long long)len,
3182 (unsigned long long)mirror_num, ret);
3183 not_written = 1;
3184 goto out;
3185 }
3186
3187out:
3188 if (trans && !IS_ERR(trans))
3189 btrfs_end_transaction(trans, root);
3190 if (not_written)
3191 btrfs_dev_replace_stats_inc(&fs_info->dev_replace.
3192 num_uncorrectable_read_errors);
3193
3194 btrfs_free_path(path);
3195 kfree(nocow_ctx);
3196
3197 scrub_pending_trans_workers_dec(sctx);
3198}
3199
3200static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx)
3201{
3202 struct scrub_copy_nocow_ctx *nocow_ctx = ctx;
3203 struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info;
3204 struct btrfs_key key;
3205 struct inode *inode;
3206 struct page *page;
3207 struct btrfs_root *local_root;
3208 u64 physical_for_dev_replace;
3209 u64 len;
3210 unsigned long index;
3211 int srcu_index;
3212 int ret;
3213 int err;
3214
3215 key.objectid = root;
3216 key.type = BTRFS_ROOT_ITEM_KEY;
3217 key.offset = (u64)-1;
3218
3219 srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
3220
3221 local_root = btrfs_read_fs_root_no_name(fs_info, &key);
3222 if (IS_ERR(local_root)) {
3223 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
3224 return PTR_ERR(local_root);
3225 }
3226
3227 if (btrfs_root_refs(&local_root->root_item) == 0) {
3228 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
3229 return -ENOENT;
3230 }
3231
3232 key.type = BTRFS_INODE_ITEM_KEY;
3233 key.objectid = inum;
3234 key.offset = 0;
3235 inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
3236 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
3237 if (IS_ERR(inode))
3238 return PTR_ERR(inode);
3239
	/* serialize against truncate, direct I/O and hole punching */
3241 mutex_lock(&inode->i_mutex);
3242 inode_dio_wait(inode);
3243
3244 ret = 0;
3245 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
3246 len = nocow_ctx->len;
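	/*
	 * Copy the range page by page through the inode's page cache to the
	 * corresponding physical location on the dev-replace target device.
	 */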
3247 while (len >= PAGE_CACHE_SIZE) {
3248 index = offset >> PAGE_CACHE_SHIFT;
3249again:
3250 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
3251 if (!page) {
3252 pr_err("find_or_create_page() failed\n");
3253 ret = -ENOMEM;
3254 goto out;
3255 }
3256
3257 if (PageUptodate(page)) {
3258 if (PageDirty(page))
3259 goto next_page;
3260 } else {
3261 ClearPageError(page);
3262 err = extent_read_full_page(&BTRFS_I(inode)->
3263 io_tree,
3264 page, btrfs_get_extent,
3265 nocow_ctx->mirror_num);
3266 if (err) {
3267 ret = err;
3268 goto next_page;
3269 }
3270
3271 lock_page(page);
			/*
			 * If the page has been removed from the page cache
			 * meanwhile, its contents are stale: drop it and
			 * retry with a fresh page.
			 */
3278 if (page->mapping != inode->i_mapping) {
3279 page_cache_release(page);
3280 goto again;
3281 }
3282 if (!PageUptodate(page)) {
3283 ret = -EIO;
3284 goto next_page;
3285 }
3286 }
3287 err = write_page_nocow(nocow_ctx->sctx,
3288 physical_for_dev_replace, page);
3289 if (err)
3290 ret = err;
3291next_page:
3292 unlock_page(page);
3293 page_cache_release(page);
3294
3295 if (ret)
3296 break;
3297
3298 offset += PAGE_CACHE_SIZE;
3299 physical_for_dev_replace += PAGE_CACHE_SIZE;
3300 len -= PAGE_CACHE_SIZE;
3301 }
3302out:
3303 mutex_unlock(&inode->i_mutex);
3304 iput(inode);
3305 return ret;
3306}
3307
3308static int write_page_nocow(struct scrub_ctx *sctx,
3309 u64 physical_for_dev_replace, struct page *page)
3310{
3311 struct bio *bio;
3312 struct btrfs_device *dev;
3313 int ret;
3314 DECLARE_COMPLETION_ONSTACK(compl);
3315
3316 dev = sctx->wr_ctx.tgtdev;
3317 if (!dev)
3318 return -EIO;
3319 if (!dev->bdev) {
3320 printk_ratelimited(KERN_WARNING
3321 "btrfs: scrub write_page_nocow(bdev == NULL) is unexpected!\n");
3322 return -EIO;
3323 }
3324 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
3325 if (!bio) {
3326 spin_lock(&sctx->stat_lock);
3327 sctx->stat.malloc_errors++;
3328 spin_unlock(&sctx->stat_lock);
3329 return -ENOMEM;
3330 }
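	/* build a single-page synchronous write bio to the target device */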
3331 bio->bi_private = &compl;
3332 bio->bi_end_io = scrub_complete_bio_end_io;
3333 bio->bi_size = 0;
3334 bio->bi_sector = physical_for_dev_replace >> 9;
3335 bio->bi_bdev = dev->bdev;
3336 ret = bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
3337 if (ret != PAGE_CACHE_SIZE) {
3338leave_with_eio:
3339 bio_put(bio);
3340 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
3341 return -EIO;
3342 }
3343 btrfsic_submit_bio(WRITE_SYNC, bio);
3344 wait_for_completion(&compl);
3345
3346 if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
3347 goto leave_with_eio;
3348
3349 bio_put(bio);
3350 return 0;
3351}
3352