/*
 * Copyright (C) 2011 STRATO.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

19#include <linux/blkdev.h>
20#include <linux/ratelimit.h>
21#include "ctree.h"
22#include "volumes.h"
23#include "disk-io.h"
24#include "ordered-data.h"
25#include "transaction.h"
26#include "backref.h"
27#include "extent_io.h"
28#include "check-integrity.h"
29#include "rcu-string.h"

/*
 * This is only the first step towards a full-featured scrub. It reads all
 * extent and super block and verifies the checksums. In case a bad checksum
 * is found or the extent cannot be read, good data will be written back if
 * any can be found.
 *
 * Future enhancements:
 *  - In case an unrepairable extent is encountered, track which files are
 *    affected and report them
 *  - track and record media errors, throw out bad devices
 *  - add a mode to also read unallocated space
 */

44struct scrub_block;
45struct scrub_dev;
46
#define SCRUB_PAGES_PER_BIO		16	/* 128k per bio */
#define SCRUB_BIOS_PER_DEV		16	/* 1 MB per device in flight */
#define SCRUB_MAX_PAGES_PER_BLOCK	16	/* 64k per node/leaf/sector */
50
51struct scrub_page {
52 struct scrub_block *sblock;
53 struct page *page;
54 struct btrfs_device *dev;
55 u64 flags;
56 u64 generation;
57 u64 logical;
58 u64 physical;
59 struct {
60 unsigned int mirror_num:8;
61 unsigned int have_csum:1;
62 unsigned int io_error:1;
63 };
64 u8 csum[BTRFS_CSUM_SIZE];
65};
66
67struct scrub_bio {
68 int index;
69 struct scrub_dev *sdev;
70 struct bio *bio;
71 int err;
72 u64 logical;
73 u64 physical;
74 struct scrub_page *pagev[SCRUB_PAGES_PER_BIO];
75 int page_count;
76 int next_free;
77 struct btrfs_work work;
78};
79
80struct scrub_block {
81 struct scrub_page pagev[SCRUB_MAX_PAGES_PER_BLOCK];
82 int page_count;
83 atomic_t outstanding_pages;
84 atomic_t ref_count;
85 struct scrub_dev *sdev;
86 struct {
87 unsigned int header_error:1;
88 unsigned int checksum_error:1;
89 unsigned int no_io_error_seen:1;
90 unsigned int generation_error:1;
91 };
92};
93
94struct scrub_dev {
95 struct scrub_bio *bios[SCRUB_BIOS_PER_DEV];
96 struct btrfs_device *dev;
97 int first_free;
98 int curr;
99 atomic_t in_flight;
100 atomic_t fixup_cnt;
101 spinlock_t list_lock;
102 wait_queue_head_t list_wait;
103 u16 csum_size;
104 struct list_head csum_list;
105 atomic_t cancel_req;
106 int readonly;
107 int pages_per_bio;
108 u32 sectorsize;
109 u32 nodesize;
110 u32 leafsize;
	/*
	 * statistics
	 */
114 struct btrfs_scrub_progress stat;
115 spinlock_t stat_lock;
116};
117
118struct scrub_fixup_nodatasum {
119 struct scrub_dev *sdev;
120 u64 logical;
121 struct btrfs_root *root;
122 struct btrfs_work work;
123 int mirror_num;
124};
125
126struct scrub_warning {
127 struct btrfs_path *path;
128 u64 extent_item_size;
129 char *scratch_buf;
130 char *msg_buf;
131 const char *errstr;
132 sector_t sector;
133 u64 logical;
134 struct btrfs_device *dev;
135 int msg_bufsize;
136 int scratch_bufsize;
137};
138
139
140static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
141static int scrub_setup_recheck_block(struct scrub_dev *sdev,
142 struct btrfs_mapping_tree *map_tree,
143 u64 length, u64 logical,
144 struct scrub_block *sblock);
145static int scrub_recheck_block(struct btrfs_fs_info *fs_info,
146 struct scrub_block *sblock, int is_metadata,
147 int have_csum, u8 *csum, u64 generation,
148 u16 csum_size);
149static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
150 struct scrub_block *sblock,
151 int is_metadata, int have_csum,
152 const u8 *csum, u64 generation,
153 u16 csum_size);
154static void scrub_complete_bio_end_io(struct bio *bio, int err);
155static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
156 struct scrub_block *sblock_good,
157 int force_write);
158static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
159 struct scrub_block *sblock_good,
160 int page_num, int force_write);
161static int scrub_checksum_data(struct scrub_block *sblock);
162static int scrub_checksum_tree_block(struct scrub_block *sblock);
163static int scrub_checksum_super(struct scrub_block *sblock);
164static void scrub_block_get(struct scrub_block *sblock);
165static void scrub_block_put(struct scrub_block *sblock);
166static int scrub_add_page_to_bio(struct scrub_dev *sdev,
167 struct scrub_page *spage);
168static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len,
169 u64 physical, u64 flags, u64 gen, int mirror_num,
170 u8 *csum, int force);
171static void scrub_bio_end_io(struct bio *bio, int err);
172static void scrub_bio_end_io_worker(struct btrfs_work *work);
173static void scrub_block_complete(struct scrub_block *sblock);
174
175
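/* free all checksums that are still queued on the per-device csum list */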
176static void scrub_free_csums(struct scrub_dev *sdev)
177{
178 while (!list_empty(&sdev->csum_list)) {
179 struct btrfs_ordered_sum *sum;
180 sum = list_first_entry(&sdev->csum_list,
181 struct btrfs_ordered_sum, list);
182 list_del(&sum->list);
183 kfree(sum);
184 }
185}
186
187static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev)
188{
189 int i;
190
191 if (!sdev)
192 return;

	/* this can happen when scrub is cancelled */
195 if (sdev->curr != -1) {
196 struct scrub_bio *sbio = sdev->bios[sdev->curr];
197
198 for (i = 0; i < sbio->page_count; i++) {
199 BUG_ON(!sbio->pagev[i]);
200 BUG_ON(!sbio->pagev[i]->page);
201 scrub_block_put(sbio->pagev[i]->sblock);
202 }
203 bio_put(sbio->bio);
204 }
205
206 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
207 struct scrub_bio *sbio = sdev->bios[i];
208
209 if (!sbio)
210 break;
211 kfree(sbio);
212 }
213
214 scrub_free_csums(sdev);
215 kfree(sdev);
216}
217
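/*
 * allocate a scrub_dev for one device and initialize the fixed array of
 * scrub_bios, the free list that links them and the per-device statistics
 */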
218static noinline_for_stack
219struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
220{
221 struct scrub_dev *sdev;
222 int i;
223 struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
224 int pages_per_bio;
225
226 pages_per_bio = min_t(int, SCRUB_PAGES_PER_BIO,
227 bio_get_nr_vecs(dev->bdev));
228 sdev = kzalloc(sizeof(*sdev), GFP_NOFS);
229 if (!sdev)
230 goto nomem;
231 sdev->dev = dev;
232 sdev->pages_per_bio = pages_per_bio;
233 sdev->curr = -1;
234 for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
235 struct scrub_bio *sbio;
236
237 sbio = kzalloc(sizeof(*sbio), GFP_NOFS);
238 if (!sbio)
239 goto nomem;
240 sdev->bios[i] = sbio;
241
242 sbio->index = i;
243 sbio->sdev = sdev;
244 sbio->page_count = 0;
245 sbio->work.func = scrub_bio_end_io_worker;
246
247 if (i != SCRUB_BIOS_PER_DEV-1)
248 sdev->bios[i]->next_free = i + 1;
249 else
250 sdev->bios[i]->next_free = -1;
251 }
252 sdev->first_free = 0;
253 sdev->nodesize = dev->dev_root->nodesize;
254 sdev->leafsize = dev->dev_root->leafsize;
255 sdev->sectorsize = dev->dev_root->sectorsize;
256 atomic_set(&sdev->in_flight, 0);
257 atomic_set(&sdev->fixup_cnt, 0);
258 atomic_set(&sdev->cancel_req, 0);
259 sdev->csum_size = btrfs_super_csum_size(fs_info->super_copy);
260 INIT_LIST_HEAD(&sdev->csum_list);
261
262 spin_lock_init(&sdev->list_lock);
263 spin_lock_init(&sdev->stat_lock);
264 init_waitqueue_head(&sdev->list_wait);
265 return sdev;
266
267nomem:
268 scrub_free_dev(sdev);
269 return ERR_PTR(-ENOMEM);
270}
271
272static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx)
273{
274 u64 isize;
275 u32 nlink;
276 int ret;
277 int i;
278 struct extent_buffer *eb;
279 struct btrfs_inode_item *inode_item;
280 struct scrub_warning *swarn = ctx;
281 struct btrfs_fs_info *fs_info = swarn->dev->dev_root->fs_info;
282 struct inode_fs_paths *ipath = NULL;
283 struct btrfs_root *local_root;
284 struct btrfs_key root_key;
285
286 root_key.objectid = root;
287 root_key.type = BTRFS_ROOT_ITEM_KEY;
288 root_key.offset = (u64)-1;
289 local_root = btrfs_read_fs_root_no_name(fs_info, &root_key);
290 if (IS_ERR(local_root)) {
291 ret = PTR_ERR(local_root);
292 goto err;
293 }
294
295 ret = inode_item_info(inum, 0, local_root, swarn->path);
296 if (ret) {
297 btrfs_release_path(swarn->path);
298 goto err;
299 }
300
301 eb = swarn->path->nodes[0];
302 inode_item = btrfs_item_ptr(eb, swarn->path->slots[0],
303 struct btrfs_inode_item);
304 isize = btrfs_inode_size(eb, inode_item);
305 nlink = btrfs_inode_nlink(eb, inode_item);
306 btrfs_release_path(swarn->path);
307
308 ipath = init_ipath(4096, local_root, swarn->path);
309 if (IS_ERR(ipath)) {
310 ret = PTR_ERR(ipath);
311 ipath = NULL;
312 goto err;
313 }
314 ret = paths_from_inode(inum, ipath);
315
316 if (ret < 0)
317 goto err;

	/*
	 * we deliberately ignore the bit ipath might have been too small to
	 * hold all of the paths here
	 */
323 for (i = 0; i < ipath->fspath->elem_cnt; ++i)
324 printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "
325 "%s, sector %llu, root %llu, inode %llu, offset %llu, "
326 "length %llu, links %u (path: %s)\n", swarn->errstr,
327 swarn->logical, rcu_str_deref(swarn->dev->name),
328 (unsigned long long)swarn->sector, root, inum, offset,
329 min(isize - offset, (u64)PAGE_SIZE), nlink,
330 (char *)(unsigned long)ipath->fspath->val[i]);
331
332 free_ipath(ipath);
333 return 0;
334
335err:
336 printk_in_rcu(KERN_WARNING "btrfs: %s at logical %llu on dev "
337 "%s, sector %llu, root %llu, inode %llu, offset %llu: path "
338 "resolving failed with ret=%d\n", swarn->errstr,
339 swarn->logical, rcu_str_deref(swarn->dev->name),
340 (unsigned long long)swarn->sector, root, inum, offset, ret);
341
342 free_ipath(ipath);
343 return 0;
344}
345
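/*
 * resolve the logical address of a bad block back to the extent item and,
 * for data extents, to all referencing inodes and paths, and print one
 * warning line per affected file (or per tree backref for metadata)
 */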
346static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
347{
348 struct btrfs_device *dev = sblock->sdev->dev;
349 struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
350 struct btrfs_path *path;
351 struct btrfs_key found_key;
352 struct extent_buffer *eb;
353 struct btrfs_extent_item *ei;
354 struct scrub_warning swarn;
355 unsigned long ptr = 0;
356 u64 extent_item_pos;
357 u64 flags = 0;
358 u64 ref_root;
359 u32 item_size;
360 u8 ref_level;
361 const int bufsize = 4096;
362 int ret;
363
364 path = btrfs_alloc_path();
365
366 swarn.scratch_buf = kmalloc(bufsize, GFP_NOFS);
367 swarn.msg_buf = kmalloc(bufsize, GFP_NOFS);
368 BUG_ON(sblock->page_count < 1);
369 swarn.sector = (sblock->pagev[0].physical) >> 9;
370 swarn.logical = sblock->pagev[0].logical;
371 swarn.errstr = errstr;
372 swarn.dev = dev;
373 swarn.msg_bufsize = bufsize;
374 swarn.scratch_bufsize = bufsize;
375
376 if (!path || !swarn.scratch_buf || !swarn.msg_buf)
377 goto out;
378
379 ret = extent_from_logical(fs_info, swarn.logical, path, &found_key,
380 &flags);
381 if (ret < 0)
382 goto out;
383
384 extent_item_pos = swarn.logical - found_key.objectid;
385 swarn.extent_item_size = found_key.offset;
386
387 eb = path->nodes[0];
388 ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
389 item_size = btrfs_item_size_nr(eb, path->slots[0]);
390 btrfs_release_path(path);
391
392 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
393 do {
394 ret = tree_backref_for_extent(&ptr, eb, ei, item_size,
395 &ref_root, &ref_level);
396 printk_in_rcu(KERN_WARNING
397 "btrfs: %s at logical %llu on dev %s, "
398 "sector %llu: metadata %s (level %d) in tree "
399 "%llu\n", errstr, swarn.logical,
400 rcu_str_deref(dev->name),
401 (unsigned long long)swarn.sector,
402 ref_level ? "node" : "leaf",
403 ret < 0 ? -1 : ref_level,
404 ret < 0 ? -1 : ref_root);
405 } while (ret != 1);
406 } else {
407 swarn.path = path;
408 iterate_extent_inodes(fs_info, found_key.objectid,
409 extent_item_pos, 1,
410 scrub_print_warning_inode, &swarn);
411 }
412
413out:
414 btrfs_free_path(path);
415 kfree(swarn.scratch_buf);
416 kfree(swarn.msg_buf);
417}
418
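/*
 * callback for iterate_inodes_from_logical(): pages that are already
 * uptodate (and not dirty) are rewritten directly via repair_io_failure();
 * otherwise the page is re-read from the failed mirror so that the regular
 * read path writes a good copy back
 */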
419static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *ctx)
420{
421 struct page *page = NULL;
422 unsigned long index;
423 struct scrub_fixup_nodatasum *fixup = ctx;
424 int ret;
425 int corrected = 0;
426 struct btrfs_key key;
427 struct inode *inode = NULL;
428 u64 end = offset + PAGE_SIZE - 1;
429 struct btrfs_root *local_root;
430
431 key.objectid = root;
432 key.type = BTRFS_ROOT_ITEM_KEY;
433 key.offset = (u64)-1;
434 local_root = btrfs_read_fs_root_no_name(fixup->root->fs_info, &key);
435 if (IS_ERR(local_root))
436 return PTR_ERR(local_root);
437
438 key.type = BTRFS_INODE_ITEM_KEY;
439 key.objectid = inum;
440 key.offset = 0;
441 inode = btrfs_iget(fixup->root->fs_info->sb, &key, local_root, NULL);
442 if (IS_ERR(inode))
443 return PTR_ERR(inode);
444
445 index = offset >> PAGE_CACHE_SHIFT;
446
447 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
448 if (!page) {
449 ret = -ENOMEM;
450 goto out;
451 }
452
453 if (PageUptodate(page)) {
454 struct btrfs_mapping_tree *map_tree;
455 if (PageDirty(page)) {
			/*
			 * we need to write the data to the defect sector. the
			 * data that was in that sector is not in memory,
			 * because the page was modified. we must not write the
			 * modified page to that sector.
			 *
			 * TODO: what could be done here: wait for the delalloc
			 *       runner to write out that page (might involve
			 *       COW) and see whether the sector is still
			 *       referenced afterwards.
			 *
			 * For the meantime, we'll treat this error
			 * incorrectable, although there is a chance that a
			 * later scrub will find the bad sector again and that
			 * there's no dirty page in memory, then.
			 */
472 ret = -EIO;
473 goto out;
474 }
475 map_tree = &BTRFS_I(inode)->root->fs_info->mapping_tree;
476 ret = repair_io_failure(map_tree, offset, PAGE_SIZE,
477 fixup->logical, page,
478 fixup->mirror_num);
479 unlock_page(page);
480 corrected = !ret;
481 } else {
		/*
		 * we need to get good data first. the general readpage path
		 * will call repair_io_failure for us, we just have to make
		 * sure we read the bad mirror.
		 */
487 ret = set_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
488 EXTENT_DAMAGED, GFP_NOFS);
489 if (ret) {
			/* set_extent_bits should give proper error */
491 WARN_ON(ret > 0);
492 if (ret > 0)
493 ret = -EFAULT;
494 goto out;
495 }
496
497 ret = extent_read_full_page(&BTRFS_I(inode)->io_tree, page,
498 btrfs_get_extent,
499 fixup->mirror_num);
500 wait_on_page_locked(page);
501
502 corrected = !test_range_bit(&BTRFS_I(inode)->io_tree, offset,
503 end, EXTENT_DAMAGED, 0, NULL);
504 if (!corrected)
505 clear_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
506 EXTENT_DAMAGED, GFP_NOFS);
507 }
508
509out:
510 if (page)
511 put_page(page);
512 if (inode)
513 iput(inode);
514
515 if (ret < 0)
516 return ret;
517
518 if (ret == 0 && corrected) {
		/*
		 * we only need to call readpage for one of the inodes that
		 * pointed to the bad extent. readpage will correct all copies.
		 */
523 return 1;
524 }
525
526 return -EIO;
527}
528
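/*
 * worker for the nodatasum case: iterate all inodes that reference the
 * failed logical address and trigger a re-read for each of them (see
 * scrub_fixup_readpage above)
 */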
529static void scrub_fixup_nodatasum(struct btrfs_work *work)
530{
531 int ret;
532 struct scrub_fixup_nodatasum *fixup;
533 struct scrub_dev *sdev;
534 struct btrfs_trans_handle *trans = NULL;
535 struct btrfs_fs_info *fs_info;
536 struct btrfs_path *path;
537 int uncorrectable = 0;
538
539 fixup = container_of(work, struct scrub_fixup_nodatasum, work);
540 sdev = fixup->sdev;
541 fs_info = fixup->root->fs_info;
542
543 path = btrfs_alloc_path();
544 if (!path) {
545 spin_lock(&sdev->stat_lock);
546 ++sdev->stat.malloc_errors;
547 spin_unlock(&sdev->stat_lock);
548 uncorrectable = 1;
549 goto out;
550 }
551
552 trans = btrfs_join_transaction(fixup->root);
553 if (IS_ERR(trans)) {
554 uncorrectable = 1;
555 goto out;
556 }
557
	/*
	 * the idea is to trigger a regular read through the standard path. we
	 * read a page from the (failed) logical address by specifying the
	 * corresponding copynum of the failed sector. thus, that readpage is
	 * expected to fail.
	 * that is the point where on-the-fly error correction will kick in
	 * (once it's finished) and rewrite the failed sector if a good copy
	 * can be found.
	 */
567 ret = iterate_inodes_from_logical(fixup->logical, fixup->root->fs_info,
568 path, scrub_fixup_readpage,
569 fixup);
570 if (ret < 0) {
571 uncorrectable = 1;
572 goto out;
573 }
574 WARN_ON(ret != 1);
575
576 spin_lock(&sdev->stat_lock);
577 ++sdev->stat.corrected_errors;
578 spin_unlock(&sdev->stat_lock);
579
580out:
581 if (trans && !IS_ERR(trans))
582 btrfs_end_transaction(trans, fixup->root);
583 if (uncorrectable) {
584 spin_lock(&sdev->stat_lock);
585 ++sdev->stat.uncorrectable_errors;
586 spin_unlock(&sdev->stat_lock);
587
588 printk_ratelimited_in_rcu(KERN_ERR
589 "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n",
590 (unsigned long long)fixup->logical,
591 rcu_str_deref(sdev->dev->name));
592 }
593
594 btrfs_free_path(path);
595 kfree(fixup);

	/* see caller why we're pretending to be paused in the scrub counters */
598 mutex_lock(&fs_info->scrub_lock);
599 atomic_dec(&fs_info->scrubs_running);
600 atomic_dec(&fs_info->scrubs_paused);
601 mutex_unlock(&fs_info->scrub_lock);
602 atomic_dec(&sdev->fixup_cnt);
603 wake_up(&fs_info->scrub_pause_wait);
604 wake_up(&sdev->list_wait);
605}

/*
 * scrub_handle_errored_block gets called when either verification of the
 * pages failed or the bio failed to read, e.g. with -EIO. In the latter
 * case, this function handles all pages in the bio, even though only one
 * may be bad.
 * The goal of this function is to repair the errored block by using the
 * contents of one of the mirrors.
 */
615static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
616{
617 struct scrub_dev *sdev = sblock_to_check->sdev;
618 struct btrfs_fs_info *fs_info;
619 u64 length;
620 u64 logical;
621 u64 generation;
622 unsigned int failed_mirror_index;
623 unsigned int is_metadata;
624 unsigned int have_csum;
625 u8 *csum;
626 struct scrub_block *sblocks_for_recheck;
627 struct scrub_block *sblock_bad;
628 int ret;
629 int mirror_index;
630 int page_num;
631 int success;
632 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
633 DEFAULT_RATELIMIT_BURST);
634
635 BUG_ON(sblock_to_check->page_count < 1);
636 fs_info = sdev->dev->dev_root->fs_info;
637 length = sblock_to_check->page_count * PAGE_SIZE;
638 logical = sblock_to_check->pagev[0].logical;
639 generation = sblock_to_check->pagev[0].generation;
640 BUG_ON(sblock_to_check->pagev[0].mirror_num < 1);
641 failed_mirror_index = sblock_to_check->pagev[0].mirror_num - 1;
642 is_metadata = !(sblock_to_check->pagev[0].flags &
643 BTRFS_EXTENT_FLAG_DATA);
644 have_csum = sblock_to_check->pagev[0].have_csum;
645 csum = sblock_to_check->pagev[0].csum;

	/*
	 * read all mirrors one after the other. This includes to
	 * re-read the extent or metadata block that failed (that was
	 * the cause that this fixup code is called) another time,
	 * page by page this time in order to know which pages
	 * caused I/O errors and which ones are good (for all mirrors).
	 * It is the goal to handle the situation when more than one
	 * mirror contains I/O errors, but the errors do not
	 * overlap, i.e. the data can be repaired by selecting the
	 * pages from those mirrors without I/O error on the
	 * particular pages. One example (with blocks >= 2 * PAGE_SIZE)
	 * would be that mirror #1 has an I/O error on the first page,
	 * the second page is good, and mirror #2 has an I/O error on
	 * the second page, but the first page is good.
	 * Then the first page of the first mirror can be repaired by
	 * taking the first page of the second mirror, and the
	 * second page of the second mirror can be repaired by
	 * copying the contents of the 2nd page of the 1st mirror.
	 * One more note: if the pages of one mirror contain I/O
	 * errors, the checksum cannot be verified. In order to get
	 * the best data for repairing, the first attempt is to find
	 * a mirror without I/O errors and with a validated checksum.
	 * Only if this is not possible, the pages are picked from
	 * mirrors with I/O errors without considering the checksum.
	 * If the latter is the case, at the end, the checksum of the
	 * repaired area is verified in order to correctly maintain
	 * the statistics.
	 */
676 sblocks_for_recheck = kzalloc(BTRFS_MAX_MIRRORS *
677 sizeof(*sblocks_for_recheck),
678 GFP_NOFS);
679 if (!sblocks_for_recheck) {
680 spin_lock(&sdev->stat_lock);
681 sdev->stat.malloc_errors++;
682 sdev->stat.read_errors++;
683 sdev->stat.uncorrectable_errors++;
684 spin_unlock(&sdev->stat_lock);
685 btrfs_dev_stat_inc_and_print(sdev->dev,
686 BTRFS_DEV_STAT_READ_ERRS);
687 goto out;
688 }

	/* setup the context, map the logical blocks and alloc the pages */
691 ret = scrub_setup_recheck_block(sdev, &fs_info->mapping_tree, length,
692 logical, sblocks_for_recheck);
693 if (ret) {
694 spin_lock(&sdev->stat_lock);
695 sdev->stat.read_errors++;
696 sdev->stat.uncorrectable_errors++;
697 spin_unlock(&sdev->stat_lock);
698 btrfs_dev_stat_inc_and_print(sdev->dev,
699 BTRFS_DEV_STAT_READ_ERRS);
700 goto out;
701 }
702 BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
703 sblock_bad = sblocks_for_recheck + failed_mirror_index;

	/* build and submit the bios for the failed mirror, check checksums */
706 ret = scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum,
707 csum, generation, sdev->csum_size);
708 if (ret) {
709 spin_lock(&sdev->stat_lock);
710 sdev->stat.read_errors++;
711 sdev->stat.uncorrectable_errors++;
712 spin_unlock(&sdev->stat_lock);
713 btrfs_dev_stat_inc_and_print(sdev->dev,
714 BTRFS_DEV_STAT_READ_ERRS);
715 goto out;
716 }
717
718 if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
719 sblock_bad->no_io_error_seen) {
		/*
		 * the error disappeared after reading page by page, or
		 * the area was part of a huge bio and other parts of the
		 * bio caused I/O errors, or the block layer merged several
		 * read requests into one and the error is caused by a
		 * different bio part (this one was ok when re-read page
		 * by page). The error cannot be verified and is counted
		 * as an unverified error below.
		 */
728 spin_lock(&sdev->stat_lock);
729 sdev->stat.unverified_errors++;
730 spin_unlock(&sdev->stat_lock);
731
732 goto out;
733 }
734
735 if (!sblock_bad->no_io_error_seen) {
736 spin_lock(&sdev->stat_lock);
737 sdev->stat.read_errors++;
738 spin_unlock(&sdev->stat_lock);
739 if (__ratelimit(&_rs))
740 scrub_print_warning("i/o error", sblock_to_check);
741 btrfs_dev_stat_inc_and_print(sdev->dev,
742 BTRFS_DEV_STAT_READ_ERRS);
743 } else if (sblock_bad->checksum_error) {
744 spin_lock(&sdev->stat_lock);
745 sdev->stat.csum_errors++;
746 spin_unlock(&sdev->stat_lock);
747 if (__ratelimit(&_rs))
748 scrub_print_warning("checksum error", sblock_to_check);
749 btrfs_dev_stat_inc_and_print(sdev->dev,
750 BTRFS_DEV_STAT_CORRUPTION_ERRS);
751 } else if (sblock_bad->header_error) {
752 spin_lock(&sdev->stat_lock);
753 sdev->stat.verify_errors++;
754 spin_unlock(&sdev->stat_lock);
755 if (__ratelimit(&_rs))
756 scrub_print_warning("checksum/header error",
757 sblock_to_check);
758 if (sblock_bad->generation_error)
759 btrfs_dev_stat_inc_and_print(sdev->dev,
760 BTRFS_DEV_STAT_GENERATION_ERRS);
761 else
762 btrfs_dev_stat_inc_and_print(sdev->dev,
763 BTRFS_DEV_STAT_CORRUPTION_ERRS);
764 }
765
766 if (sdev->readonly)
767 goto did_not_correct_error;
768
769 if (!is_metadata && !have_csum) {
770 struct scrub_fixup_nodatasum *fixup_nodatasum;

		/*
		 * !is_metadata and !have_csum, this means that the data
		 * might not be COW'ed, that it might be modified
		 * concurrently. The general strategy to work on the
		 * commit root does not help in the case when COW is not
		 * used.
		 */
779 fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS);
780 if (!fixup_nodatasum)
781 goto did_not_correct_error;
782 fixup_nodatasum->sdev = sdev;
783 fixup_nodatasum->logical = logical;
784 fixup_nodatasum->root = fs_info->extent_root;
785 fixup_nodatasum->mirror_num = failed_mirror_index + 1;

		/*
		 * increment scrubs_running to prevent cancel requests from
		 * completing as long as a fixup worker is running. we must also
		 * increment scrubs_paused to prevent deadlocking on pause
		 * requests used for transactions commits (as the worker uses a
		 * transaction context). it is safe to regard the fixup worker
		 * as paused for all matters practical. effectively, we only
		 * avoid cancellation requests from completing.
		 */
795 mutex_lock(&fs_info->scrub_lock);
796 atomic_inc(&fs_info->scrubs_running);
797 atomic_inc(&fs_info->scrubs_paused);
798 mutex_unlock(&fs_info->scrub_lock);
799 atomic_inc(&sdev->fixup_cnt);
800 fixup_nodatasum->work.func = scrub_fixup_nodatasum;
801 btrfs_queue_worker(&fs_info->scrub_workers,
802 &fixup_nodatasum->work);
803 goto out;
804 }

	/*
	 * now build and submit the bios for the other mirrors, check
	 * checksums
	 */
810 for (mirror_index = 0;
811 mirror_index < BTRFS_MAX_MIRRORS &&
812 sblocks_for_recheck[mirror_index].page_count > 0;
813 mirror_index++) {
814 if (mirror_index == failed_mirror_index)
815 continue;

		/* build and submit the bios, check checksums */
818 ret = scrub_recheck_block(fs_info,
819 sblocks_for_recheck + mirror_index,
820 is_metadata, have_csum, csum,
821 generation, sdev->csum_size);
822 if (ret)
823 goto did_not_correct_error;
824 }

	/*
	 * first try to pick the mirror which is completely without I/O
	 * errors and also does not have a checksum error.
	 * If one is found and a checksum is present (or the block is
	 * metadata), the complete block that is known to contain an
	 * error is rewritten from that mirror (force_write). If that
	 * succeeds, the error is accounted as corrected and we are
	 * done.
	 */
839 for (mirror_index = 0;
840 mirror_index < BTRFS_MAX_MIRRORS &&
841 sblocks_for_recheck[mirror_index].page_count > 0;
842 mirror_index++) {
843 struct scrub_block *sblock_other = sblocks_for_recheck +
844 mirror_index;
845
846 if (!sblock_other->header_error &&
847 !sblock_other->checksum_error &&
848 sblock_other->no_io_error_seen) {
849 int force_write = is_metadata || have_csum;
850
851 ret = scrub_repair_block_from_good_copy(sblock_bad,
852 sblock_other,
853 force_write);
854 if (0 == ret)
855 goto corrected_error;
856 }
857 }

	/*
	 * in case of I/O errors in the area that is supposed to be
	 * repaired, continue by picking good copies of those pages.
	 * Select the good pages from mirrors to rewrite bad pages from
	 * the area to fix. Afterwards verify the checksum of the block
	 * that is supposed to be repaired. This verification step is
	 * only done for the purpose of statistic counting and for the
	 * final scrub report, whether errors remain.
	 * A perfect algorithm could make use of the checksum and try
	 * combinations of pages from different mirrors until the
	 * checksum verification succeeds. That would be a lot of effort
	 * for an extremely rare case; instead, any page that is free of
	 * I/O errors is taken from whichever mirror provides it, and
	 * the repaired block is re-checked at the end.
	 */

	/* can only fix I/O errors from here on */
885 if (sblock_bad->no_io_error_seen)
886 goto did_not_correct_error;
887
888 success = 1;
889 for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
890 struct scrub_page *page_bad = sblock_bad->pagev + page_num;
891
892 if (!page_bad->io_error)
893 continue;
894
895 for (mirror_index = 0;
896 mirror_index < BTRFS_MAX_MIRRORS &&
897 sblocks_for_recheck[mirror_index].page_count > 0;
898 mirror_index++) {
899 struct scrub_block *sblock_other = sblocks_for_recheck +
900 mirror_index;
901 struct scrub_page *page_other = sblock_other->pagev +
902 page_num;
903
904 if (!page_other->io_error) {
905 ret = scrub_repair_page_from_good_copy(
906 sblock_bad, sblock_other, page_num, 0);
907 if (0 == ret) {
908 page_bad->io_error = 0;
909 break;
910 }
911 }
912 }
913
914 if (page_bad->io_error) {
			/* did not find a mirror to copy the page from */
916 success = 0;
917 }
918 }
919
920 if (success) {
921 if (is_metadata || have_csum) {
			/*
			 * need to verify the checksum now that all
			 * sectors on disk are repaired (the write
			 * request for data to be repaired is on its way).
			 * Just be lazy and use scrub_recheck_block()
			 * which re-reads the data before the checksum
			 * is verified, but most likely the data comes out
			 * of the page cache.
			 */
931 ret = scrub_recheck_block(fs_info, sblock_bad,
932 is_metadata, have_csum, csum,
933 generation, sdev->csum_size);
934 if (!ret && !sblock_bad->header_error &&
935 !sblock_bad->checksum_error &&
936 sblock_bad->no_io_error_seen)
937 goto corrected_error;
938 else
939 goto did_not_correct_error;
940 } else {
941corrected_error:
942 spin_lock(&sdev->stat_lock);
943 sdev->stat.corrected_errors++;
944 spin_unlock(&sdev->stat_lock);
945 printk_ratelimited_in_rcu(KERN_ERR
946 "btrfs: fixed up error at logical %llu on dev %s\n",
947 (unsigned long long)logical,
948 rcu_str_deref(sdev->dev->name));
949 }
950 } else {
951did_not_correct_error:
952 spin_lock(&sdev->stat_lock);
953 sdev->stat.uncorrectable_errors++;
954 spin_unlock(&sdev->stat_lock);
955 printk_ratelimited_in_rcu(KERN_ERR
956 "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n",
957 (unsigned long long)logical,
958 rcu_str_deref(sdev->dev->name));
959 }
960
961out:
962 if (sblocks_for_recheck) {
963 for (mirror_index = 0; mirror_index < BTRFS_MAX_MIRRORS;
964 mirror_index++) {
965 struct scrub_block *sblock = sblocks_for_recheck +
966 mirror_index;
967 int page_index;
968
969 for (page_index = 0; page_index < SCRUB_PAGES_PER_BIO;
970 page_index++)
971 if (sblock->pagev[page_index].page)
972 __free_page(
973 sblock->pagev[page_index].page);
974 }
975 kfree(sblocks_for_recheck);
976 }
977
978 return 0;
979}
980
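/*
 * map the logical range to all of its mirrors and allocate one page per
 * mirror and per PAGE_SIZE of the range; the resulting array of
 * scrub_blocks is used by scrub_handle_errored_block() for the re-reads
 */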
981static int scrub_setup_recheck_block(struct scrub_dev *sdev,
982 struct btrfs_mapping_tree *map_tree,
983 u64 length, u64 logical,
984 struct scrub_block *sblocks_for_recheck)
985{
986 int page_index;
987 int mirror_index;
988 int ret;
989
	/*
	 * note: the two members ref_count and outstanding_pages
	 * are not used (and not set) in the blocks that are used for
	 * the recheck procedure
	 */

996 page_index = 0;
997 while (length > 0) {
998 u64 sublen = min_t(u64, length, PAGE_SIZE);
999 u64 mapped_length = sublen;
1000 struct btrfs_bio *bbio = NULL;
1001
		/*
		 * with a length of PAGE_SIZE, each returned stripe
		 * represents one mirror
		 */
1006 ret = btrfs_map_block(map_tree, WRITE, logical, &mapped_length,
1007 &bbio, 0);
1008 if (ret || !bbio || mapped_length < sublen) {
1009 kfree(bbio);
1010 return -EIO;
1011 }
1012
1013 BUG_ON(page_index >= SCRUB_PAGES_PER_BIO);
1014 for (mirror_index = 0; mirror_index < (int)bbio->num_stripes;
1015 mirror_index++) {
1016 struct scrub_block *sblock;
1017 struct scrub_page *page;
1018
1019 if (mirror_index >= BTRFS_MAX_MIRRORS)
1020 continue;
1021
1022 sblock = sblocks_for_recheck + mirror_index;
1023 page = sblock->pagev + page_index;
1024 page->logical = logical;
1025 page->physical = bbio->stripes[mirror_index].physical;
1026
1027 page->dev = bbio->stripes[mirror_index].dev;
1028 page->mirror_num = mirror_index + 1;
1029 page->page = alloc_page(GFP_NOFS);
1030 if (!page->page) {
1031 spin_lock(&sdev->stat_lock);
1032 sdev->stat.malloc_errors++;
1033 spin_unlock(&sdev->stat_lock);
1034 kfree(bbio);
1035 return -ENOMEM;
1036 }
1037 sblock->page_count++;
1038 }
1039 kfree(bbio);
1040 length -= sublen;
1041 logical += sublen;
1042 page_index++;
1043 }
1044
1045 return 0;
1046}
1047
/*
 * this function will check the on disk data for checksum errors, header
 * errors and read I/O errors. If any I/O errors happen, the exact pages
 * which are errored are marked as being bad. The goal is to enable scrub
 * to take those pages that are not errored from all the mirrors so that
 * the pages that are errored in the just handled mirror can be repaired.
 */
1055static int scrub_recheck_block(struct btrfs_fs_info *fs_info,
1056 struct scrub_block *sblock, int is_metadata,
1057 int have_csum, u8 *csum, u64 generation,
1058 u16 csum_size)
1059{
1060 int page_num;
1061
1062 sblock->no_io_error_seen = 1;
1063 sblock->header_error = 0;
1064 sblock->checksum_error = 0;
1065
1066 for (page_num = 0; page_num < sblock->page_count; page_num++) {
1067 struct bio *bio;
1068 int ret;
1069 struct scrub_page *page = sblock->pagev + page_num;
1070 DECLARE_COMPLETION_ONSTACK(complete);
1071
1072 if (page->dev->bdev == NULL) {
1073 page->io_error = 1;
1074 sblock->no_io_error_seen = 0;
1075 continue;
1076 }
1077
1078 BUG_ON(!page->page);
1079 bio = bio_alloc(GFP_NOFS, 1);
1080 if (!bio)
1081 return -EIO;
1082 bio->bi_bdev = page->dev->bdev;
1083 bio->bi_sector = page->physical >> 9;
1084 bio->bi_end_io = scrub_complete_bio_end_io;
1085 bio->bi_private = &complete;
1086
1087 ret = bio_add_page(bio, page->page, PAGE_SIZE, 0);
1088 if (PAGE_SIZE != ret) {
1089 bio_put(bio);
1090 return -EIO;
1091 }
1092 btrfsic_submit_bio(READ, bio);

		/* this will also unplug the queue */
1095 wait_for_completion(&complete);
1096
1097 page->io_error = !test_bit(BIO_UPTODATE, &bio->bi_flags);
1098 if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
1099 sblock->no_io_error_seen = 0;
1100 bio_put(bio);
1101 }
1102
1103 if (sblock->no_io_error_seen)
1104 scrub_recheck_block_checksum(fs_info, sblock, is_metadata,
1105 have_csum, csum, generation,
1106 csum_size);
1107
1108 return 0;
1109}
1110
1111static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
1112 struct scrub_block *sblock,
1113 int is_metadata, int have_csum,
1114 const u8 *csum, u64 generation,
1115 u16 csum_size)
1116{
1117 int page_num;
1118 u8 calculated_csum[BTRFS_CSUM_SIZE];
1119 u32 crc = ~(u32)0;
1120 struct btrfs_root *root = fs_info->extent_root;
1121 void *mapped_buffer;
1122
1123 BUG_ON(!sblock->pagev[0].page);
1124 if (is_metadata) {
1125 struct btrfs_header *h;
1126
1127 mapped_buffer = kmap_atomic(sblock->pagev[0].page);
1128 h = (struct btrfs_header *)mapped_buffer;
1129
1130 if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr) ||
1131 memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) ||
1132 memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
1133 BTRFS_UUID_SIZE)) {
1134 sblock->header_error = 1;
1135 } else if (generation != le64_to_cpu(h->generation)) {
1136 sblock->header_error = 1;
1137 sblock->generation_error = 1;
1138 }
1139 csum = h->csum;
1140 } else {
1141 if (!have_csum)
1142 return;
1143
1144 mapped_buffer = kmap_atomic(sblock->pagev[0].page);
1145 }
1146
1147 for (page_num = 0;;) {
1148 if (page_num == 0 && is_metadata)
1149 crc = btrfs_csum_data(root,
1150 ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE,
1151 crc, PAGE_SIZE - BTRFS_CSUM_SIZE);
1152 else
1153 crc = btrfs_csum_data(root, mapped_buffer, crc,
1154 PAGE_SIZE);
1155
1156 kunmap_atomic(mapped_buffer);
1157 page_num++;
1158 if (page_num >= sblock->page_count)
1159 break;
1160 BUG_ON(!sblock->pagev[page_num].page);
1161
1162 mapped_buffer = kmap_atomic(sblock->pagev[page_num].page);
1163 }
1164
1165 btrfs_csum_final(crc, calculated_csum);
1166 if (memcmp(calculated_csum, csum, csum_size))
1167 sblock->checksum_error = 1;
1168}
1169
1170static void scrub_complete_bio_end_io(struct bio *bio, int err)
1171{
1172 complete((struct completion *)bio->bi_private);
1173}
1174
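/* rewrite all pages of the bad block from the corresponding good mirror */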
1175static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
1176 struct scrub_block *sblock_good,
1177 int force_write)
1178{
1179 int page_num;
1180 int ret = 0;
1181
1182 for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
1183 int ret_sub;
1184
1185 ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
1186 sblock_good,
1187 page_num,
1188 force_write);
1189 if (ret_sub)
1190 ret = ret_sub;
1191 }
1192
1193 return ret;
1194}
1195
1196static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
1197 struct scrub_block *sblock_good,
1198 int page_num, int force_write)
1199{
1200 struct scrub_page *page_bad = sblock_bad->pagev + page_num;
1201 struct scrub_page *page_good = sblock_good->pagev + page_num;
1202
1203 BUG_ON(sblock_bad->pagev[page_num].page == NULL);
1204 BUG_ON(sblock_good->pagev[page_num].page == NULL);
1205 if (force_write || sblock_bad->header_error ||
1206 sblock_bad->checksum_error || page_bad->io_error) {
1207 struct bio *bio;
1208 int ret;
1209 DECLARE_COMPLETION_ONSTACK(complete);
1210
1211 bio = bio_alloc(GFP_NOFS, 1);
1212 if (!bio)
1213 return -EIO;
1214 bio->bi_bdev = page_bad->dev->bdev;
1215 bio->bi_sector = page_bad->physical >> 9;
1216 bio->bi_end_io = scrub_complete_bio_end_io;
1217 bio->bi_private = &complete;
1218
1219 ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
1220 if (PAGE_SIZE != ret) {
1221 bio_put(bio);
1222 return -EIO;
1223 }
1224 btrfsic_submit_bio(WRITE, bio);

		/* this will also unplug the queue */
1227 wait_for_completion(&complete);
1228 if (!bio_flagged(bio, BIO_UPTODATE)) {
1229 btrfs_dev_stat_inc_and_print(page_bad->dev,
1230 BTRFS_DEV_STAT_WRITE_ERRS);
1231 bio_put(bio);
1232 return -EIO;
1233 }
1234 bio_put(bio);
1235 }
1236
1237 return 0;
1238}
1239
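/*
 * dispatch the checksum verification for a completed block and start the
 * error handling if the verification fails
 */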
1240static void scrub_checksum(struct scrub_block *sblock)
1241{
1242 u64 flags;
1243 int ret;
1244
1245 BUG_ON(sblock->page_count < 1);
1246 flags = sblock->pagev[0].flags;
1247 ret = 0;
1248 if (flags & BTRFS_EXTENT_FLAG_DATA)
1249 ret = scrub_checksum_data(sblock);
1250 else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
1251 ret = scrub_checksum_tree_block(sblock);
1252 else if (flags & BTRFS_EXTENT_FLAG_SUPER)
1253 (void)scrub_checksum_super(sblock);
1254 else
1255 WARN_ON(1);
1256 if (ret)
1257 scrub_handle_errored_block(sblock);
1258}
1259
1260static int scrub_checksum_data(struct scrub_block *sblock)
1261{
1262 struct scrub_dev *sdev = sblock->sdev;
1263 u8 csum[BTRFS_CSUM_SIZE];
1264 u8 *on_disk_csum;
1265 struct page *page;
1266 void *buffer;
1267 u32 crc = ~(u32)0;
1268 int fail = 0;
1269 struct btrfs_root *root = sdev->dev->dev_root;
1270 u64 len;
1271 int index;
1272
1273 BUG_ON(sblock->page_count < 1);
1274 if (!sblock->pagev[0].have_csum)
1275 return 0;
1276
1277 on_disk_csum = sblock->pagev[0].csum;
1278 page = sblock->pagev[0].page;
1279 buffer = kmap_atomic(page);
1280
1281 len = sdev->sectorsize;
1282 index = 0;
1283 for (;;) {
1284 u64 l = min_t(u64, len, PAGE_SIZE);
1285
1286 crc = btrfs_csum_data(root, buffer, crc, l);
1287 kunmap_atomic(buffer);
1288 len -= l;
1289 if (len == 0)
1290 break;
1291 index++;
1292 BUG_ON(index >= sblock->page_count);
1293 BUG_ON(!sblock->pagev[index].page);
1294 page = sblock->pagev[index].page;
1295 buffer = kmap_atomic(page);
1296 }
1297
1298 btrfs_csum_final(crc, csum);
1299 if (memcmp(csum, on_disk_csum, sdev->csum_size))
1300 fail = 1;
1301
1302 return fail;
1303}
1304
1305static int scrub_checksum_tree_block(struct scrub_block *sblock)
1306{
1307 struct scrub_dev *sdev = sblock->sdev;
1308 struct btrfs_header *h;
1309 struct btrfs_root *root = sdev->dev->dev_root;
1310 struct btrfs_fs_info *fs_info = root->fs_info;
1311 u8 calculated_csum[BTRFS_CSUM_SIZE];
1312 u8 on_disk_csum[BTRFS_CSUM_SIZE];
1313 struct page *page;
1314 void *mapped_buffer;
1315 u64 mapped_size;
1316 void *p;
1317 u32 crc = ~(u32)0;
1318 int fail = 0;
1319 int crc_fail = 0;
1320 u64 len;
1321 int index;
1322
1323 BUG_ON(sblock->page_count < 1);
1324 page = sblock->pagev[0].page;
1325 mapped_buffer = kmap_atomic(page);
1326 h = (struct btrfs_header *)mapped_buffer;
1327 memcpy(on_disk_csum, h->csum, sdev->csum_size);
1328
	/*
	 * we don't use the getter functions here, as we
	 * a) don't have an extent buffer and
	 * b) the page is already kmapped
	 */

1335 if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr))
1336 ++fail;
1337
1338 if (sblock->pagev[0].generation != le64_to_cpu(h->generation))
1339 ++fail;
1340
1341 if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
1342 ++fail;
1343
1344 if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
1345 BTRFS_UUID_SIZE))
1346 ++fail;
1347
1348 BUG_ON(sdev->nodesize != sdev->leafsize);
1349 len = sdev->nodesize - BTRFS_CSUM_SIZE;
1350 mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
1351 p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
1352 index = 0;
1353 for (;;) {
1354 u64 l = min_t(u64, len, mapped_size);
1355
1356 crc = btrfs_csum_data(root, p, crc, l);
1357 kunmap_atomic(mapped_buffer);
1358 len -= l;
1359 if (len == 0)
1360 break;
1361 index++;
1362 BUG_ON(index >= sblock->page_count);
1363 BUG_ON(!sblock->pagev[index].page);
1364 page = sblock->pagev[index].page;
1365 mapped_buffer = kmap_atomic(page);
1366 mapped_size = PAGE_SIZE;
1367 p = mapped_buffer;
1368 }
1369
1370 btrfs_csum_final(crc, calculated_csum);
1371 if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size))
1372 ++crc_fail;
1373
1374 return fail || crc_fail;
1375}
1376
1377static int scrub_checksum_super(struct scrub_block *sblock)
1378{
1379 struct btrfs_super_block *s;
1380 struct scrub_dev *sdev = sblock->sdev;
1381 struct btrfs_root *root = sdev->dev->dev_root;
1382 struct btrfs_fs_info *fs_info = root->fs_info;
1383 u8 calculated_csum[BTRFS_CSUM_SIZE];
1384 u8 on_disk_csum[BTRFS_CSUM_SIZE];
1385 struct page *page;
1386 void *mapped_buffer;
1387 u64 mapped_size;
1388 void *p;
1389 u32 crc = ~(u32)0;
1390 int fail_gen = 0;
1391 int fail_cor = 0;
1392 u64 len;
1393 int index;
1394
1395 BUG_ON(sblock->page_count < 1);
1396 page = sblock->pagev[0].page;
1397 mapped_buffer = kmap_atomic(page);
1398 s = (struct btrfs_super_block *)mapped_buffer;
1399 memcpy(on_disk_csum, s->csum, sdev->csum_size);
1400
1401 if (sblock->pagev[0].logical != le64_to_cpu(s->bytenr))
1402 ++fail_cor;
1403
1404 if (sblock->pagev[0].generation != le64_to_cpu(s->generation))
1405 ++fail_gen;
1406
1407 if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
1408 ++fail_cor;
1409
1410 len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE;
1411 mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
1412 p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
1413 index = 0;
1414 for (;;) {
1415 u64 l = min_t(u64, len, mapped_size);
1416
1417 crc = btrfs_csum_data(root, p, crc, l);
1418 kunmap_atomic(mapped_buffer);
1419 len -= l;
1420 if (len == 0)
1421 break;
1422 index++;
1423 BUG_ON(index >= sblock->page_count);
1424 BUG_ON(!sblock->pagev[index].page);
1425 page = sblock->pagev[index].page;
1426 mapped_buffer = kmap_atomic(page);
1427 mapped_size = PAGE_SIZE;
1428 p = mapped_buffer;
1429 }
1430
1431 btrfs_csum_final(crc, calculated_csum);
1432 if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size))
1433 ++fail_cor;
1434
1435 if (fail_cor + fail_gen) {
		/*
		 * if we find an error in a super block, we just report it.
		 * They will get written with the next transaction commit
		 * anyway
		 */
1441 spin_lock(&sdev->stat_lock);
1442 ++sdev->stat.super_errors;
1443 spin_unlock(&sdev->stat_lock);
1444 if (fail_cor)
1445 btrfs_dev_stat_inc_and_print(sdev->dev,
1446 BTRFS_DEV_STAT_CORRUPTION_ERRS);
1447 else
1448 btrfs_dev_stat_inc_and_print(sdev->dev,
1449 BTRFS_DEV_STAT_GENERATION_ERRS);
1450 }
1451
1452 return fail_cor + fail_gen;
1453}
1454
1455static void scrub_block_get(struct scrub_block *sblock)
1456{
1457 atomic_inc(&sblock->ref_count);
1458}
1459
1460static void scrub_block_put(struct scrub_block *sblock)
1461{
1462 if (atomic_dec_and_test(&sblock->ref_count)) {
1463 int i;
1464
1465 for (i = 0; i < sblock->page_count; i++)
1466 if (sblock->pagev[i].page)
1467 __free_page(sblock->pagev[i].page);
1468 kfree(sblock);
1469 }
1470}
1471
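/* submit the currently assembled bio and account for it as in flight */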
1472static void scrub_submit(struct scrub_dev *sdev)
1473{
1474 struct scrub_bio *sbio;
1475
1476 if (sdev->curr == -1)
1477 return;
1478
1479 sbio = sdev->bios[sdev->curr];
1480 sdev->curr = -1;
1481 atomic_inc(&sdev->in_flight);
1482
1483 btrfsic_submit_bio(READ, sbio->bio);
1484}
1485
1486static int scrub_add_page_to_bio(struct scrub_dev *sdev,
1487 struct scrub_page *spage)
1488{
1489 struct scrub_block *sblock = spage->sblock;
1490 struct scrub_bio *sbio;
1491 int ret;
1492
1493again:
	/*
	 * grab a fresh bio or wait for one to become available
	 */
1497 while (sdev->curr == -1) {
1498 spin_lock(&sdev->list_lock);
1499 sdev->curr = sdev->first_free;
1500 if (sdev->curr != -1) {
1501 sdev->first_free = sdev->bios[sdev->curr]->next_free;
1502 sdev->bios[sdev->curr]->next_free = -1;
1503 sdev->bios[sdev->curr]->page_count = 0;
1504 spin_unlock(&sdev->list_lock);
1505 } else {
1506 spin_unlock(&sdev->list_lock);
1507 wait_event(sdev->list_wait, sdev->first_free != -1);
1508 }
1509 }
1510 sbio = sdev->bios[sdev->curr];
1511 if (sbio->page_count == 0) {
1512 struct bio *bio;
1513
1514 sbio->physical = spage->physical;
1515 sbio->logical = spage->logical;
1516 bio = sbio->bio;
1517 if (!bio) {
1518 bio = bio_alloc(GFP_NOFS, sdev->pages_per_bio);
1519 if (!bio)
1520 return -ENOMEM;
1521 sbio->bio = bio;
1522 }
1523
1524 bio->bi_private = sbio;
1525 bio->bi_end_io = scrub_bio_end_io;
1526 bio->bi_bdev = sdev->dev->bdev;
1527 bio->bi_sector = spage->physical >> 9;
1528 sbio->err = 0;
1529 } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
1530 spage->physical ||
1531 sbio->logical + sbio->page_count * PAGE_SIZE !=
1532 spage->logical) {
1533 scrub_submit(sdev);
1534 goto again;
1535 }
1536
1537 sbio->pagev[sbio->page_count] = spage;
1538 ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
1539 if (ret != PAGE_SIZE) {
1540 if (sbio->page_count < 1) {
1541 bio_put(sbio->bio);
1542 sbio->bio = NULL;
1543 return -EIO;
1544 }
1545 scrub_submit(sdev);
1546 goto again;
1547 }
1548
1549 scrub_block_get(sblock);
1550 atomic_inc(&sblock->outstanding_pages);
1551 sbio->page_count++;
1552 if (sbio->page_count == sdev->pages_per_bio)
1553 scrub_submit(sdev);
1554
1555 return 0;
1556}
1557
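/*
 * build a scrub_block for the given range, store the checksum (if one was
 * found) and queue all of its pages for reading; the block is verified in
 * the bio end_io worker once all pages have completed
 */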
1558static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len,
1559 u64 physical, u64 flags, u64 gen, int mirror_num,
1560 u8 *csum, int force)
1561{
1562 struct scrub_block *sblock;
1563 int index;
1564
1565 sblock = kzalloc(sizeof(*sblock), GFP_NOFS);
1566 if (!sblock) {
1567 spin_lock(&sdev->stat_lock);
1568 sdev->stat.malloc_errors++;
1569 spin_unlock(&sdev->stat_lock);
1570 return -ENOMEM;
1571 }

	/* one ref inside this function, plus one for each page added to
	 * a bio later on */
1574 atomic_set(&sblock->ref_count, 1);
1575 sblock->sdev = sdev;
1576 sblock->no_io_error_seen = 1;
1577
1578 for (index = 0; len > 0; index++) {
1579 struct scrub_page *spage = sblock->pagev + index;
1580 u64 l = min_t(u64, len, PAGE_SIZE);
1581
1582 BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
1583 spage->page = alloc_page(GFP_NOFS);
1584 if (!spage->page) {
1585 spin_lock(&sdev->stat_lock);
1586 sdev->stat.malloc_errors++;
1587 spin_unlock(&sdev->stat_lock);
1588 while (index > 0) {
1589 index--;
1590 __free_page(sblock->pagev[index].page);
1591 }
1592 kfree(sblock);
1593 return -ENOMEM;
1594 }
1595 spage->sblock = sblock;
1596 spage->dev = sdev->dev;
1597 spage->flags = flags;
1598 spage->generation = gen;
1599 spage->logical = logical;
1600 spage->physical = physical;
1601 spage->mirror_num = mirror_num;
1602 if (csum) {
1603 spage->have_csum = 1;
1604 memcpy(spage->csum, csum, sdev->csum_size);
1605 } else {
1606 spage->have_csum = 0;
1607 }
1608 sblock->page_count++;
1609 len -= l;
1610 logical += l;
1611 physical += l;
1612 }
1613
1614 BUG_ON(sblock->page_count == 0);
1615 for (index = 0; index < sblock->page_count; index++) {
1616 struct scrub_page *spage = sblock->pagev + index;
1617 int ret;
1618
1619 ret = scrub_add_page_to_bio(sdev, spage);
1620 if (ret) {
1621 scrub_block_put(sblock);
1622 return ret;
1623 }
1624 }
1625
1626 if (force)
1627 scrub_submit(sdev);

	/* last one frees, either here or in bio completion for last page */
1630 scrub_block_put(sblock);
1631 return 0;
1632}
1633
1634static void scrub_bio_end_io(struct bio *bio, int err)
1635{
1636 struct scrub_bio *sbio = bio->bi_private;
1637 struct scrub_dev *sdev = sbio->sdev;
1638 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
1639
1640 sbio->err = err;
1641 sbio->bio = bio;
1642
1643 btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work);
1644}
1645
1646static void scrub_bio_end_io_worker(struct btrfs_work *work)
1647{
1648 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
1649 struct scrub_dev *sdev = sbio->sdev;
1650 int i;
1651
1652 BUG_ON(sbio->page_count > SCRUB_PAGES_PER_BIO);
1653 if (sbio->err) {
1654 for (i = 0; i < sbio->page_count; i++) {
1655 struct scrub_page *spage = sbio->pagev[i];
1656
1657 spage->io_error = 1;
1658 spage->sblock->no_io_error_seen = 0;
1659 }
1660 }

	/* now complete the scrub_block items that have all pages completed */
1663 for (i = 0; i < sbio->page_count; i++) {
1664 struct scrub_page *spage = sbio->pagev[i];
1665 struct scrub_block *sblock = spage->sblock;
1666
1667 if (atomic_dec_and_test(&sblock->outstanding_pages))
1668 scrub_block_complete(sblock);
1669 scrub_block_put(sblock);
1670 }
1671
1672 bio_put(sbio->bio);
1673 sbio->bio = NULL;
1674 spin_lock(&sdev->list_lock);
1675 sbio->next_free = sdev->first_free;
1676 sdev->first_free = sbio->index;
1677 spin_unlock(&sdev->list_lock);
1678 atomic_dec(&sdev->in_flight);
1679 wake_up(&sdev->list_wait);
1680}
1681
1682static void scrub_block_complete(struct scrub_block *sblock)
1683{
1684 if (!sblock->no_io_error_seen)
1685 scrub_handle_errored_block(sblock);
1686 else
1687 scrub_checksum(sblock);
1688}
1689
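/*
 * look up the checksum for a logical address in the presorted csum list;
 * returns 1 and copies the checksum out if found, 0 otherwise
 */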
1690static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len,
1691 u8 *csum)
1692{
1693 struct btrfs_ordered_sum *sum = NULL;
1694 int ret = 0;
1695 unsigned long i;
1696 unsigned long num_sectors;
1697
1698 while (!list_empty(&sdev->csum_list)) {
1699 sum = list_first_entry(&sdev->csum_list,
1700 struct btrfs_ordered_sum, list);
1701 if (sum->bytenr > logical)
1702 return 0;
1703 if (sum->bytenr + sum->len > logical)
1704 break;
1705
1706 ++sdev->stat.csum_discards;
1707 list_del(&sum->list);
1708 kfree(sum);
1709 sum = NULL;
1710 }
1711 if (!sum)
1712 return 0;
1713
1714 num_sectors = sum->len / sdev->sectorsize;
1715 for (i = 0; i < num_sectors; ++i) {
1716 if (sum->sums[i].bytenr == logical) {
1717 memcpy(csum, &sum->sums[i].sum, sdev->csum_size);
1718 ret = 1;
1719 break;
1720 }
1721 }
1722 if (ret && i == num_sectors - 1) {
1723 list_del(&sum->list);
1724 kfree(sum);
1725 }
1726 return ret;
1727}

/* scrub extent tries to collect up to 64 kB for each bio */
1730static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len,
1731 u64 physical, u64 flags, u64 gen, int mirror_num)
1732{
1733 int ret;
1734 u8 csum[BTRFS_CSUM_SIZE];
1735 u32 blocksize;
1736
1737 if (flags & BTRFS_EXTENT_FLAG_DATA) {
1738 blocksize = sdev->sectorsize;
1739 spin_lock(&sdev->stat_lock);
1740 sdev->stat.data_extents_scrubbed++;
1741 sdev->stat.data_bytes_scrubbed += len;
1742 spin_unlock(&sdev->stat_lock);
1743 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
1744 BUG_ON(sdev->nodesize != sdev->leafsize);
1745 blocksize = sdev->nodesize;
1746 spin_lock(&sdev->stat_lock);
1747 sdev->stat.tree_extents_scrubbed++;
1748 sdev->stat.tree_bytes_scrubbed += len;
1749 spin_unlock(&sdev->stat_lock);
1750 } else {
1751 blocksize = sdev->sectorsize;
1752 BUG_ON(1);
1753 }
1754
1755 while (len) {
1756 u64 l = min_t(u64, len, blocksize);
1757 int have_csum = 0;
1758
1759 if (flags & BTRFS_EXTENT_FLAG_DATA) {
			/* push csums to sbio */
1761 have_csum = scrub_find_csum(sdev, logical, l, csum);
1762 if (have_csum == 0)
1763 ++sdev->stat.no_csum;
1764 }
1765 ret = scrub_pages(sdev, logical, l, physical, flags, gen,
1766 mirror_num, have_csum ? csum : NULL, 0);
1767 if (ret)
1768 return ret;
1769 len -= l;
1770 logical += l;
1771 physical += l;
1772 }
1773 return 0;
1774}
1775
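/*
 * scrub one stripe of a chunk: walk the extent tree (commit root) for the
 * covered range, collect the data checksums and hand each extent to
 * scrub_extent(); pause and cancel requests are honored between stripes
 */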
1776static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
1777 struct map_lookup *map, int num, u64 base, u64 length)
1778{
1779 struct btrfs_path *path;
1780 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
1781 struct btrfs_root *root = fs_info->extent_root;
1782 struct btrfs_root *csum_root = fs_info->csum_root;
1783 struct btrfs_extent_item *extent;
1784 struct blk_plug plug;
1785 u64 flags;
1786 int ret;
1787 int slot;
1788 int i;
1789 u64 nstripes;
1790 struct extent_buffer *l;
1791 struct btrfs_key key;
1792 u64 physical;
1793 u64 logical;
1794 u64 generation;
1795 int mirror_num;
1796 struct reada_control *reada1;
1797 struct reada_control *reada2;
1798 struct btrfs_key key_start;
1799 struct btrfs_key key_end;
1800
1801 u64 increment = map->stripe_len;
1802 u64 offset;
1803
1804 nstripes = length;
1805 offset = 0;
1806 do_div(nstripes, map->stripe_len);
1807 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
1808 offset = map->stripe_len * num;
1809 increment = map->stripe_len * map->num_stripes;
1810 mirror_num = 1;
1811 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
1812 int factor = map->num_stripes / map->sub_stripes;
1813 offset = map->stripe_len * (num / map->sub_stripes);
1814 increment = map->stripe_len * factor;
1815 mirror_num = num % map->sub_stripes + 1;
1816 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
1817 increment = map->stripe_len;
1818 mirror_num = num % map->num_stripes + 1;
1819 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
1820 increment = map->stripe_len;
1821 mirror_num = num % map->num_stripes + 1;
1822 } else {
1823 increment = map->stripe_len;
1824 mirror_num = 1;
1825 }
1826
1827 path = btrfs_alloc_path();
1828 if (!path)
1829 return -ENOMEM;
1830
	/*
	 * work on commit root. The related disk blocks are static as
	 * long as COW is applied. This means, it is safe to rewrite
	 * them to repair disk errors without any race conditions
	 */
1836 path->search_commit_root = 1;
1837 path->skip_locking = 1;
1838
	/*
	 * trigger the readahead for extent tree csum tree and wait for
	 * completion. During readahead, the scrub is officially paused
	 * to not hold off transaction commits
	 */
1844 logical = base + offset;
1845
1846 wait_event(sdev->list_wait,
1847 atomic_read(&sdev->in_flight) == 0);
1848 atomic_inc(&fs_info->scrubs_paused);
1849 wake_up(&fs_info->scrub_pause_wait);

	/* FIXME it might be better to start readahead at commit root */
1852 key_start.objectid = logical;
1853 key_start.type = BTRFS_EXTENT_ITEM_KEY;
1854 key_start.offset = (u64)0;
1855 key_end.objectid = base + offset + nstripes * increment;
1856 key_end.type = BTRFS_EXTENT_ITEM_KEY;
1857 key_end.offset = (u64)0;
1858 reada1 = btrfs_reada_add(root, &key_start, &key_end);
1859
1860 key_start.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1861 key_start.type = BTRFS_EXTENT_CSUM_KEY;
1862 key_start.offset = logical;
1863 key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
1864 key_end.type = BTRFS_EXTENT_CSUM_KEY;
1865 key_end.offset = base + offset + nstripes * increment;
1866 reada2 = btrfs_reada_add(csum_root, &key_start, &key_end);
1867
1868 if (!IS_ERR(reada1))
1869 btrfs_reada_wait(reada1);
1870 if (!IS_ERR(reada2))
1871 btrfs_reada_wait(reada2);
1872
1873 mutex_lock(&fs_info->scrub_lock);
1874 while (atomic_read(&fs_info->scrub_pause_req)) {
1875 mutex_unlock(&fs_info->scrub_lock);
1876 wait_event(fs_info->scrub_pause_wait,
1877 atomic_read(&fs_info->scrub_pause_req) == 0);
1878 mutex_lock(&fs_info->scrub_lock);
1879 }
1880 atomic_dec(&fs_info->scrubs_paused);
1881 mutex_unlock(&fs_info->scrub_lock);
1882 wake_up(&fs_info->scrub_pause_wait);
1883
	/*
	 * collect all data csums for the stripe to avoid seeking during
	 * the scrub. This might currently (crc32) end up to be about 1MB
	 */
1888 blk_start_plug(&plug);
1889
	/*
	 * now find all extents for each stripe and scrub them
	 */
1893 logical = base + offset;
1894 physical = map->stripes[num].physical;
1895 ret = 0;
1896 for (i = 0; i < nstripes; ++i) {
		/*
		 * canceled?
		 */
1900 if (atomic_read(&fs_info->scrub_cancel_req) ||
1901 atomic_read(&sdev->cancel_req)) {
1902 ret = -ECANCELED;
1903 goto out;
1904 }

		/*
		 * check to see if we have to pause
		 */
1908 if (atomic_read(&fs_info->scrub_pause_req)) {
			/* push queued extents */
1910 scrub_submit(sdev);
1911 wait_event(sdev->list_wait,
1912 atomic_read(&sdev->in_flight) == 0);
1913 atomic_inc(&fs_info->scrubs_paused);
1914 wake_up(&fs_info->scrub_pause_wait);
1915 mutex_lock(&fs_info->scrub_lock);
1916 while (atomic_read(&fs_info->scrub_pause_req)) {
1917 mutex_unlock(&fs_info->scrub_lock);
1918 wait_event(fs_info->scrub_pause_wait,
1919 atomic_read(&fs_info->scrub_pause_req) == 0);
1920 mutex_lock(&fs_info->scrub_lock);
1921 }
1922 atomic_dec(&fs_info->scrubs_paused);
1923 mutex_unlock(&fs_info->scrub_lock);
1924 wake_up(&fs_info->scrub_pause_wait);
1925 }
1926
1927 ret = btrfs_lookup_csums_range(csum_root, logical,
1928 logical + map->stripe_len - 1,
1929 &sdev->csum_list, 1);
1930 if (ret)
1931 goto out;
1932
1933 key.objectid = logical;
1934 key.type = BTRFS_EXTENT_ITEM_KEY;
1935 key.offset = (u64)0;
1936
1937 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1938 if (ret < 0)
1939 goto out;
1940 if (ret > 0) {
1941 ret = btrfs_previous_item(root, path, 0,
1942 BTRFS_EXTENT_ITEM_KEY);
1943 if (ret < 0)
1944 goto out;
1945 if (ret > 0) {
				/* there's no smaller item, so stick with the
				 * larger one */
1948 btrfs_release_path(path);
1949 ret = btrfs_search_slot(NULL, root, &key,
1950 path, 0, 0);
1951 if (ret < 0)
1952 goto out;
1953 }
1954 }
1955
1956 while (1) {
1957 l = path->nodes[0];
1958 slot = path->slots[0];
1959 if (slot >= btrfs_header_nritems(l)) {
1960 ret = btrfs_next_leaf(root, path);
1961 if (ret == 0)
1962 continue;
1963 if (ret < 0)
1964 goto out;
1965
1966 break;
1967 }
1968 btrfs_item_key_to_cpu(l, &key, slot);
1969
1970 if (key.objectid + key.offset <= logical)
1971 goto next;
1972
1973 if (key.objectid >= logical + map->stripe_len)
1974 break;
1975
1976 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
1977 goto next;
1978
1979 extent = btrfs_item_ptr(l, slot,
1980 struct btrfs_extent_item);
1981 flags = btrfs_extent_flags(l, extent);
1982 generation = btrfs_extent_generation(l, extent);
1983
1984 if (key.objectid < logical &&
1985 (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
1986 printk(KERN_ERR
1987 "btrfs scrub: tree block %llu spanning "
1988 "stripes, ignored. logical=%llu\n",
1989 (unsigned long long)key.objectid,
1990 (unsigned long long)logical);
1991 goto next;
1992 }
1993
			/*
			 * trim extent to this stripe
			 */
1997 if (key.objectid < logical) {
1998 key.offset -= logical - key.objectid;
1999 key.objectid = logical;
2000 }
2001 if (key.objectid + key.offset >
2002 logical + map->stripe_len) {
2003 key.offset = logical + map->stripe_len -
2004 key.objectid;
2005 }
2006
2007 ret = scrub_extent(sdev, key.objectid, key.offset,
2008 key.objectid - logical + physical,
2009 flags, generation, mirror_num);
2010 if (ret)
2011 goto out;
2012
2013next:
2014 path->slots[0]++;
2015 }
2016 btrfs_release_path(path);
2017 logical += increment;
2018 physical += map->stripe_len;
2019 spin_lock(&sdev->stat_lock);
2020 sdev->stat.last_physical = physical;
2021 spin_unlock(&sdev->stat_lock);
2022 }
2023
2024 scrub_submit(sdev);
2025
2026out:
2027 blk_finish_plug(&plug);
2028 btrfs_free_path(path);
2029 return ret < 0 ? ret : 0;
2030}
2031
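/*
 * find the stripe of this chunk that lives on the device being scrubbed
 * and scrub it
 */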
2032static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev,
2033 u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length,
2034 u64 dev_offset)
2035{
2036 struct btrfs_mapping_tree *map_tree =
2037 &sdev->dev->dev_root->fs_info->mapping_tree;
2038 struct map_lookup *map;
2039 struct extent_map *em;
2040 int i;
2041 int ret = -EINVAL;
2042
2043 read_lock(&map_tree->map_tree.lock);
2044 em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
2045 read_unlock(&map_tree->map_tree.lock);
2046
2047 if (!em)
2048 return -EINVAL;
2049
2050 map = (struct map_lookup *)em->bdev;
2051 if (em->start != chunk_offset)
2052 goto out;
2053
2054 if (em->len < length)
2055 goto out;
2056
2057 for (i = 0; i < map->num_stripes; ++i) {
2058 if (map->stripes[i].dev == sdev->dev &&
2059 map->stripes[i].physical == dev_offset) {
2060 ret = scrub_stripe(sdev, map, i, chunk_offset, length);
2061 if (ret)
2062 goto out;
2063 }
2064 }
2065out:
2066 free_extent_map(em);
2067
2068 return ret;
2069}
2070
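/*
 * walk all dev extents of the scrubbed device in [start, end) and scrub
 * the corresponding chunks one by one
 */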
2071static noinline_for_stack
2072int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
2073{
2074 struct btrfs_dev_extent *dev_extent = NULL;
2075 struct btrfs_path *path;
2076 struct btrfs_root *root = sdev->dev->dev_root;
2077 struct btrfs_fs_info *fs_info = root->fs_info;
2078 u64 length;
2079 u64 chunk_tree;
2080 u64 chunk_objectid;
2081 u64 chunk_offset;
2082 int ret;
2083 int slot;
2084 struct extent_buffer *l;
2085 struct btrfs_key key;
2086 struct btrfs_key found_key;
2087 struct btrfs_block_group_cache *cache;
2088
2089 path = btrfs_alloc_path();
2090 if (!path)
2091 return -ENOMEM;
2092
2093 path->reada = 2;
2094 path->search_commit_root = 1;
2095 path->skip_locking = 1;
2096
2097 key.objectid = sdev->dev->devid;
2098 key.offset = 0ull;
2099 key.type = BTRFS_DEV_EXTENT_KEY;
2100
2101
2102 while (1) {
2103 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2104 if (ret < 0)
2105 break;
2106 if (ret > 0) {
2107 if (path->slots[0] >=
2108 btrfs_header_nritems(path->nodes[0])) {
2109 ret = btrfs_next_leaf(root, path);
2110 if (ret)
2111 break;
2112 }
2113 }
2114
2115 l = path->nodes[0];
2116 slot = path->slots[0];
2117
2118 btrfs_item_key_to_cpu(l, &found_key, slot);
2119
2120 if (found_key.objectid != sdev->dev->devid)
2121 break;
2122
2123 if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY)
2124 break;
2125
2126 if (found_key.offset >= end)
2127 break;
2128
2129 if (found_key.offset < key.offset)
2130 break;
2131
2132 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
2133 length = btrfs_dev_extent_length(l, dev_extent);
2134
2135 if (found_key.offset + length <= start) {
2136 key.offset = found_key.offset + length;
2137 btrfs_release_path(path);
2138 continue;
2139 }
2140
2141 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
2142 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
2143 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
2144
		/*
		 * get a reference on the corresponding block group to prevent
		 * the chunk from going away while we scrub it
		 */
2149 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
2150 if (!cache) {
2151 ret = -ENOENT;
2152 break;
2153 }
2154 ret = scrub_chunk(sdev, chunk_tree, chunk_objectid,
2155 chunk_offset, length, found_key.offset);
2156 btrfs_put_block_group(cache);
2157 if (ret)
2158 break;
2159
2160 key.offset = found_key.offset + length;
2161 btrfs_release_path(path);
2162 }
2163
2164 btrfs_free_path(path);
2165
	/*
	 * ret can still be 1 from search_slot or next_leaf,
	 * that's not an error
	 */
2170 return ret < 0 ? ret : 0;
2171}
2172
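/* read and verify all super block copies that fit on the device */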
2173static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
2174{
2175 int i;
2176 u64 bytenr;
2177 u64 gen;
2178 int ret;
2179 struct btrfs_device *device = sdev->dev;
2180 struct btrfs_root *root = device->dev_root;
2181
2182 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
2183 return -EIO;
2184
2185 gen = root->fs_info->last_trans_committed;
2186
2187 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
2188 bytenr = btrfs_sb_offset(i);
2189 if (bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes)
2190 break;
2191
2192 ret = scrub_pages(sdev, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
2193 BTRFS_EXTENT_FLAG_SUPER, gen, i, NULL, 1);
2194 if (ret)
2195 return ret;
2196 }
2197 wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);
2198
2199 return 0;
2200}
2201
/*
 * get a reference count on fs_info->scrub_workers. start worker if necessary
 */
2205static noinline_for_stack int scrub_workers_get(struct btrfs_root *root)
2206{
2207 struct btrfs_fs_info *fs_info = root->fs_info;
2208 int ret = 0;
2209
2210 mutex_lock(&fs_info->scrub_lock);
2211 if (fs_info->scrub_workers_refcnt == 0) {
2212 btrfs_init_workers(&fs_info->scrub_workers, "scrub",
2213 fs_info->thread_pool_size, &fs_info->generic_worker);
2214 fs_info->scrub_workers.idle_thresh = 4;
2215 ret = btrfs_start_workers(&fs_info->scrub_workers);
2216 if (ret)
2217 goto out;
2218 }
2219 ++fs_info->scrub_workers_refcnt;
2220out:
2221 mutex_unlock(&fs_info->scrub_lock);
2222
2223 return ret;
2224}
2225
2226static noinline_for_stack void scrub_workers_put(struct btrfs_root *root)
2227{
2228 struct btrfs_fs_info *fs_info = root->fs_info;
2229
2230 mutex_lock(&fs_info->scrub_lock);
2231 if (--fs_info->scrub_workers_refcnt == 0)
2232 btrfs_stop_workers(&fs_info->scrub_workers);
2233 WARN_ON(fs_info->scrub_workers_refcnt < 0);
2234 mutex_unlock(&fs_info->scrub_lock);
2235}
2236
2237
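/*
 * entry point for the scrub ioctl: check the size assumptions, set up the
 * scrub_dev, scrub the super blocks and then all chunks of the device and
 * finally copy the statistics back to the caller
 */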
2238int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
2239 struct btrfs_scrub_progress *progress, int readonly)
2240{
2241 struct scrub_dev *sdev;
2242 struct btrfs_fs_info *fs_info = root->fs_info;
2243 int ret;
2244 struct btrfs_device *dev;
2245
2246 if (btrfs_fs_closing(root->fs_info))
2247 return -EINVAL;
2248
	/*
	 * check some assumptions
	 */
2252 if (root->nodesize != root->leafsize) {
2253 printk(KERN_ERR
2254 "btrfs_scrub: size assumption nodesize == leafsize (%d == %d) fails\n",
2255 root->nodesize, root->leafsize);
2256 return -EINVAL;
2257 }
2258
2259 if (root->nodesize > BTRFS_STRIPE_LEN) {
		/*
		 * in this case scrub is unable to calculate the checksum
		 * the way scrub is implemented. Do not handle this
		 * situation at all because it is not expected to happen.
		 */
2265 printk(KERN_ERR
2266 "btrfs_scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails\n",
2267 root->nodesize, BTRFS_STRIPE_LEN);
2268 return -EINVAL;
2269 }
2270
2271 if (root->sectorsize != PAGE_SIZE) {
		/* not supported for data w/o checksums */
2273 printk(KERN_ERR
2274 "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lld) fails\n",
2275 root->sectorsize, (unsigned long long)PAGE_SIZE);
2276 return -EINVAL;
2277 }
2278
2279 ret = scrub_workers_get(root);
2280 if (ret)
2281 return ret;
2282
2283 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
2284 dev = btrfs_find_device(root, devid, NULL, NULL);
2285 if (!dev || dev->missing) {
2286 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
2287 scrub_workers_put(root);
2288 return -ENODEV;
2289 }
2290 mutex_lock(&fs_info->scrub_lock);
2291
2292 if (!dev->in_fs_metadata) {
2293 mutex_unlock(&fs_info->scrub_lock);
2294 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
2295 scrub_workers_put(root);
2296 return -ENODEV;
2297 }
2298
2299 if (dev->scrub_device) {
2300 mutex_unlock(&fs_info->scrub_lock);
2301 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
2302 scrub_workers_put(root);
2303 return -EINPROGRESS;
2304 }
2305 sdev = scrub_setup_dev(dev);
2306 if (IS_ERR(sdev)) {
2307 mutex_unlock(&fs_info->scrub_lock);
2308 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
2309 scrub_workers_put(root);
2310 return PTR_ERR(sdev);
2311 }
2312 sdev->readonly = readonly;
2313 dev->scrub_device = sdev;
2314
2315 atomic_inc(&fs_info->scrubs_running);
2316 mutex_unlock(&fs_info->scrub_lock);
2317 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
2318
2319 down_read(&fs_info->scrub_super_lock);
2320 ret = scrub_supers(sdev);
2321 up_read(&fs_info->scrub_super_lock);
2322
2323 if (!ret)
2324 ret = scrub_enumerate_chunks(sdev, start, end);
2325
2326 wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);
2327 atomic_dec(&fs_info->scrubs_running);
2328 wake_up(&fs_info->scrub_pause_wait);
2329
2330 wait_event(sdev->list_wait, atomic_read(&sdev->fixup_cnt) == 0);
2331
2332 if (progress)
2333 memcpy(progress, &sdev->stat, sizeof(*progress));
2334
2335 mutex_lock(&fs_info->scrub_lock);
2336 dev->scrub_device = NULL;
2337 mutex_unlock(&fs_info->scrub_lock);
2338
2339 scrub_free_dev(sdev);
2340 scrub_workers_put(root);
2341
2342 return ret;
2343}
2344
2345void btrfs_scrub_pause(struct btrfs_root *root)
2346{
2347 struct btrfs_fs_info *fs_info = root->fs_info;
2348
2349 mutex_lock(&fs_info->scrub_lock);
2350 atomic_inc(&fs_info->scrub_pause_req);
2351 while (atomic_read(&fs_info->scrubs_paused) !=
2352 atomic_read(&fs_info->scrubs_running)) {
2353 mutex_unlock(&fs_info->scrub_lock);
2354 wait_event(fs_info->scrub_pause_wait,
2355 atomic_read(&fs_info->scrubs_paused) ==
2356 atomic_read(&fs_info->scrubs_running));
2357 mutex_lock(&fs_info->scrub_lock);
2358 }
2359 mutex_unlock(&fs_info->scrub_lock);
2360}
2361
2362void btrfs_scrub_continue(struct btrfs_root *root)
2363{
2364 struct btrfs_fs_info *fs_info = root->fs_info;
2365
2366 atomic_dec(&fs_info->scrub_pause_req);
2367 wake_up(&fs_info->scrub_pause_wait);
2368}
2369
2370void btrfs_scrub_pause_super(struct btrfs_root *root)
2371{
2372 down_write(&root->fs_info->scrub_super_lock);
2373}
2374
2375void btrfs_scrub_continue_super(struct btrfs_root *root)
2376{
2377 up_write(&root->fs_info->scrub_super_lock);
2378}
2379
2380int __btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
2381{
2382
2383 mutex_lock(&fs_info->scrub_lock);
2384 if (!atomic_read(&fs_info->scrubs_running)) {
2385 mutex_unlock(&fs_info->scrub_lock);
2386 return -ENOTCONN;
2387 }
2388
2389 atomic_inc(&fs_info->scrub_cancel_req);
2390 while (atomic_read(&fs_info->scrubs_running)) {
2391 mutex_unlock(&fs_info->scrub_lock);
2392 wait_event(fs_info->scrub_pause_wait,
2393 atomic_read(&fs_info->scrubs_running) == 0);
2394 mutex_lock(&fs_info->scrub_lock);
2395 }
2396 atomic_dec(&fs_info->scrub_cancel_req);
2397 mutex_unlock(&fs_info->scrub_lock);
2398
2399 return 0;
2400}
2401
2402int btrfs_scrub_cancel(struct btrfs_root *root)
2403{
2404 return __btrfs_scrub_cancel(root->fs_info);
2405}
2406
2407int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev)
2408{
2409 struct btrfs_fs_info *fs_info = root->fs_info;
2410 struct scrub_dev *sdev;
2411
2412 mutex_lock(&fs_info->scrub_lock);
2413 sdev = dev->scrub_device;
2414 if (!sdev) {
2415 mutex_unlock(&fs_info->scrub_lock);
2416 return -ENOTCONN;
2417 }
2418 atomic_inc(&sdev->cancel_req);
2419 while (dev->scrub_device) {
2420 mutex_unlock(&fs_info->scrub_lock);
2421 wait_event(fs_info->scrub_pause_wait,
2422 dev->scrub_device == NULL);
2423 mutex_lock(&fs_info->scrub_lock);
2424 }
2425 mutex_unlock(&fs_info->scrub_lock);
2426
2427 return 0;
2428}
2429
2430int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid)
2431{
2432 struct btrfs_fs_info *fs_info = root->fs_info;
2433 struct btrfs_device *dev;
2434 int ret;
2435
	/*
	 * we have to hold the device_list_mutex here so the device
	 * does not go away in cancel_dev. FIXME: find a better solution
	 */
2440 mutex_lock(&fs_info->fs_devices->device_list_mutex);
2441 dev = btrfs_find_device(root, devid, NULL, NULL);
2442 if (!dev) {
2443 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2444 return -ENODEV;
2445 }
2446 ret = btrfs_scrub_cancel_dev(root, dev);
2447 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2448
2449 return ret;
2450}
2451
2452int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
2453 struct btrfs_scrub_progress *progress)
2454{
2455 struct btrfs_device *dev;
2456 struct scrub_dev *sdev = NULL;
2457
2458 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
2459 dev = btrfs_find_device(root, devid, NULL, NULL);
2460 if (dev)
2461 sdev = dev->scrub_device;
2462 if (sdev)
2463 memcpy(progress, &sdev->stat, sizeof(*progress));
2464 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
2465
2466 return dev ? (sdev ? 0 : -ENOTCONN) : -ENODEV;
2467}
2468