#include <linux/blkdev.h>
#include <linux/ratelimit.h>
#include "ctree.h"
#include "volumes.h"
#include "disk-io.h"
#include "ordered-data.h"
#include "transaction.h"
#include "backref.h"
#include "extent_io.h"
#include "dev-replace.h"
#include "check-integrity.h"
#include "rcu-string.h"
#include "raid56.h"
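
/*
 * Scrub walks all allocated extents and super blocks of a device, reads the
 * data back and verifies checksums (and tree block headers).  When a bad
 * copy is found and a good copy exists on another mirror, the bad copy is
 * rewritten.  The same machinery is reused by dev-replace to copy everything
 * to the replacement target device.
 */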
struct scrub_block;
struct scrub_ctx;
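
/*
 * The following values only influence performance and memory consumption
 * (how many pages travel in one bio and how many bios are kept per scrub
 * context), not correctness.
 */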
#define SCRUB_PAGES_PER_RD_BIO	32
#define SCRUB_PAGES_PER_WR_BIO	32
#define SCRUB_BIOS_PER_SCTX	64
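
/*
 * Maximum number of pages in a single scrub_block; must be large enough to
 * hold one tree block (nodesize / PAGE_SIZE).
 */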
#define SCRUB_MAX_PAGES_PER_BLOCK	16

struct scrub_page {
	struct scrub_block	*sblock;
	struct page		*page;
	struct btrfs_device	*dev;
	u64			flags;
	u64			generation;
	u64			logical;
	u64			physical;
	u64			physical_for_dev_replace;
	atomic_t		ref_count;
	struct {
		unsigned int	mirror_num:8;
		unsigned int	have_csum:1;
		unsigned int	io_error:1;
	};
	u8			csum[BTRFS_CSUM_SIZE];
};

struct scrub_bio {
	int			index;
	struct scrub_ctx	*sctx;
	struct btrfs_device	*dev;
	struct bio		*bio;
	int			err;
	u64			logical;
	u64			physical;
#if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
	struct scrub_page	*pagev[SCRUB_PAGES_PER_WR_BIO];
#else
	struct scrub_page	*pagev[SCRUB_PAGES_PER_RD_BIO];
#endif
	int			page_count;
	int			next_free;
	struct btrfs_work	work;
};

struct scrub_block {
	struct scrub_page	*pagev[SCRUB_MAX_PAGES_PER_BLOCK];
	int			page_count;
	atomic_t		outstanding_pages;
	atomic_t		ref_count;
	struct scrub_ctx	*sctx;
	struct {
		unsigned int	header_error:1;
		unsigned int	checksum_error:1;
		unsigned int	no_io_error_seen:1;
		unsigned int	generation_error:1;
	};
};

struct scrub_wr_ctx {
	struct scrub_bio	*wr_curr_bio;
	struct btrfs_device	*tgtdev;
	int			pages_per_wr_bio;
	atomic_t		flush_all_writes;
	struct mutex		wr_lock;
};

struct scrub_ctx {
	struct scrub_bio	*bios[SCRUB_BIOS_PER_SCTX];
	struct btrfs_root	*dev_root;
	int			first_free;
	int			curr;
	atomic_t		bios_in_flight;
	atomic_t		workers_pending;
	spinlock_t		list_lock;
	wait_queue_head_t	list_wait;
	u16			csum_size;
	struct list_head	csum_list;
	atomic_t		cancel_req;
	int			readonly;
	int			pages_per_rd_bio;
	u32			sectorsize;
	u32			nodesize;
	u32			leafsize;

	int			is_dev_replace;
	struct scrub_wr_ctx	wr_ctx;

	/*
	 * statistics
	 */
	struct btrfs_scrub_progress stat;
	spinlock_t		stat_lock;
};

struct scrub_fixup_nodatasum {
	struct scrub_ctx	*sctx;
	struct btrfs_device	*dev;
	u64			logical;
	struct btrfs_root	*root;
	struct btrfs_work	work;
	int			mirror_num;
};

struct scrub_nocow_inode {
	u64			inum;
	u64			offset;
	u64			root;
	struct list_head	list;
};

struct scrub_copy_nocow_ctx {
	struct scrub_ctx	*sctx;
	u64			logical;
	u64			len;
	int			mirror_num;
	u64			physical_for_dev_replace;
	struct list_head	inodes;
	struct btrfs_work	work;
};

struct scrub_warning {
	struct btrfs_path	*path;
	u64			extent_item_size;
	char			*scratch_buf;
	char			*msg_buf;
	const char		*errstr;
	sector_t		sector;
	u64			logical;
	struct btrfs_device	*dev;
	int			msg_bufsize;
	int			scratch_bufsize;
};

static void scrub_pending_bio_inc(struct scrub_ctx *sctx);
static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx);
static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx);
static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
				     struct btrfs_fs_info *fs_info,
				     struct scrub_block *original_sblock,
				     u64 length, u64 logical,
				     struct scrub_block *sblocks_for_recheck);
static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
				struct scrub_block *sblock, int is_metadata,
				int have_csum, u8 *csum, u64 generation,
				u16 csum_size);
static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
					 struct scrub_block *sblock,
					 int is_metadata, int have_csum,
					 const u8 *csum, u64 generation,
					 u16 csum_size);
static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
					     struct scrub_block *sblock_good,
					     int force_write);
static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
					    struct scrub_block *sblock_good,
					    int page_num, int force_write);
static void scrub_write_block_to_dev_replace(struct scrub_block *sblock);
static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
					   int page_num);
static int scrub_checksum_data(struct scrub_block *sblock);
static int scrub_checksum_tree_block(struct scrub_block *sblock);
static int scrub_checksum_super(struct scrub_block *sblock);
static void scrub_block_get(struct scrub_block *sblock);
static void scrub_block_put(struct scrub_block *sblock);
static void scrub_page_get(struct scrub_page *spage);
static void scrub_page_put(struct scrub_page *spage);
static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage);
static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
		       u64 physical, struct btrfs_device *dev, u64 flags,
		       u64 gen, int mirror_num, u8 *csum, int force,
		       u64 physical_for_dev_replace);
static void scrub_bio_end_io(struct bio *bio, int err);
static void scrub_bio_end_io_worker(struct btrfs_work *work);
static void scrub_block_complete(struct scrub_block *sblock);
static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
			       u64 extent_logical, u64 extent_len,
			       u64 *extent_physical,
			       struct btrfs_device **extent_dev,
			       int *extent_mirror_num);
static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
			      struct scrub_wr_ctx *wr_ctx,
			      struct btrfs_fs_info *fs_info,
			      struct btrfs_device *dev,
			      int is_dev_replace);
static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx);
static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
				    struct scrub_page *spage);
static void scrub_wr_submit(struct scrub_ctx *sctx);
static void scrub_wr_bio_end_io(struct bio *bio, int err);
static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
static int write_page_nocow(struct scrub_ctx *sctx,
			    u64 physical_for_dev_replace, struct page *page);
static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
				      struct scrub_copy_nocow_ctx *ctx);
static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
			    int mirror_num, u64 physical_for_dev_replace);
static void copy_nocow_pages_worker(struct btrfs_work *work);
static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
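
/*
 * Track the number of scrub bios in flight so that pause points and
 * teardown can wait for all of them to complete.
 */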
263static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
264{
265 atomic_inc(&sctx->bios_in_flight);
266}
267
268static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
269{
270 atomic_dec(&sctx->bios_in_flight);
271 wake_up(&sctx->list_wait);
272}
273
274static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
275{
276 while (atomic_read(&fs_info->scrub_pause_req)) {
277 mutex_unlock(&fs_info->scrub_lock);
278 wait_event(fs_info->scrub_pause_wait,
279 atomic_read(&fs_info->scrub_pause_req) == 0);
280 mutex_lock(&fs_info->scrub_lock);
281 }
282}
283
284static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
285{
286 atomic_inc(&fs_info->scrubs_paused);
287 wake_up(&fs_info->scrub_pause_wait);
288
289 mutex_lock(&fs_info->scrub_lock);
290 __scrub_blocked_if_needed(fs_info);
291 atomic_dec(&fs_info->scrubs_paused);
292 mutex_unlock(&fs_info->scrub_lock);
293
294 wake_up(&fs_info->scrub_pause_wait);
295}
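
/*
 * Used around work items that may need to join a transaction (e.g. the
 * nodatasum fixup worker); pairs with scrub_pending_trans_workers_dec().
 */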
301static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx)
302{
303 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
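
	/*
	 * Count the worker as running so a cancel request cannot complete
	 * while it is outstanding, and as paused so a pause request issued
	 * for a transaction commit does not deadlock against it (the worker
	 * itself joins a transaction).
	 */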
314 mutex_lock(&fs_info->scrub_lock);
315 atomic_inc(&fs_info->scrubs_running);
316 atomic_inc(&fs_info->scrubs_paused);
317 mutex_unlock(&fs_info->scrub_lock);
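
	/* let a waiting pause request re-evaluate scrubs_running/scrubs_paused */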
326 wake_up(&fs_info->scrub_pause_wait);
327
328 atomic_inc(&sctx->workers_pending);
329}
330
331
332static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx)
333{
334 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
340 mutex_lock(&fs_info->scrub_lock);
341 atomic_dec(&fs_info->scrubs_running);
342 atomic_dec(&fs_info->scrubs_paused);
343 mutex_unlock(&fs_info->scrub_lock);
344 atomic_dec(&sctx->workers_pending);
345 wake_up(&fs_info->scrub_pause_wait);
346 wake_up(&sctx->list_wait);
347}
348
349static void scrub_free_csums(struct scrub_ctx *sctx)
350{
351 while (!list_empty(&sctx->csum_list)) {
352 struct btrfs_ordered_sum *sum;
353 sum = list_first_entry(&sctx->csum_list,
354 struct btrfs_ordered_sum, list);
355 list_del(&sum->list);
356 kfree(sum);
357 }
358}
359
360static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
361{
362 int i;
363
364 if (!sctx)
365 return;
366
367 scrub_free_wr_ctx(&sctx->wr_ctx);
368
369
370 if (sctx->curr != -1) {
371 struct scrub_bio *sbio = sctx->bios[sctx->curr];
372
373 for (i = 0; i < sbio->page_count; i++) {
374 WARN_ON(!sbio->pagev[i]->page);
375 scrub_block_put(sbio->pagev[i]->sblock);
376 }
377 bio_put(sbio->bio);
378 }
379
380 for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
381 struct scrub_bio *sbio = sctx->bios[i];
382
383 if (!sbio)
384 break;
385 kfree(sbio);
386 }
387
388 scrub_free_csums(sctx);
389 kfree(sctx);
390}
391
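/*
 * Allocate and initialise a scrub context for @dev: a fixed pool of
 * SCRUB_BIOS_PER_SCTX read bios plus, for dev-replace, the write context
 * for the target device.
 */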
392static noinline_for_stack
393struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
394{
395 struct scrub_ctx *sctx;
396 int i;
397 struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
398 int pages_per_rd_bio;
399 int ret;
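
	/*
	 * Cap the read bio size at what the device can actually take; if
	 * there is no bdev (e.g. a missing device during replace), fall
	 * back to the compiled-in maximum.
	 */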
408 if (dev->bdev)
409 pages_per_rd_bio = min_t(int, SCRUB_PAGES_PER_RD_BIO,
410 bio_get_nr_vecs(dev->bdev));
411 else
412 pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
413 sctx = kzalloc(sizeof(*sctx), GFP_NOFS);
414 if (!sctx)
415 goto nomem;
416 sctx->is_dev_replace = is_dev_replace;
417 sctx->pages_per_rd_bio = pages_per_rd_bio;
418 sctx->curr = -1;
419 sctx->dev_root = dev->dev_root;
420 for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
421 struct scrub_bio *sbio;
422
423 sbio = kzalloc(sizeof(*sbio), GFP_NOFS);
424 if (!sbio)
425 goto nomem;
426 sctx->bios[i] = sbio;
427
428 sbio->index = i;
429 sbio->sctx = sctx;
430 sbio->page_count = 0;
431 btrfs_init_work(&sbio->work, scrub_bio_end_io_worker,
432 NULL, NULL);
433
434 if (i != SCRUB_BIOS_PER_SCTX - 1)
435 sctx->bios[i]->next_free = i + 1;
436 else
437 sctx->bios[i]->next_free = -1;
438 }
439 sctx->first_free = 0;
440 sctx->nodesize = dev->dev_root->nodesize;
441 sctx->leafsize = dev->dev_root->leafsize;
442 sctx->sectorsize = dev->dev_root->sectorsize;
443 atomic_set(&sctx->bios_in_flight, 0);
444 atomic_set(&sctx->workers_pending, 0);
445 atomic_set(&sctx->cancel_req, 0);
446 sctx->csum_size = btrfs_super_csum_size(fs_info->super_copy);
447 INIT_LIST_HEAD(&sctx->csum_list);
448
449 spin_lock_init(&sctx->list_lock);
450 spin_lock_init(&sctx->stat_lock);
451 init_waitqueue_head(&sctx->list_wait);
452
453 ret = scrub_setup_wr_ctx(sctx, &sctx->wr_ctx, fs_info,
454 fs_info->dev_replace.tgtdev, is_dev_replace);
455 if (ret) {
456 scrub_free_ctx(sctx);
457 return ERR_PTR(ret);
458 }
459 return sctx;
460
461nomem:
462 scrub_free_ctx(sctx);
463 return ERR_PTR(-ENOMEM);
464}
465
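/*
 * Backref walker callback: resolve the file paths of one inode that
 * references the corrupted extent and print a warning line per path.
 */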
466static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
467 void *warn_ctx)
468{
469 u64 isize;
470 u32 nlink;
471 int ret;
472 int i;
473 struct extent_buffer *eb;
474 struct btrfs_inode_item *inode_item;
475 struct scrub_warning *swarn = warn_ctx;
476 struct btrfs_fs_info *fs_info = swarn->dev->dev_root->fs_info;
477 struct inode_fs_paths *ipath = NULL;
478 struct btrfs_root *local_root;
479 struct btrfs_key root_key;
480
481 root_key.objectid = root;
482 root_key.type = BTRFS_ROOT_ITEM_KEY;
483 root_key.offset = (u64)-1;
484 local_root = btrfs_read_fs_root_no_name(fs_info, &root_key);
485 if (IS_ERR(local_root)) {
486 ret = PTR_ERR(local_root);
487 goto err;
488 }
489
490 ret = inode_item_info(inum, 0, local_root, swarn->path);
491 if (ret) {
492 btrfs_release_path(swarn->path);
493 goto err;
494 }
495
496 eb = swarn->path->nodes[0];
497 inode_item = btrfs_item_ptr(eb, swarn->path->slots[0],
498 struct btrfs_inode_item);
499 isize = btrfs_inode_size(eb, inode_item);
500 nlink = btrfs_inode_nlink(eb, inode_item);
501 btrfs_release_path(swarn->path);
502
503 ipath = init_ipath(4096, local_root, swarn->path);
504 if (IS_ERR(ipath)) {
505 ret = PTR_ERR(ipath);
506 ipath = NULL;
507 goto err;
508 }
509 ret = paths_from_inode(inum, ipath);
510
511 if (ret < 0)
512 goto err;
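
	/*
	 * The ipath buffer is a fixed 4k, so if the inode is referenced by
	 * more paths than fit in there, the overflow is silently ignored.
	 */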
518 for (i = 0; i < ipath->fspath->elem_cnt; ++i)
519 printk_in_rcu(KERN_WARNING "BTRFS: %s at logical %llu on dev "
520 "%s, sector %llu, root %llu, inode %llu, offset %llu, "
521 "length %llu, links %u (path: %s)\n", swarn->errstr,
522 swarn->logical, rcu_str_deref(swarn->dev->name),
523 (unsigned long long)swarn->sector, root, inum, offset,
524 min(isize - offset, (u64)PAGE_SIZE), nlink,
525 (char *)(unsigned long)ipath->fspath->val[i]);
526
527 free_ipath(ipath);
528 return 0;
529
530err:
531 printk_in_rcu(KERN_WARNING "BTRFS: %s at logical %llu on dev "
532 "%s, sector %llu, root %llu, inode %llu, offset %llu: path "
533 "resolving failed with ret=%d\n", swarn->errstr,
534 swarn->logical, rcu_str_deref(swarn->dev->name),
535 (unsigned long long)swarn->sector, root, inum, offset, ret);
536
537 free_ipath(ipath);
538 return 0;
539}
540
541static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
542{
543 struct btrfs_device *dev;
544 struct btrfs_fs_info *fs_info;
545 struct btrfs_path *path;
546 struct btrfs_key found_key;
547 struct extent_buffer *eb;
548 struct btrfs_extent_item *ei;
549 struct scrub_warning swarn;
550 unsigned long ptr = 0;
551 u64 extent_item_pos;
552 u64 flags = 0;
553 u64 ref_root;
554 u32 item_size;
555 u8 ref_level;
556 const int bufsize = 4096;
557 int ret;
558
559 WARN_ON(sblock->page_count < 1);
560 dev = sblock->pagev[0]->dev;
561 fs_info = sblock->sctx->dev_root->fs_info;
562
563 path = btrfs_alloc_path();
564
565 swarn.scratch_buf = kmalloc(bufsize, GFP_NOFS);
566 swarn.msg_buf = kmalloc(bufsize, GFP_NOFS);
567 swarn.sector = (sblock->pagev[0]->physical) >> 9;
568 swarn.logical = sblock->pagev[0]->logical;
569 swarn.errstr = errstr;
570 swarn.dev = NULL;
571 swarn.msg_bufsize = bufsize;
572 swarn.scratch_bufsize = bufsize;
573
574 if (!path || !swarn.scratch_buf || !swarn.msg_buf)
575 goto out;
576
577 ret = extent_from_logical(fs_info, swarn.logical, path, &found_key,
578 &flags);
579 if (ret < 0)
580 goto out;
581
582 extent_item_pos = swarn.logical - found_key.objectid;
583 swarn.extent_item_size = found_key.offset;
584
585 eb = path->nodes[0];
586 ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
587 item_size = btrfs_item_size_nr(eb, path->slots[0]);
588
589 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
590 do {
591 ret = tree_backref_for_extent(&ptr, eb, &found_key, ei,
592 item_size, &ref_root,
593 &ref_level);
594 printk_in_rcu(KERN_WARNING
595 "BTRFS: %s at logical %llu on dev %s, "
596 "sector %llu: metadata %s (level %d) in tree "
597 "%llu\n", errstr, swarn.logical,
598 rcu_str_deref(dev->name),
599 (unsigned long long)swarn.sector,
600 ref_level ? "node" : "leaf",
601 ret < 0 ? -1 : ref_level,
602 ret < 0 ? -1 : ref_root);
603 } while (ret != 1);
604 btrfs_release_path(path);
605 } else {
606 btrfs_release_path(path);
607 swarn.path = path;
608 swarn.dev = dev;
609 iterate_extent_inodes(fs_info, found_key.objectid,
610 extent_item_pos, 1,
611 scrub_print_warning_inode, &swarn);
612 }
613
614out:
615 btrfs_free_path(path);
616 kfree(swarn.scratch_buf);
617 kfree(swarn.msg_buf);
618}
619
620static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
621{
622 struct page *page = NULL;
623 unsigned long index;
624 struct scrub_fixup_nodatasum *fixup = fixup_ctx;
625 int ret;
626 int corrected = 0;
627 struct btrfs_key key;
628 struct inode *inode = NULL;
629 struct btrfs_fs_info *fs_info;
630 u64 end = offset + PAGE_SIZE - 1;
631 struct btrfs_root *local_root;
632 int srcu_index;
633
634 key.objectid = root;
635 key.type = BTRFS_ROOT_ITEM_KEY;
636 key.offset = (u64)-1;
637
638 fs_info = fixup->root->fs_info;
639 srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
640
641 local_root = btrfs_read_fs_root_no_name(fs_info, &key);
642 if (IS_ERR(local_root)) {
643 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
644 return PTR_ERR(local_root);
645 }
646
647 key.type = BTRFS_INODE_ITEM_KEY;
648 key.objectid = inum;
649 key.offset = 0;
650 inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
651 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
652 if (IS_ERR(inode))
653 return PTR_ERR(inode);
654
655 index = offset >> PAGE_CACHE_SHIFT;
656
657 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
658 if (!page) {
659 ret = -ENOMEM;
660 goto out;
661 }
662
663 if (PageUptodate(page)) {
664 if (PageDirty(page)) {
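			/*
			 * The page is dirty: the data in memory no longer
			 * matches what should be on disk, so it cannot be
			 * used to rewrite the bad sector.  Treat the error
			 * as uncorrectable for now.
			 */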
681 ret = -EIO;
682 goto out;
683 }
684 fs_info = BTRFS_I(inode)->root->fs_info;
685 ret = repair_io_failure(fs_info, offset, PAGE_SIZE,
686 fixup->logical, page,
687 fixup->mirror_num);
688 unlock_page(page);
689 corrected = !ret;
690 } else {
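		/*
		 * The page is not up to date: mark the range EXTENT_DAMAGED,
		 * read it back using the known good mirror and check below
		 * whether the damaged bit got cleared by a successful repair.
		 */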
696 ret = set_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
697 EXTENT_DAMAGED, GFP_NOFS);
698 if (ret) {
699
700 WARN_ON(ret > 0);
701 if (ret > 0)
702 ret = -EFAULT;
703 goto out;
704 }
705
706 ret = extent_read_full_page(&BTRFS_I(inode)->io_tree, page,
707 btrfs_get_extent,
708 fixup->mirror_num);
709 wait_on_page_locked(page);
710
711 corrected = !test_range_bit(&BTRFS_I(inode)->io_tree, offset,
712 end, EXTENT_DAMAGED, 0, NULL);
713 if (!corrected)
714 clear_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
715 EXTENT_DAMAGED, GFP_NOFS);
716 }
717
718out:
719 if (page)
720 put_page(page);
721
722 iput(inode);
723
724 if (ret < 0)
725 return ret;
726
727 if (ret == 0 && corrected) {
732 return 1;
733 }
734
735 return -EIO;
736}
737
738static void scrub_fixup_nodatasum(struct btrfs_work *work)
739{
740 int ret;
741 struct scrub_fixup_nodatasum *fixup;
742 struct scrub_ctx *sctx;
743 struct btrfs_trans_handle *trans = NULL;
744 struct btrfs_path *path;
745 int uncorrectable = 0;
746
747 fixup = container_of(work, struct scrub_fixup_nodatasum, work);
748 sctx = fixup->sctx;
749
750 path = btrfs_alloc_path();
751 if (!path) {
752 spin_lock(&sctx->stat_lock);
753 ++sctx->stat.malloc_errors;
754 spin_unlock(&sctx->stat_lock);
755 uncorrectable = 1;
756 goto out;
757 }
758
759 trans = btrfs_join_transaction(fixup->root);
760 if (IS_ERR(trans)) {
761 uncorrectable = 1;
762 goto out;
763 }
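
	/*
	 * Walk every inode that references this logical address and try to
	 * re-read (and thereby repair) the affected page through the page
	 * cache; see scrub_fixup_readpage().
	 */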
774 ret = iterate_inodes_from_logical(fixup->logical, fixup->root->fs_info,
775 path, scrub_fixup_readpage,
776 fixup);
777 if (ret < 0) {
778 uncorrectable = 1;
779 goto out;
780 }
781 WARN_ON(ret != 1);
782
783 spin_lock(&sctx->stat_lock);
784 ++sctx->stat.corrected_errors;
785 spin_unlock(&sctx->stat_lock);
786
787out:
788 if (trans && !IS_ERR(trans))
789 btrfs_end_transaction(trans, fixup->root);
790 if (uncorrectable) {
791 spin_lock(&sctx->stat_lock);
792 ++sctx->stat.uncorrectable_errors;
793 spin_unlock(&sctx->stat_lock);
794 btrfs_dev_replace_stats_inc(
795 &sctx->dev_root->fs_info->dev_replace.
796 num_uncorrectable_read_errors);
797 printk_ratelimited_in_rcu(KERN_ERR "BTRFS: "
798 "unable to fixup (nodatasum) error at logical %llu on dev %s\n",
799 fixup->logical, rcu_str_deref(fixup->dev->name));
800 }
801
802 btrfs_free_path(path);
803 kfree(fixup);
804
805 scrub_pending_trans_workers_dec(sctx);
806}
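
/*
 * Called for a scrub_block that failed verification (checksum or header
 * error) or could not be read at all.  All mirrors are re-read page by
 * page; the bad copy is repaired from a good one if possible, or, in
 * dev-replace mode, good data is written to the target device.  Error
 * statistics are updated accordingly.
 */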
816static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
817{
818 struct scrub_ctx *sctx = sblock_to_check->sctx;
819 struct btrfs_device *dev;
820 struct btrfs_fs_info *fs_info;
821 u64 length;
822 u64 logical;
823 u64 generation;
824 unsigned int failed_mirror_index;
825 unsigned int is_metadata;
826 unsigned int have_csum;
827 u8 *csum;
828 struct scrub_block *sblocks_for_recheck;
829 struct scrub_block *sblock_bad;
830 int ret;
831 int mirror_index;
832 int page_num;
833 int success;
834 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
835 DEFAULT_RATELIMIT_BURST);
836
837 BUG_ON(sblock_to_check->page_count < 1);
838 fs_info = sctx->dev_root->fs_info;
839 if (sblock_to_check->pagev[0]->flags & BTRFS_EXTENT_FLAG_SUPER) {
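		/*
		 * Super block errors are only counted here; the super blocks
		 * are rewritten with every transaction commit anyway.
		 */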
845 spin_lock(&sctx->stat_lock);
846 ++sctx->stat.super_errors;
847 spin_unlock(&sctx->stat_lock);
848 return 0;
849 }
850 length = sblock_to_check->page_count * PAGE_SIZE;
851 logical = sblock_to_check->pagev[0]->logical;
852 generation = sblock_to_check->pagev[0]->generation;
853 BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1);
854 failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1;
855 is_metadata = !(sblock_to_check->pagev[0]->flags &
856 BTRFS_EXTENT_FLAG_DATA);
857 have_csum = sblock_to_check->pagev[0]->have_csum;
858 csum = sblock_to_check->pagev[0]->csum;
859 dev = sblock_to_check->pagev[0]->dev;
860
861 if (sctx->is_dev_replace && !is_metadata && !have_csum) {
862 sblocks_for_recheck = NULL;
863 goto nodatasum_case;
864 }
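
	/*
	 * Re-read the block from every mirror, page by page, into freshly
	 * allocated scrub_blocks so that each copy can be verified
	 * independently of the bio that originally failed.
	 */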
895 sblocks_for_recheck = kzalloc(BTRFS_MAX_MIRRORS *
896 sizeof(*sblocks_for_recheck),
897 GFP_NOFS);
898 if (!sblocks_for_recheck) {
899 spin_lock(&sctx->stat_lock);
900 sctx->stat.malloc_errors++;
901 sctx->stat.read_errors++;
902 sctx->stat.uncorrectable_errors++;
903 spin_unlock(&sctx->stat_lock);
904 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
905 goto out;
906 }
907
908
909 ret = scrub_setup_recheck_block(sctx, fs_info, sblock_to_check, length,
910 logical, sblocks_for_recheck);
911 if (ret) {
912 spin_lock(&sctx->stat_lock);
913 sctx->stat.read_errors++;
914 sctx->stat.uncorrectable_errors++;
915 spin_unlock(&sctx->stat_lock);
916 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
917 goto out;
918 }
919 BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS);
920 sblock_bad = sblocks_for_recheck + failed_mirror_index;
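
	/* re-read and verify the copy that originally failed */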
923 scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum,
924 csum, generation, sctx->csum_size);
925
926 if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
927 sblock_bad->no_io_error_seen) {
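		/*
		 * The block reads back fine now: the original failure either
		 * went away or was caused by another part of a larger bio.
		 * Count it as an unverified error.
		 */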
936 spin_lock(&sctx->stat_lock);
937 sctx->stat.unverified_errors++;
938 spin_unlock(&sctx->stat_lock);
939
940 if (sctx->is_dev_replace)
941 scrub_write_block_to_dev_replace(sblock_bad);
942 goto out;
943 }
944
945 if (!sblock_bad->no_io_error_seen) {
946 spin_lock(&sctx->stat_lock);
947 sctx->stat.read_errors++;
948 spin_unlock(&sctx->stat_lock);
949 if (__ratelimit(&_rs))
950 scrub_print_warning("i/o error", sblock_to_check);
951 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
952 } else if (sblock_bad->checksum_error) {
953 spin_lock(&sctx->stat_lock);
954 sctx->stat.csum_errors++;
955 spin_unlock(&sctx->stat_lock);
956 if (__ratelimit(&_rs))
957 scrub_print_warning("checksum error", sblock_to_check);
958 btrfs_dev_stat_inc_and_print(dev,
959 BTRFS_DEV_STAT_CORRUPTION_ERRS);
960 } else if (sblock_bad->header_error) {
961 spin_lock(&sctx->stat_lock);
962 sctx->stat.verify_errors++;
963 spin_unlock(&sctx->stat_lock);
964 if (__ratelimit(&_rs))
965 scrub_print_warning("checksum/header error",
966 sblock_to_check);
967 if (sblock_bad->generation_error)
968 btrfs_dev_stat_inc_and_print(dev,
969 BTRFS_DEV_STAT_GENERATION_ERRS);
970 else
971 btrfs_dev_stat_inc_and_print(dev,
972 BTRFS_DEV_STAT_CORRUPTION_ERRS);
973 }
974
975 if (sctx->readonly) {
976 ASSERT(!sctx->is_dev_replace);
977 goto out;
978 }
979
980 if (!is_metadata && !have_csum) {
981 struct scrub_fixup_nodatasum *fixup_nodatasum;
982
983nodatasum_case:
984 WARN_ON(sctx->is_dev_replace);
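		/*
		 * No checksum is available (nodatasum data), so repair is
		 * handed off to a worker that re-reads the affected pages
		 * through the owning inodes, see scrub_fixup_nodatasum().
		 */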
993 fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS);
994 if (!fixup_nodatasum)
995 goto did_not_correct_error;
996 fixup_nodatasum->sctx = sctx;
997 fixup_nodatasum->dev = dev;
998 fixup_nodatasum->logical = logical;
999 fixup_nodatasum->root = fs_info->extent_root;
1000 fixup_nodatasum->mirror_num = failed_mirror_index + 1;
1001 scrub_pending_trans_workers_inc(sctx);
1002 btrfs_init_work(&fixup_nodatasum->work, scrub_fixup_nodatasum,
1003 NULL, NULL);
1004 btrfs_queue_work(fs_info->scrub_workers,
1005 &fixup_nodatasum->work);
1006 goto out;
1007 }
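
	/*
	 * First look for a mirror whose copy verifies completely; if one is
	 * found, either repair the bad block from it or, in dev-replace
	 * mode, write it straight to the target device.
	 */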
1024 for (mirror_index = 0;
1025 mirror_index < BTRFS_MAX_MIRRORS &&
1026 sblocks_for_recheck[mirror_index].page_count > 0;
1027 mirror_index++) {
1028 struct scrub_block *sblock_other;
1029
1030 if (mirror_index == failed_mirror_index)
1031 continue;
1032 sblock_other = sblocks_for_recheck + mirror_index;
1033
1034
1035 scrub_recheck_block(fs_info, sblock_other, is_metadata,
1036 have_csum, csum, generation,
1037 sctx->csum_size);
1038
1039 if (!sblock_other->header_error &&
1040 !sblock_other->checksum_error &&
1041 sblock_other->no_io_error_seen) {
1042 if (sctx->is_dev_replace) {
1043 scrub_write_block_to_dev_replace(sblock_other);
1044 } else {
1045 int force_write = is_metadata || have_csum;
1046
1047 ret = scrub_repair_block_from_good_copy(
1048 sblock_bad, sblock_other,
1049 force_write);
1050 }
1051 if (0 == ret)
1052 goto corrected_error;
1053 }
1054 }
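
	/*
	 * dev-replace: no single mirror had a fully good copy, so pick good
	 * pages mirror by mirror and write them to the target device.
	 */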
1059 if (sctx->is_dev_replace) {
1060 success = 1;
1061 for (page_num = 0; page_num < sblock_bad->page_count;
1062 page_num++) {
1063 int sub_success;
1064
1065 sub_success = 0;
1066 for (mirror_index = 0;
1067 mirror_index < BTRFS_MAX_MIRRORS &&
1068 sblocks_for_recheck[mirror_index].page_count > 0;
1069 mirror_index++) {
1070 struct scrub_block *sblock_other =
1071 sblocks_for_recheck + mirror_index;
1072 struct scrub_page *page_other =
1073 sblock_other->pagev[page_num];
1074
1075 if (!page_other->io_error) {
1076 ret = scrub_write_page_to_dev_replace(
1077 sblock_other, page_num);
1078 if (ret == 0) {
1079
1080 sub_success = 1;
1081 break;
1082 } else {
1083 btrfs_dev_replace_stats_inc(
1084 &sctx->dev_root->
1085 fs_info->dev_replace.
1086 num_write_errors);
1087 }
1088 }
1089 }
1090
1091 if (!sub_success) {
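			/*
			 * No mirror had a good copy of this page; write the
			 * bad (zeroed on I/O error) page anyway so the
			 * target device layout stays consistent.
			 */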
1099 success = 0;
1100 ret = scrub_write_page_to_dev_replace(
1101 sblock_bad, page_num);
1102 if (ret)
1103 btrfs_dev_replace_stats_inc(
1104 &sctx->dev_root->fs_info->
1105 dev_replace.num_write_errors);
1106 }
1107 }
1108
1109 goto out;
1110 }
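
	/*
	 * Regular scrub: repair the bad block in place, page by page, from
	 * any mirror that read the corresponding page without an I/O error,
	 * then re-verify the checksum over the whole block below.
	 */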
1139 if (sblock_bad->no_io_error_seen)
1140 goto did_not_correct_error;
1141
1142 success = 1;
1143 for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
1144 struct scrub_page *page_bad = sblock_bad->pagev[page_num];
1145
1146 if (!page_bad->io_error)
1147 continue;
1148
1149 for (mirror_index = 0;
1150 mirror_index < BTRFS_MAX_MIRRORS &&
1151 sblocks_for_recheck[mirror_index].page_count > 0;
1152 mirror_index++) {
1153 struct scrub_block *sblock_other = sblocks_for_recheck +
1154 mirror_index;
1155 struct scrub_page *page_other = sblock_other->pagev[
1156 page_num];
1157
1158 if (!page_other->io_error) {
1159 ret = scrub_repair_page_from_good_copy(
1160 sblock_bad, sblock_other, page_num, 0);
1161 if (0 == ret) {
1162 page_bad->io_error = 0;
1163 break;
1164 }
1165 }
1166 }
1167
1168 if (page_bad->io_error) {
1169
1170 success = 0;
1171 }
1172 }
1173
1174 if (success) {
1175 if (is_metadata || have_csum) {
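			/*
			 * Every bad page was rewritten; re-read the whole
			 * block and verify the checksum again before
			 * declaring the error corrected.
			 */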
1185 scrub_recheck_block(fs_info, sblock_bad,
1186 is_metadata, have_csum, csum,
1187 generation, sctx->csum_size);
1188 if (!sblock_bad->header_error &&
1189 !sblock_bad->checksum_error &&
1190 sblock_bad->no_io_error_seen)
1191 goto corrected_error;
1192 else
1193 goto did_not_correct_error;
1194 } else {
1195corrected_error:
1196 spin_lock(&sctx->stat_lock);
1197 sctx->stat.corrected_errors++;
1198 spin_unlock(&sctx->stat_lock);
1199 printk_ratelimited_in_rcu(KERN_ERR
1200 "BTRFS: fixed up error at logical %llu on dev %s\n",
1201 logical, rcu_str_deref(dev->name));
1202 }
1203 } else {
1204did_not_correct_error:
1205 spin_lock(&sctx->stat_lock);
1206 sctx->stat.uncorrectable_errors++;
1207 spin_unlock(&sctx->stat_lock);
1208 printk_ratelimited_in_rcu(KERN_ERR
1209 "BTRFS: unable to fixup (regular) error at logical %llu on dev %s\n",
1210 logical, rcu_str_deref(dev->name));
1211 }
1212
1213out:
1214 if (sblocks_for_recheck) {
1215 for (mirror_index = 0; mirror_index < BTRFS_MAX_MIRRORS;
1216 mirror_index++) {
1217 struct scrub_block *sblock = sblocks_for_recheck +
1218 mirror_index;
1219 int page_index;
1220
1221 for (page_index = 0; page_index < sblock->page_count;
1222 page_index++) {
1223 sblock->pagev[page_index]->sblock = NULL;
1224 scrub_page_put(sblock->pagev[page_index]);
1225 }
1226 }
1227 kfree(sblocks_for_recheck);
1228 }
1229
1230 return 0;
1231}
1232
1233static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
1234 struct btrfs_fs_info *fs_info,
1235 struct scrub_block *original_sblock,
1236 u64 length, u64 logical,
1237 struct scrub_block *sblocks_for_recheck)
1238{
1239 int page_index;
1240 int mirror_index;
1241 int ret;
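
	/*
	 * Map each PAGE_SIZE piece of the block to all of its mirrors and
	 * allocate one scrub_page per mirror so that every copy can be
	 * re-read and checked.
	 */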
1249 page_index = 0;
1250 while (length > 0) {
1251 u64 sublen = min_t(u64, length, PAGE_SIZE);
1252 u64 mapped_length = sublen;
1253 struct btrfs_bio *bbio = NULL;
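		/*
		 * With a length of PAGE_SIZE, each stripe returned by
		 * btrfs_map_block() below represents one mirror of this page.
		 */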
1259 ret = btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, logical,
1260 &mapped_length, &bbio, 0);
1261 if (ret || !bbio || mapped_length < sublen) {
1262 kfree(bbio);
1263 return -EIO;
1264 }
1265
1266 BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO);
1267 for (mirror_index = 0; mirror_index < (int)bbio->num_stripes;
1268 mirror_index++) {
1269 struct scrub_block *sblock;
1270 struct scrub_page *page;
1271
1272 if (mirror_index >= BTRFS_MAX_MIRRORS)
1273 continue;
1274
1275 sblock = sblocks_for_recheck + mirror_index;
1276 sblock->sctx = sctx;
1277 page = kzalloc(sizeof(*page), GFP_NOFS);
1278 if (!page) {
1279leave_nomem:
1280 spin_lock(&sctx->stat_lock);
1281 sctx->stat.malloc_errors++;
1282 spin_unlock(&sctx->stat_lock);
1283 kfree(bbio);
1284 return -ENOMEM;
1285 }
1286 scrub_page_get(page);
1287 sblock->pagev[page_index] = page;
1288 page->logical = logical;
1289 page->physical = bbio->stripes[mirror_index].physical;
1290 BUG_ON(page_index >= original_sblock->page_count);
1291 page->physical_for_dev_replace =
1292 original_sblock->pagev[page_index]->
1293 physical_for_dev_replace;
1294
1295 page->dev = bbio->stripes[mirror_index].dev;
1296 page->mirror_num = mirror_index + 1;
1297 sblock->page_count++;
1298 page->page = alloc_page(GFP_NOFS);
1299 if (!page->page)
1300 goto leave_nomem;
1301 }
1302 kfree(bbio);
1303 length -= sublen;
1304 logical += sublen;
1305 page_index++;
1306 }
1307
1308 return 0;
1309}
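
/*
 * Synchronously re-read every page of @sblock, recording per-page I/O
 * errors; if all pages were readable, verify the checksum (and, for
 * metadata, the header) of the whole block.
 */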
1318static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
1319 struct scrub_block *sblock, int is_metadata,
1320 int have_csum, u8 *csum, u64 generation,
1321 u16 csum_size)
1322{
1323 int page_num;
1324
1325 sblock->no_io_error_seen = 1;
1326 sblock->header_error = 0;
1327 sblock->checksum_error = 0;
1328
1329 for (page_num = 0; page_num < sblock->page_count; page_num++) {
1330 struct bio *bio;
1331 struct scrub_page *page = sblock->pagev[page_num];
1332
1333 if (page->dev->bdev == NULL) {
1334 page->io_error = 1;
1335 sblock->no_io_error_seen = 0;
1336 continue;
1337 }
1338
1339 WARN_ON(!page->page);
1340 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
1341 if (!bio) {
1342 page->io_error = 1;
1343 sblock->no_io_error_seen = 0;
1344 continue;
1345 }
1346 bio->bi_bdev = page->dev->bdev;
1347 bio->bi_iter.bi_sector = page->physical >> 9;
1348
1349 bio_add_page(bio, page->page, PAGE_SIZE, 0);
1350 if (btrfsic_submit_bio_wait(READ, bio))
1351 sblock->no_io_error_seen = 0;
1352
1353 bio_put(bio);
1354 }
1355
1356 if (sblock->no_io_error_seen)
1357 scrub_recheck_block_checksum(fs_info, sblock, is_metadata,
1358 have_csum, csum, generation,
1359 csum_size);
1360
1361 return;
1362}
1363
1364static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
1365 struct scrub_block *sblock,
1366 int is_metadata, int have_csum,
1367 const u8 *csum, u64 generation,
1368 u16 csum_size)
1369{
1370 int page_num;
1371 u8 calculated_csum[BTRFS_CSUM_SIZE];
1372 u32 crc = ~(u32)0;
1373 void *mapped_buffer;
1374
1375 WARN_ON(!sblock->pagev[0]->page);
1376 if (is_metadata) {
1377 struct btrfs_header *h;
1378
1379 mapped_buffer = kmap_atomic(sblock->pagev[0]->page);
1380 h = (struct btrfs_header *)mapped_buffer;
1381
1382 if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h) ||
1383 memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) ||
1384 memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
1385 BTRFS_UUID_SIZE)) {
1386 sblock->header_error = 1;
1387 } else if (generation != btrfs_stack_header_generation(h)) {
1388 sblock->header_error = 1;
1389 sblock->generation_error = 1;
1390 }
1391 csum = h->csum;
1392 } else {
1393 if (!have_csum)
1394 return;
1395
1396 mapped_buffer = kmap_atomic(sblock->pagev[0]->page);
1397 }
1398
1399 for (page_num = 0;;) {
1400 if (page_num == 0 && is_metadata)
1401 crc = btrfs_csum_data(
1402 ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE,
1403 crc, PAGE_SIZE - BTRFS_CSUM_SIZE);
1404 else
1405 crc = btrfs_csum_data(mapped_buffer, crc, PAGE_SIZE);
1406
1407 kunmap_atomic(mapped_buffer);
1408 page_num++;
1409 if (page_num >= sblock->page_count)
1410 break;
1411 WARN_ON(!sblock->pagev[page_num]->page);
1412
1413 mapped_buffer = kmap_atomic(sblock->pagev[page_num]->page);
1414 }
1415
1416 btrfs_csum_final(crc, calculated_csum);
1417 if (memcmp(calculated_csum, csum, csum_size))
1418 sblock->checksum_error = 1;
1419}
1420
1421static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad,
1422 struct scrub_block *sblock_good,
1423 int force_write)
1424{
1425 int page_num;
1426 int ret = 0;
1427
1428 for (page_num = 0; page_num < sblock_bad->page_count; page_num++) {
1429 int ret_sub;
1430
1431 ret_sub = scrub_repair_page_from_good_copy(sblock_bad,
1432 sblock_good,
1433 page_num,
1434 force_write);
1435 if (ret_sub)
1436 ret = ret_sub;
1437 }
1438
1439 return ret;
1440}
1441
1442static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
1443 struct scrub_block *sblock_good,
1444 int page_num, int force_write)
1445{
1446 struct scrub_page *page_bad = sblock_bad->pagev[page_num];
1447 struct scrub_page *page_good = sblock_good->pagev[page_num];
1448
1449 BUG_ON(page_bad->page == NULL);
1450 BUG_ON(page_good->page == NULL);
1451 if (force_write || sblock_bad->header_error ||
1452 sblock_bad->checksum_error || page_bad->io_error) {
1453 struct bio *bio;
1454 int ret;
1455
1456 if (!page_bad->dev->bdev) {
1457 printk_ratelimited(KERN_WARNING "BTRFS: "
1458 "scrub_repair_page_from_good_copy(bdev == NULL) "
1459 "is unexpected!\n");
1460 return -EIO;
1461 }
1462
1463 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
1464 if (!bio)
1465 return -EIO;
1466 bio->bi_bdev = page_bad->dev->bdev;
1467 bio->bi_iter.bi_sector = page_bad->physical >> 9;
1468
1469 ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
1470 if (PAGE_SIZE != ret) {
1471 bio_put(bio);
1472 return -EIO;
1473 }
1474
1475 if (btrfsic_submit_bio_wait(WRITE, bio)) {
1476 btrfs_dev_stat_inc_and_print(page_bad->dev,
1477 BTRFS_DEV_STAT_WRITE_ERRS);
1478 btrfs_dev_replace_stats_inc(
1479 &sblock_bad->sctx->dev_root->fs_info->
1480 dev_replace.num_write_errors);
1481 bio_put(bio);
1482 return -EIO;
1483 }
1484 bio_put(bio);
1485 }
1486
1487 return 0;
1488}
1489
1490static void scrub_write_block_to_dev_replace(struct scrub_block *sblock)
1491{
1492 int page_num;
1493
1494 for (page_num = 0; page_num < sblock->page_count; page_num++) {
1495 int ret;
1496
1497 ret = scrub_write_page_to_dev_replace(sblock, page_num);
1498 if (ret)
1499 btrfs_dev_replace_stats_inc(
1500 &sblock->sctx->dev_root->fs_info->dev_replace.
1501 num_write_errors);
1502 }
1503}
1504
1505static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
1506 int page_num)
1507{
1508 struct scrub_page *spage = sblock->pagev[page_num];
1509
1510 BUG_ON(spage->page == NULL);
1511 if (spage->io_error) {
1512 void *mapped_buffer = kmap_atomic(spage->page);
1513
1514 memset(mapped_buffer, 0, PAGE_CACHE_SIZE);
1515 flush_dcache_page(spage->page);
1516 kunmap_atomic(mapped_buffer);
1517 }
1518 return scrub_add_page_to_wr_bio(sblock->sctx, spage);
1519}
1520
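/*
 * Queue @spage for writing to the dev-replace target.  Pages are collected
 * into a write bio until it is full or the next page is not physically and
 * logically contiguous, at which point the bio is submitted.
 */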
1521static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
1522 struct scrub_page *spage)
1523{
1524 struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx;
1525 struct scrub_bio *sbio;
1526 int ret;
1527
1528 mutex_lock(&wr_ctx->wr_lock);
1529again:
1530 if (!wr_ctx->wr_curr_bio) {
1531 wr_ctx->wr_curr_bio = kzalloc(sizeof(*wr_ctx->wr_curr_bio),
1532 GFP_NOFS);
1533 if (!wr_ctx->wr_curr_bio) {
1534 mutex_unlock(&wr_ctx->wr_lock);
1535 return -ENOMEM;
1536 }
1537 wr_ctx->wr_curr_bio->sctx = sctx;
1538 wr_ctx->wr_curr_bio->page_count = 0;
1539 }
1540 sbio = wr_ctx->wr_curr_bio;
1541 if (sbio->page_count == 0) {
1542 struct bio *bio;
1543
1544 sbio->physical = spage->physical_for_dev_replace;
1545 sbio->logical = spage->logical;
1546 sbio->dev = wr_ctx->tgtdev;
1547 bio = sbio->bio;
1548 if (!bio) {
1549 bio = btrfs_io_bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio);
1550 if (!bio) {
1551 mutex_unlock(&wr_ctx->wr_lock);
1552 return -ENOMEM;
1553 }
1554 sbio->bio = bio;
1555 }
1556
1557 bio->bi_private = sbio;
1558 bio->bi_end_io = scrub_wr_bio_end_io;
1559 bio->bi_bdev = sbio->dev->bdev;
1560 bio->bi_iter.bi_sector = sbio->physical >> 9;
1561 sbio->err = 0;
1562 } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
1563 spage->physical_for_dev_replace ||
1564 sbio->logical + sbio->page_count * PAGE_SIZE !=
1565 spage->logical) {
1566 scrub_wr_submit(sctx);
1567 goto again;
1568 }
1569
1570 ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
1571 if (ret != PAGE_SIZE) {
1572 if (sbio->page_count < 1) {
1573 bio_put(sbio->bio);
1574 sbio->bio = NULL;
1575 mutex_unlock(&wr_ctx->wr_lock);
1576 return -EIO;
1577 }
1578 scrub_wr_submit(sctx);
1579 goto again;
1580 }
1581
1582 sbio->pagev[sbio->page_count] = spage;
1583 scrub_page_get(spage);
1584 sbio->page_count++;
1585 if (sbio->page_count == wr_ctx->pages_per_wr_bio)
1586 scrub_wr_submit(sctx);
1587 mutex_unlock(&wr_ctx->wr_lock);
1588
1589 return 0;
1590}
1591
1592static void scrub_wr_submit(struct scrub_ctx *sctx)
1593{
1594 struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx;
1595 struct scrub_bio *sbio;
1596
1597 if (!wr_ctx->wr_curr_bio)
1598 return;
1599
1600 sbio = wr_ctx->wr_curr_bio;
1601 wr_ctx->wr_curr_bio = NULL;
1602 WARN_ON(!sbio->bio->bi_bdev);
1603 scrub_pending_bio_inc(sctx);
1608 btrfsic_submit_bio(WRITE, sbio->bio);
1609}
1610
1611static void scrub_wr_bio_end_io(struct bio *bio, int err)
1612{
1613 struct scrub_bio *sbio = bio->bi_private;
1614 struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info;
1615
1616 sbio->err = err;
1617 sbio->bio = bio;
1618
1619 btrfs_init_work(&sbio->work, scrub_wr_bio_end_io_worker, NULL, NULL);
1620 btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work);
1621}
1622
1623static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
1624{
1625 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
1626 struct scrub_ctx *sctx = sbio->sctx;
1627 int i;
1628
1629 WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
1630 if (sbio->err) {
1631 struct btrfs_dev_replace *dev_replace =
1632 &sbio->sctx->dev_root->fs_info->dev_replace;
1633
1634 for (i = 0; i < sbio->page_count; i++) {
1635 struct scrub_page *spage = sbio->pagev[i];
1636
1637 spage->io_error = 1;
1638 btrfs_dev_replace_stats_inc(&dev_replace->
1639 num_write_errors);
1640 }
1641 }
1642
1643 for (i = 0; i < sbio->page_count; i++)
1644 scrub_page_put(sbio->pagev[i]);
1645
1646 bio_put(sbio->bio);
1647 kfree(sbio);
1648 scrub_pending_bio_dec(sctx);
1649}
1650
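/*
 * Verify the checksum of a fully read block and kick off error handling on
 * failure.  Super block errors are only accounted, never repaired here.
 */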
1651static int scrub_checksum(struct scrub_block *sblock)
1652{
1653 u64 flags;
1654 int ret;
1655
1656 WARN_ON(sblock->page_count < 1);
1657 flags = sblock->pagev[0]->flags;
1658 ret = 0;
1659 if (flags & BTRFS_EXTENT_FLAG_DATA)
1660 ret = scrub_checksum_data(sblock);
1661 else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
1662 ret = scrub_checksum_tree_block(sblock);
1663 else if (flags & BTRFS_EXTENT_FLAG_SUPER)
1664 (void)scrub_checksum_super(sblock);
1665 else
1666 WARN_ON(1);
1667 if (ret)
1668 scrub_handle_errored_block(sblock);
1669
1670 return ret;
1671}
1672
1673static int scrub_checksum_data(struct scrub_block *sblock)
1674{
1675 struct scrub_ctx *sctx = sblock->sctx;
1676 u8 csum[BTRFS_CSUM_SIZE];
1677 u8 *on_disk_csum;
1678 struct page *page;
1679 void *buffer;
1680 u32 crc = ~(u32)0;
1681 int fail = 0;
1682 u64 len;
1683 int index;
1684
1685 BUG_ON(sblock->page_count < 1);
1686 if (!sblock->pagev[0]->have_csum)
1687 return 0;
1688
1689 on_disk_csum = sblock->pagev[0]->csum;
1690 page = sblock->pagev[0]->page;
1691 buffer = kmap_atomic(page);
1692
1693 len = sctx->sectorsize;
1694 index = 0;
1695 for (;;) {
1696 u64 l = min_t(u64, len, PAGE_SIZE);
1697
1698 crc = btrfs_csum_data(buffer, crc, l);
1699 kunmap_atomic(buffer);
1700 len -= l;
1701 if (len == 0)
1702 break;
1703 index++;
1704 BUG_ON(index >= sblock->page_count);
1705 BUG_ON(!sblock->pagev[index]->page);
1706 page = sblock->pagev[index]->page;
1707 buffer = kmap_atomic(page);
1708 }
1709
1710 btrfs_csum_final(crc, csum);
1711 if (memcmp(csum, on_disk_csum, sctx->csum_size))
1712 fail = 1;
1713
1714 return fail;
1715}
1716
1717static int scrub_checksum_tree_block(struct scrub_block *sblock)
1718{
1719 struct scrub_ctx *sctx = sblock->sctx;
1720 struct btrfs_header *h;
1721 struct btrfs_root *root = sctx->dev_root;
1722 struct btrfs_fs_info *fs_info = root->fs_info;
1723 u8 calculated_csum[BTRFS_CSUM_SIZE];
1724 u8 on_disk_csum[BTRFS_CSUM_SIZE];
1725 struct page *page;
1726 void *mapped_buffer;
1727 u64 mapped_size;
1728 void *p;
1729 u32 crc = ~(u32)0;
1730 int fail = 0;
1731 int crc_fail = 0;
1732 u64 len;
1733 int index;
1734
1735 BUG_ON(sblock->page_count < 1);
1736 page = sblock->pagev[0]->page;
1737 mapped_buffer = kmap_atomic(page);
1738 h = (struct btrfs_header *)mapped_buffer;
1739 memcpy(on_disk_csum, h->csum, sctx->csum_size);
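
	/*
	 * The header fields are read straight from the mapped page; there is
	 * no extent_buffer here, so the usual accessors cannot be used.
	 */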
1747 if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h))
1748 ++fail;
1749
1750 if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h))
1751 ++fail;
1752
1753 if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
1754 ++fail;
1755
1756 if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
1757 BTRFS_UUID_SIZE))
1758 ++fail;
1759
1760 WARN_ON(sctx->nodesize != sctx->leafsize);
1761 len = sctx->nodesize - BTRFS_CSUM_SIZE;
1762 mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
1763 p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
1764 index = 0;
1765 for (;;) {
1766 u64 l = min_t(u64, len, mapped_size);
1767
1768 crc = btrfs_csum_data(p, crc, l);
1769 kunmap_atomic(mapped_buffer);
1770 len -= l;
1771 if (len == 0)
1772 break;
1773 index++;
1774 BUG_ON(index >= sblock->page_count);
1775 BUG_ON(!sblock->pagev[index]->page);
1776 page = sblock->pagev[index]->page;
1777 mapped_buffer = kmap_atomic(page);
1778 mapped_size = PAGE_SIZE;
1779 p = mapped_buffer;
1780 }
1781
1782 btrfs_csum_final(crc, calculated_csum);
1783 if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
1784 ++crc_fail;
1785
1786 return fail || crc_fail;
1787}
1788
1789static int scrub_checksum_super(struct scrub_block *sblock)
1790{
1791 struct btrfs_super_block *s;
1792 struct scrub_ctx *sctx = sblock->sctx;
1793 struct btrfs_root *root = sctx->dev_root;
1794 struct btrfs_fs_info *fs_info = root->fs_info;
1795 u8 calculated_csum[BTRFS_CSUM_SIZE];
1796 u8 on_disk_csum[BTRFS_CSUM_SIZE];
1797 struct page *page;
1798 void *mapped_buffer;
1799 u64 mapped_size;
1800 void *p;
1801 u32 crc = ~(u32)0;
1802 int fail_gen = 0;
1803 int fail_cor = 0;
1804 u64 len;
1805 int index;
1806
1807 BUG_ON(sblock->page_count < 1);
1808 page = sblock->pagev[0]->page;
1809 mapped_buffer = kmap_atomic(page);
1810 s = (struct btrfs_super_block *)mapped_buffer;
1811 memcpy(on_disk_csum, s->csum, sctx->csum_size);
1812
1813 if (sblock->pagev[0]->logical != btrfs_super_bytenr(s))
1814 ++fail_cor;
1815
1816 if (sblock->pagev[0]->generation != btrfs_super_generation(s))
1817 ++fail_gen;
1818
1819 if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
1820 ++fail_cor;
1821
1822 len = BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE;
1823 mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE;
1824 p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE;
1825 index = 0;
1826 for (;;) {
1827 u64 l = min_t(u64, len, mapped_size);
1828
1829 crc = btrfs_csum_data(p, crc, l);
1830 kunmap_atomic(mapped_buffer);
1831 len -= l;
1832 if (len == 0)
1833 break;
1834 index++;
1835 BUG_ON(index >= sblock->page_count);
1836 BUG_ON(!sblock->pagev[index]->page);
1837 page = sblock->pagev[index]->page;
1838 mapped_buffer = kmap_atomic(page);
1839 mapped_size = PAGE_SIZE;
1840 p = mapped_buffer;
1841 }
1842
1843 btrfs_csum_final(crc, calculated_csum);
1844 if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
1845 ++fail_cor;
1846
1847 if (fail_cor + fail_gen) {
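		/*
		 * Checksum and generation errors in a super block are only
		 * accounted; the super blocks are rewritten with the next
		 * transaction commit anyway.
		 */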
1853 spin_lock(&sctx->stat_lock);
1854 ++sctx->stat.super_errors;
1855 spin_unlock(&sctx->stat_lock);
1856 if (fail_cor)
1857 btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
1858 BTRFS_DEV_STAT_CORRUPTION_ERRS);
1859 else
1860 btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev,
1861 BTRFS_DEV_STAT_GENERATION_ERRS);
1862 }
1863
1864 return fail_cor + fail_gen;
1865}
1866
1867static void scrub_block_get(struct scrub_block *sblock)
1868{
1869 atomic_inc(&sblock->ref_count);
1870}
1871
1872static void scrub_block_put(struct scrub_block *sblock)
1873{
1874 if (atomic_dec_and_test(&sblock->ref_count)) {
1875 int i;
1876
1877 for (i = 0; i < sblock->page_count; i++)
1878 scrub_page_put(sblock->pagev[i]);
1879 kfree(sblock);
1880 }
1881}
1882
1883static void scrub_page_get(struct scrub_page *spage)
1884{
1885 atomic_inc(&spage->ref_count);
1886}
1887
1888static void scrub_page_put(struct scrub_page *spage)
1889{
1890 if (atomic_dec_and_test(&spage->ref_count)) {
1891 if (spage->page)
1892 __free_page(spage->page);
1893 kfree(spage);
1894 }
1895}
1896
1897static void scrub_submit(struct scrub_ctx *sctx)
1898{
1899 struct scrub_bio *sbio;
1900
1901 if (sctx->curr == -1)
1902 return;
1903
1904 sbio = sctx->bios[sctx->curr];
1905 sctx->curr = -1;
1906 scrub_pending_bio_inc(sctx);
1907
1908 if (!sbio->bio->bi_bdev) {
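		/*
		 * Defensive: there is no block device to submit to (e.g. a
		 * missing device), so complete the bio with -EIO instead.
		 */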
1916 printk_ratelimited(KERN_WARNING
1917 "BTRFS: scrub_submit(bio bdev == NULL) is unexpected!\n");
1918 bio_endio(sbio->bio, -EIO);
1919 } else {
1920 btrfsic_submit_bio(READ, sbio->bio);
1921 }
1922}
1923
1924static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
1925 struct scrub_page *spage)
1926{
1927 struct scrub_block *sblock = spage->sblock;
1928 struct scrub_bio *sbio;
1929 int ret;
1930
1931again:
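	/*
	 * Grab a free bio slot from the pool, waiting for one to complete if
	 * they are all in flight.
	 */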
1935 while (sctx->curr == -1) {
1936 spin_lock(&sctx->list_lock);
1937 sctx->curr = sctx->first_free;
1938 if (sctx->curr != -1) {
1939 sctx->first_free = sctx->bios[sctx->curr]->next_free;
1940 sctx->bios[sctx->curr]->next_free = -1;
1941 sctx->bios[sctx->curr]->page_count = 0;
1942 spin_unlock(&sctx->list_lock);
1943 } else {
1944 spin_unlock(&sctx->list_lock);
1945 wait_event(sctx->list_wait, sctx->first_free != -1);
1946 }
1947 }
1948 sbio = sctx->bios[sctx->curr];
1949 if (sbio->page_count == 0) {
1950 struct bio *bio;
1951
1952 sbio->physical = spage->physical;
1953 sbio->logical = spage->logical;
1954 sbio->dev = spage->dev;
1955 bio = sbio->bio;
1956 if (!bio) {
1957 bio = btrfs_io_bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio);
1958 if (!bio)
1959 return -ENOMEM;
1960 sbio->bio = bio;
1961 }
1962
1963 bio->bi_private = sbio;
1964 bio->bi_end_io = scrub_bio_end_io;
1965 bio->bi_bdev = sbio->dev->bdev;
1966 bio->bi_iter.bi_sector = sbio->physical >> 9;
1967 sbio->err = 0;
1968 } else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
1969 spage->physical ||
1970 sbio->logical + sbio->page_count * PAGE_SIZE !=
1971 spage->logical ||
1972 sbio->dev != spage->dev) {
1973 scrub_submit(sctx);
1974 goto again;
1975 }
1976
1977 sbio->pagev[sbio->page_count] = spage;
1978 ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
1979 if (ret != PAGE_SIZE) {
1980 if (sbio->page_count < 1) {
1981 bio_put(sbio->bio);
1982 sbio->bio = NULL;
1983 return -EIO;
1984 }
1985 scrub_submit(sctx);
1986 goto again;
1987 }
1988
1989 scrub_block_get(sblock);
1990 atomic_inc(&sblock->outstanding_pages);
1991 sbio->page_count++;
1992 if (sbio->page_count == sctx->pages_per_rd_bio)
1993 scrub_submit(sctx);
1994
1995 return 0;
1996}
1997
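/*
 * Split [logical, logical + len) into PAGE_SIZE scrub_pages, attach them to
 * a new scrub_block and queue every page into the current read bio.  The
 * block is verified (and repaired if necessary) from the bio completion
 * worker.
 */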
1998static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
1999 u64 physical, struct btrfs_device *dev, u64 flags,
2000 u64 gen, int mirror_num, u8 *csum, int force,
2001 u64 physical_for_dev_replace)
2002{
2003 struct scrub_block *sblock;
2004 int index;
2005
2006 sblock = kzalloc(sizeof(*sblock), GFP_NOFS);
2007 if (!sblock) {
2008 spin_lock(&sctx->stat_lock);
2009 sctx->stat.malloc_errors++;
2010 spin_unlock(&sctx->stat_lock);
2011 return -ENOMEM;
2012 }
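
	/* one ref for this function; each page queued into a bio takes another */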
2016 atomic_set(&sblock->ref_count, 1);
2017 sblock->sctx = sctx;
2018 sblock->no_io_error_seen = 1;
2019
2020 for (index = 0; len > 0; index++) {
2021 struct scrub_page *spage;
2022 u64 l = min_t(u64, len, PAGE_SIZE);
2023
2024 spage = kzalloc(sizeof(*spage), GFP_NOFS);
2025 if (!spage) {
2026leave_nomem:
2027 spin_lock(&sctx->stat_lock);
2028 sctx->stat.malloc_errors++;
2029 spin_unlock(&sctx->stat_lock);
2030 scrub_block_put(sblock);
2031 return -ENOMEM;
2032 }
2033 BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
2034 scrub_page_get(spage);
2035 sblock->pagev[index] = spage;
2036 spage->sblock = sblock;
2037 spage->dev = dev;
2038 spage->flags = flags;
2039 spage->generation = gen;
2040 spage->logical = logical;
2041 spage->physical = physical;
2042 spage->physical_for_dev_replace = physical_for_dev_replace;
2043 spage->mirror_num = mirror_num;
2044 if (csum) {
2045 spage->have_csum = 1;
2046 memcpy(spage->csum, csum, sctx->csum_size);
2047 } else {
2048 spage->have_csum = 0;
2049 }
2050 sblock->page_count++;
2051 spage->page = alloc_page(GFP_NOFS);
2052 if (!spage->page)
2053 goto leave_nomem;
2054 len -= l;
2055 logical += l;
2056 physical += l;
2057 physical_for_dev_replace += l;
2058 }
2059
2060 WARN_ON(sblock->page_count == 0);
2061 for (index = 0; index < sblock->page_count; index++) {
2062 struct scrub_page *spage = sblock->pagev[index];
2063 int ret;
2064
2065 ret = scrub_add_page_to_rd_bio(sctx, spage);
2066 if (ret) {
2067 scrub_block_put(sblock);
2068 return ret;
2069 }
2070 }
2071
2072 if (force)
2073 scrub_submit(sctx);
2074
2075
2076 scrub_block_put(sblock);
2077 return 0;
2078}
2079
2080static void scrub_bio_end_io(struct bio *bio, int err)
2081{
2082 struct scrub_bio *sbio = bio->bi_private;
2083 struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info;
2084
2085 sbio->err = err;
2086 sbio->bio = bio;
2087
2088 btrfs_queue_work(fs_info->scrub_workers, &sbio->work);
2089}
2090
2091static void scrub_bio_end_io_worker(struct btrfs_work *work)
2092{
2093 struct scrub_bio *sbio = container_of(work, struct scrub_bio, work);
2094 struct scrub_ctx *sctx = sbio->sctx;
2095 int i;
2096
2097 BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
2098 if (sbio->err) {
2099 for (i = 0; i < sbio->page_count; i++) {
2100 struct scrub_page *spage = sbio->pagev[i];
2101
2102 spage->io_error = 1;
2103 spage->sblock->no_io_error_seen = 0;
2104 }
2105 }
2106
2107
2108 for (i = 0; i < sbio->page_count; i++) {
2109 struct scrub_page *spage = sbio->pagev[i];
2110 struct scrub_block *sblock = spage->sblock;
2111
2112 if (atomic_dec_and_test(&sblock->outstanding_pages))
2113 scrub_block_complete(sblock);
2114 scrub_block_put(sblock);
2115 }
2116
2117 bio_put(sbio->bio);
2118 sbio->bio = NULL;
2119 spin_lock(&sctx->list_lock);
2120 sbio->next_free = sctx->first_free;
2121 sctx->first_free = sbio->index;
2122 spin_unlock(&sctx->list_lock);
2123
2124 if (sctx->is_dev_replace &&
2125 atomic_read(&sctx->wr_ctx.flush_all_writes)) {
2126 mutex_lock(&sctx->wr_ctx.wr_lock);
2127 scrub_wr_submit(sctx);
2128 mutex_unlock(&sctx->wr_ctx.wr_lock);
2129 }
2130
2131 scrub_pending_bio_dec(sctx);
2132}
2133
2134static void scrub_block_complete(struct scrub_block *sblock)
2135{
2136 if (!sblock->no_io_error_seen) {
2137 scrub_handle_errored_block(sblock);
2138 } else {
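		/*
		 * No read errors: verify the checksum, and in dev-replace
		 * mode copy the good block to the target device.
		 */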
2144 if (!scrub_checksum(sblock) && sblock->sctx->is_dev_replace)
2145 scrub_write_block_to_dev_replace(sblock);
2146 }
2147}
2148
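/*
 * Look up the data checksum for @logical in the pre-loaded csum_list.
 * Entries that end before @logical are dropped while walking, since the
 * list is processed in order.  Returns 1 and copies the csum on success.
 */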
2149static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len,
2150 u8 *csum)
2151{
2152 struct btrfs_ordered_sum *sum = NULL;
2153 unsigned long index;
2154 unsigned long num_sectors;
2155
2156 while (!list_empty(&sctx->csum_list)) {
2157 sum = list_first_entry(&sctx->csum_list,
2158 struct btrfs_ordered_sum, list);
2159 if (sum->bytenr > logical)
2160 return 0;
2161 if (sum->bytenr + sum->len > logical)
2162 break;
2163
2164 ++sctx->stat.csum_discards;
2165 list_del(&sum->list);
2166 kfree(sum);
2167 sum = NULL;
2168 }
2169 if (!sum)
2170 return 0;
2171
2172 index = ((u32)(logical - sum->bytenr)) / sctx->sectorsize;
2173 num_sectors = sum->len / sctx->sectorsize;
2174 memcpy(csum, sum->sums + index, sctx->csum_size);
2175 if (index == num_sectors - 1) {
2176 list_del(&sum->list);
2177 kfree(sum);
2178 }
2179 return 1;
2180}
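
/*
 * Split an extent into block sized pieces and queue each of them for
 * reading.  Data blocks get their checksum attached; nodatasum data in
 * dev-replace mode is copied through the NOCOW path instead.
 */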
2183static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len,
2184 u64 physical, struct btrfs_device *dev, u64 flags,
2185 u64 gen, int mirror_num, u64 physical_for_dev_replace)
2186{
2187 int ret;
2188 u8 csum[BTRFS_CSUM_SIZE];
2189 u32 blocksize;
2190
2191 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2192 blocksize = sctx->sectorsize;
2193 spin_lock(&sctx->stat_lock);
2194 sctx->stat.data_extents_scrubbed++;
2195 sctx->stat.data_bytes_scrubbed += len;
2196 spin_unlock(&sctx->stat_lock);
2197 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
2198 WARN_ON(sctx->nodesize != sctx->leafsize);
2199 blocksize = sctx->nodesize;
2200 spin_lock(&sctx->stat_lock);
2201 sctx->stat.tree_extents_scrubbed++;
2202 sctx->stat.tree_bytes_scrubbed += len;
2203 spin_unlock(&sctx->stat_lock);
2204 } else {
2205 blocksize = sctx->sectorsize;
2206 WARN_ON(1);
2207 }
2208
2209 while (len) {
2210 u64 l = min_t(u64, len, blocksize);
2211 int have_csum = 0;
2212
2213 if (flags & BTRFS_EXTENT_FLAG_DATA) {
2214
2215 have_csum = scrub_find_csum(sctx, logical, l, csum);
2216 if (have_csum == 0)
2217 ++sctx->stat.no_csum;
2218 if (sctx->is_dev_replace && !have_csum) {
2219 ret = copy_nocow_pages(sctx, logical, l,
2220 mirror_num,
2221 physical_for_dev_replace);
2222 goto behind_scrub_pages;
2223 }
2224 }
2225 ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen,
2226 mirror_num, have_csum ? csum : NULL, 0,
2227 physical_for_dev_replace);
2228behind_scrub_pages:
2229 if (ret)
2230 return ret;
2231 len -= l;
2232 logical += l;
2233 physical += l;
2234 physical_for_dev_replace += l;
2235 }
2236 return 0;
2237}
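
/*
 * For RAID5/6: map a physical offset inside stripe @num back to a logical
 * offset.  Returns 0 if this device holds a data stripe at that position,
 * 1 if it holds parity there.
 */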
2246static int get_raid56_logic_offset(u64 physical, int num,
2247 struct map_lookup *map, u64 *offset)
2248{
2249 int i;
2250 int j = 0;
2251 u64 stripe_nr;
2252 u64 last_offset;
2253 int stripe_index;
2254 int rot;
2255
2256 last_offset = (physical - map->stripes[num].physical) *
2257 nr_data_stripes(map);
2258 *offset = last_offset;
2259 for (i = 0; i < nr_data_stripes(map); i++) {
2260 *offset = last_offset + i * map->stripe_len;
2261
2262 stripe_nr = *offset;
2263 do_div(stripe_nr, map->stripe_len);
2264 do_div(stripe_nr, nr_data_stripes(map));
2265
2266
2267 rot = do_div(stripe_nr, map->num_stripes);
2268
2269 rot += i;
2270 stripe_index = rot % map->num_stripes;
2271 if (stripe_index == num)
2272 return 0;
2273 if (stripe_index < num)
2274 j++;
2275 }
2276 *offset = last_offset + j * map->stripe_len;
2277 return 1;
2278}
2279
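/*
 * Scrub one stripe of a chunk on the given device: walk the extent tree
 * (commit root) for all extents that fall into this stripe, collect their
 * checksums and submit them for scrubbing, honouring cancel and pause
 * requests along the way.
 */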
2280static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
2281 struct map_lookup *map,
2282 struct btrfs_device *scrub_dev,
2283 int num, u64 base, u64 length,
2284 int is_dev_replace)
2285{
2286 struct btrfs_path *path;
2287 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
2288 struct btrfs_root *root = fs_info->extent_root;
2289 struct btrfs_root *csum_root = fs_info->csum_root;
2290 struct btrfs_extent_item *extent;
2291 struct blk_plug plug;
2292 u64 flags;
2293 int ret;
2294 int slot;
2295 u64 nstripes;
2296 struct extent_buffer *l;
2297 struct btrfs_key key;
2298 u64 physical;
2299 u64 logical;
2300 u64 logic_end;
2301 u64 physical_end;
2302 u64 generation;
2303 int mirror_num;
2304 struct reada_control *reada1;
2305 struct reada_control *reada2;
2306 struct btrfs_key key_start;
2307 struct btrfs_key key_end;
2308 u64 increment = map->stripe_len;
2309 u64 offset;
2310 u64 extent_logical;
2311 u64 extent_physical;
2312 u64 extent_len;
2313 struct btrfs_device *extent_dev;
2314 int extent_mirror_num;
2315 int stop_loop = 0;
2316
2317 nstripes = length;
2318 physical = map->stripes[num].physical;
2319 offset = 0;
2320 do_div(nstripes, map->stripe_len);
2321 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
2322 offset = map->stripe_len * num;
2323 increment = map->stripe_len * map->num_stripes;
2324 mirror_num = 1;
2325 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
2326 int factor = map->num_stripes / map->sub_stripes;
2327 offset = map->stripe_len * (num / map->sub_stripes);
2328 increment = map->stripe_len * factor;
2329 mirror_num = num % map->sub_stripes + 1;
2330 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
2331 increment = map->stripe_len;
2332 mirror_num = num % map->num_stripes + 1;
2333 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
2334 increment = map->stripe_len;
2335 mirror_num = num % map->num_stripes + 1;
2336 } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
2337 BTRFS_BLOCK_GROUP_RAID6)) {
2338 get_raid56_logic_offset(physical, num, map, &offset);
2339 increment = map->stripe_len * nr_data_stripes(map);
2340 mirror_num = 1;
2341 } else {
2342 increment = map->stripe_len;
2343 mirror_num = 1;
2344 }
2345
2346 path = btrfs_alloc_path();
2347 if (!path)
2348 return -ENOMEM;
2349
	/*
	 * Work on the commit root. The related disk blocks are static as
	 * long as COW is applied. This means it is safe to rewrite them
	 * to repair disk errors without any race conditions.
	 */
2355 path->search_commit_root = 1;
2356 path->skip_locking = 1;
2357
	/*
	 * Trigger readahead for the extent tree and the csum tree and wait
	 * for completion. During readahead, the scrub is officially paused
	 * so that it does not hold off transaction commits.
	 */
2363 logical = base + offset;
2364 physical_end = physical + nstripes * map->stripe_len;
2365 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
2366 BTRFS_BLOCK_GROUP_RAID6)) {
2367 get_raid56_logic_offset(physical_end, num,
2368 map, &logic_end);
2369 logic_end += base;
2370 } else {
2371 logic_end = logical + increment * nstripes;
2372 }
2373 wait_event(sctx->list_wait,
2374 atomic_read(&sctx->bios_in_flight) == 0);
2375 scrub_blocked_if_needed(fs_info);
2376
	/* readahead the extent tree items for the range to be scrubbed */
2378 key_start.objectid = logical;
2379 key_start.type = BTRFS_EXTENT_ITEM_KEY;
2380 key_start.offset = (u64)0;
2381 key_end.objectid = logic_end;
2382 key_end.type = BTRFS_METADATA_ITEM_KEY;
2383 key_end.offset = (u64)-1;
2384 reada1 = btrfs_reada_add(root, &key_start, &key_end);
2385
2386 key_start.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
2387 key_start.type = BTRFS_EXTENT_CSUM_KEY;
2388 key_start.offset = logical;
2389 key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
2390 key_end.type = BTRFS_EXTENT_CSUM_KEY;
2391 key_end.offset = logic_end;
2392 reada2 = btrfs_reada_add(csum_root, &key_start, &key_end);
2393
2394 if (!IS_ERR(reada1))
2395 btrfs_reada_wait(reada1);
2396 if (!IS_ERR(reada2))
2397 btrfs_reada_wait(reada2);

	/*
	 * Start a block plug so that the read bios submitted while walking
	 * the extents below can be merged before being issued.
	 */
2404 blk_start_plug(&plug);
2405
	/*
	 * Now find all extents for each stripe and scrub them.
	 */
2409 ret = 0;
2410 while (physical < physical_end) {
		/* for RAID5/6, skip over parity stripes */
2412 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
2413 BTRFS_BLOCK_GROUP_RAID6)) {
2414 ret = get_raid56_logic_offset(physical, num,
2415 map, &logical);
2416 logical += base;
2417 if (ret)
2418 goto skip;
2419 }
		/*
		 * canceled?
		 */
2423 if (atomic_read(&fs_info->scrub_cancel_req) ||
2424 atomic_read(&sctx->cancel_req)) {
2425 ret = -ECANCELED;
2426 goto out;
2427 }
		/*
		 * check to see if we have to pause
		 */
2431 if (atomic_read(&fs_info->scrub_pause_req)) {
			/* push queued extents */
2433 atomic_set(&sctx->wr_ctx.flush_all_writes, 1);
2434 scrub_submit(sctx);
2435 mutex_lock(&sctx->wr_ctx.wr_lock);
2436 scrub_wr_submit(sctx);
2437 mutex_unlock(&sctx->wr_ctx.wr_lock);
2438 wait_event(sctx->list_wait,
2439 atomic_read(&sctx->bios_in_flight) == 0);
2440 atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
2441 scrub_blocked_if_needed(fs_info);
2442 }
2443
2444 if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
2445 key.type = BTRFS_METADATA_ITEM_KEY;
2446 else
2447 key.type = BTRFS_EXTENT_ITEM_KEY;
2448 key.objectid = logical;
2449 key.offset = (u64)-1;
2450
2451 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2452 if (ret < 0)
2453 goto out;
2454
2455 if (ret > 0) {
2456 ret = btrfs_previous_extent_item(root, path, 0);
2457 if (ret < 0)
2458 goto out;
2459 if (ret > 0) {
				/* there is no smaller item, so stick with
				 * the larger one */
2462 btrfs_release_path(path);
2463 ret = btrfs_search_slot(NULL, root, &key,
2464 path, 0, 0);
2465 if (ret < 0)
2466 goto out;
2467 }
2468 }
2469
2470 stop_loop = 0;
2471 while (1) {
2472 u64 bytes;
2473
2474 l = path->nodes[0];
2475 slot = path->slots[0];
2476 if (slot >= btrfs_header_nritems(l)) {
2477 ret = btrfs_next_leaf(root, path);
2478 if (ret == 0)
2479 continue;
2480 if (ret < 0)
2481 goto out;
2482
2483 stop_loop = 1;
2484 break;
2485 }
2486 btrfs_item_key_to_cpu(l, &key, slot);
2487
2488 if (key.type == BTRFS_METADATA_ITEM_KEY)
2489 bytes = root->leafsize;
2490 else
2491 bytes = key.offset;
2492
2493 if (key.objectid + bytes <= logical)
2494 goto next;
2495
2496 if (key.type != BTRFS_EXTENT_ITEM_KEY &&
2497 key.type != BTRFS_METADATA_ITEM_KEY)
2498 goto next;
2499
2500 if (key.objectid >= logical + map->stripe_len) {
				/* out of this device extent */
2502 if (key.objectid >= logic_end)
2503 stop_loop = 1;
2504 break;
2505 }
2506
2507 extent = btrfs_item_ptr(l, slot,
2508 struct btrfs_extent_item);
2509 flags = btrfs_extent_flags(l, extent);
2510 generation = btrfs_extent_generation(l, extent);
2511
2512 if (key.objectid < logical &&
2513 (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
2514 btrfs_err(fs_info,
2515 "scrub: tree block %llu spanning "
2516 "stripes, ignored. logical=%llu",
2517 key.objectid, logical);
2518 goto next;
2519 }
2520
2521again:
2522 extent_logical = key.objectid;
2523 extent_len = bytes;
2524
			/*
			 * trim the extent to this stripe
			 */
2528 if (extent_logical < logical) {
2529 extent_len -= logical - extent_logical;
2530 extent_logical = logical;
2531 }
2532 if (extent_logical + extent_len >
2533 logical + map->stripe_len) {
2534 extent_len = logical + map->stripe_len -
2535 extent_logical;
2536 }
2537
2538 extent_physical = extent_logical - logical + physical;
2539 extent_dev = scrub_dev;
2540 extent_mirror_num = mirror_num;
2541 if (is_dev_replace)
2542 scrub_remap_extent(fs_info, extent_logical,
2543 extent_len, &extent_physical,
2544 &extent_dev,
2545 &extent_mirror_num);
2546
2547 ret = btrfs_lookup_csums_range(csum_root, logical,
2548 logical + map->stripe_len - 1,
2549 &sctx->csum_list, 1);
2550 if (ret)
2551 goto out;
2552
2553 ret = scrub_extent(sctx, extent_logical, extent_len,
2554 extent_physical, extent_dev, flags,
2555 generation, extent_mirror_num,
2556 extent_logical - logical + physical);
2557 if (ret)
2558 goto out;
2559
2560 scrub_free_csums(sctx);
2561 if (extent_logical + extent_len <
2562 key.objectid + bytes) {
2563 if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
2564 BTRFS_BLOCK_GROUP_RAID6)) {
					/*
					 * loop until we find the next data
					 * stripe or we have finished all
					 * stripes.
					 */
2569 do {
2570 physical += map->stripe_len;
2571 ret = get_raid56_logic_offset(
2572 physical, num,
2573 map, &logical);
2574 logical += base;
2575 } while (physical < physical_end && ret);
2576 } else {
2577 physical += map->stripe_len;
2578 logical += increment;
2579 }
2580 if (logical < key.objectid + bytes) {
2581 cond_resched();
2582 goto again;
2583 }
2584
2585 if (physical >= physical_end) {
2586 stop_loop = 1;
2587 break;
2588 }
2589 }
2590next:
2591 path->slots[0]++;
2592 }
2593 btrfs_release_path(path);
2594skip:
2595 logical += increment;
2596 physical += map->stripe_len;
2597 spin_lock(&sctx->stat_lock);
2598 if (stop_loop)
2599 sctx->stat.last_physical = map->stripes[num].physical +
2600 length;
2601 else
2602 sctx->stat.last_physical = physical;
2603 spin_unlock(&sctx->stat_lock);
2604 if (stop_loop)
2605 break;
2606 }
2607out:
	/* push queued extents */
2609 scrub_submit(sctx);
2610 mutex_lock(&sctx->wr_ctx.wr_lock);
2611 scrub_wr_submit(sctx);
2612 mutex_unlock(&sctx->wr_ctx.wr_lock);
2613
2614 blk_finish_plug(&plug);
2615 btrfs_free_path(path);
2616 return ret < 0 ? ret : 0;
2617}
2618
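/*
 * Map a device extent back to its chunk and scrub every stripe of that chunk
 * which lives on the device being scrubbed.
 */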
2619static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
2620 struct btrfs_device *scrub_dev,
2621 u64 chunk_tree, u64 chunk_objectid,
2622 u64 chunk_offset, u64 length,
2623 u64 dev_offset, int is_dev_replace)
2624{
2625 struct btrfs_mapping_tree *map_tree =
2626 &sctx->dev_root->fs_info->mapping_tree;
2627 struct map_lookup *map;
2628 struct extent_map *em;
2629 int i;
2630 int ret = 0;
2631
2632 read_lock(&map_tree->map_tree.lock);
2633 em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
2634 read_unlock(&map_tree->map_tree.lock);
2635
2636 if (!em)
2637 return -EINVAL;
2638
2639 map = (struct map_lookup *)em->bdev;
2640 if (em->start != chunk_offset)
2641 goto out;
2642
2643 if (em->len < length)
2644 goto out;
2645
2646 for (i = 0; i < map->num_stripes; ++i) {
2647 if (map->stripes[i].dev->bdev == scrub_dev->bdev &&
2648 map->stripes[i].physical == dev_offset) {
2649 ret = scrub_stripe(sctx, map, scrub_dev, i,
2650 chunk_offset, length,
2651 is_dev_replace);
2652 if (ret)
2653 goto out;
2654 }
2655 }
2656out:
2657 free_extent_map(em);
2658
2659 return ret;
2660}
2661
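/*
 * Walk the device tree for all dev extents of the scrubbed device that
 * overlap [start, end) and scrub the corresponding chunks one by one.
 * Between chunks, all pending I/O is flushed and the dev-replace cursor is
 * advanced.
 */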
2662static noinline_for_stack
2663int scrub_enumerate_chunks(struct scrub_ctx *sctx,
2664 struct btrfs_device *scrub_dev, u64 start, u64 end,
2665 int is_dev_replace)
2666{
2667 struct btrfs_dev_extent *dev_extent = NULL;
2668 struct btrfs_path *path;
2669 struct btrfs_root *root = sctx->dev_root;
2670 struct btrfs_fs_info *fs_info = root->fs_info;
2671 u64 length;
2672 u64 chunk_tree;
2673 u64 chunk_objectid;
2674 u64 chunk_offset;
2675 int ret;
2676 int slot;
2677 struct extent_buffer *l;
2678 struct btrfs_key key;
2679 struct btrfs_key found_key;
2680 struct btrfs_block_group_cache *cache;
2681 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
2682
2683 path = btrfs_alloc_path();
2684 if (!path)
2685 return -ENOMEM;
2686
2687 path->reada = 2;
2688 path->search_commit_root = 1;
2689 path->skip_locking = 1;
2690
2691 key.objectid = scrub_dev->devid;
2692 key.offset = 0ull;
2693 key.type = BTRFS_DEV_EXTENT_KEY;
2694
2695 while (1) {
2696 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2697 if (ret < 0)
2698 break;
2699 if (ret > 0) {
2700 if (path->slots[0] >=
2701 btrfs_header_nritems(path->nodes[0])) {
2702 ret = btrfs_next_leaf(root, path);
2703 if (ret)
2704 break;
2705 }
2706 }
2707
2708 l = path->nodes[0];
2709 slot = path->slots[0];
2710
2711 btrfs_item_key_to_cpu(l, &found_key, slot);
2712
2713 if (found_key.objectid != scrub_dev->devid)
2714 break;
2715
2716 if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY)
2717 break;
2718
2719 if (found_key.offset >= end)
2720 break;
2721
2722 if (found_key.offset < key.offset)
2723 break;
2724
2725 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
2726 length = btrfs_dev_extent_length(l, dev_extent);
2727
2728 if (found_key.offset + length <= start)
2729 goto skip;
2730
2731 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
2732 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
2733 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
2734
		/*
		 * Get a reference on the corresponding block group to prevent
		 * the chunk from going away while we scrub it.
		 */
2739 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
2740
		/* some chunks are removed but not committed to disk yet,
		 * continue scrubbing */
2743 if (!cache)
2744 goto skip;
2745
2746 dev_replace->cursor_right = found_key.offset + length;
2747 dev_replace->cursor_left = found_key.offset;
2748 dev_replace->item_needs_writeback = 1;
2749 ret = scrub_chunk(sctx, scrub_dev, chunk_tree, chunk_objectid,
2750 chunk_offset, length, found_key.offset,
2751 is_dev_replace);

		/*
		 * Flush and submit all pending read and write bios, then
		 * wait for them. Note that in the dev-replace case a read
		 * request causes write requests that are submitted in the
		 * read completion worker. Therefore all write requests must
		 * be flushed here so that reads and writes are really
		 * complete once bios_in_flight drops to zero.
		 */
2763 atomic_set(&sctx->wr_ctx.flush_all_writes, 1);
2764 scrub_submit(sctx);
2765 mutex_lock(&sctx->wr_ctx.wr_lock);
2766 scrub_wr_submit(sctx);
2767 mutex_unlock(&sctx->wr_ctx.wr_lock);
2768
2769 wait_event(sctx->list_wait,
2770 atomic_read(&sctx->bios_in_flight) == 0);
2771 atomic_inc(&fs_info->scrubs_paused);
2772 wake_up(&fs_info->scrub_pause_wait);
2773
		/*
		 * This must happen before scrubs_paused is decreased again:
		 * make sure we do not block a transaction commit while
		 * waiting for the pending workers to finish.
		 */
2779 wait_event(sctx->list_wait,
2780 atomic_read(&sctx->workers_pending) == 0);
2781 atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
2782
2783 mutex_lock(&fs_info->scrub_lock);
2784 __scrub_blocked_if_needed(fs_info);
2785 atomic_dec(&fs_info->scrubs_paused);
2786 mutex_unlock(&fs_info->scrub_lock);
2787 wake_up(&fs_info->scrub_pause_wait);
2788
2789 btrfs_put_block_group(cache);
2790 if (ret)
2791 break;
2792 if (is_dev_replace &&
2793 atomic64_read(&dev_replace->num_write_errors) > 0) {
2794 ret = -EIO;
2795 break;
2796 }
2797 if (sctx->stat.malloc_errors > 0) {
2798 ret = -ENOMEM;
2799 break;
2800 }
2801
2802 dev_replace->cursor_left = dev_replace->cursor_right;
2803 dev_replace->item_needs_writeback = 1;
2804skip:
2805 key.offset = found_key.offset + length;
2806 btrfs_release_path(path);
2807 }
2808
2809 btrfs_free_path(path);
2810
	/*
	 * ret can still be 1 from btrfs_search_slot() or btrfs_next_leaf();
	 * only a negative value indicates an error, so normalize the
	 * return value here.
	 */
2815 return ret < 0 ? ret : 0;
2816}
2817
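/*
 * Scrub all superblock copies of the given device that fit inside the
 * device, using the last committed transaction generation for verification.
 */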
2818static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
2819 struct btrfs_device *scrub_dev)
2820{
2821 int i;
2822 u64 bytenr;
2823 u64 gen;
2824 int ret;
2825 struct btrfs_root *root = sctx->dev_root;
2826
2827 if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
2828 return -EIO;
2829
2830 gen = root->fs_info->last_trans_committed;
2831
2832 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
2833 bytenr = btrfs_sb_offset(i);
2834 if (bytenr + BTRFS_SUPER_INFO_SIZE > scrub_dev->total_bytes)
2835 break;
2836
2837 ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
2838 scrub_dev, BTRFS_EXTENT_FLAG_SUPER, gen, i,
2839 NULL, 1, bytenr);
2840 if (ret)
2841 return ret;
2842 }
2843 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
2844
2845 return 0;
2846}
2847
/*
 * Get a reference count on fs_info->scrub_workers; start the worker threads
 * if this is the first reference.
 */
2851static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
2852 int is_dev_replace)
2853{
2854 int ret = 0;
2855 int flags = WQ_FREEZABLE | WQ_UNBOUND;
2856 int max_active = fs_info->thread_pool_size;
2857
2858 if (fs_info->scrub_workers_refcnt == 0) {
2859 if (is_dev_replace)
2860 fs_info->scrub_workers =
2861 btrfs_alloc_workqueue("btrfs-scrub", flags,
2862 1, 4);
2863 else
2864 fs_info->scrub_workers =
2865 btrfs_alloc_workqueue("btrfs-scrub", flags,
2866 max_active, 4);
2867 if (!fs_info->scrub_workers) {
2868 ret = -ENOMEM;
2869 goto out;
2870 }
2871 fs_info->scrub_wr_completion_workers =
2872 btrfs_alloc_workqueue("btrfs-scrubwrc", flags,
2873 max_active, 2);
2874 if (!fs_info->scrub_wr_completion_workers) {
2875 ret = -ENOMEM;
2876 goto out;
2877 }
2878 fs_info->scrub_nocow_workers =
2879 btrfs_alloc_workqueue("btrfs-scrubnc", flags, 1, 0);
2880 if (!fs_info->scrub_nocow_workers) {
2881 ret = -ENOMEM;
2882 goto out;
2883 }
2884 }
2885 ++fs_info->scrub_workers_refcnt;
2886out:
2887 return ret;
2888}
2889
2890static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
2891{
2892 if (--fs_info->scrub_workers_refcnt == 0) {
2893 btrfs_destroy_workqueue(fs_info->scrub_workers);
2894 btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
2895 btrfs_destroy_workqueue(fs_info->scrub_nocow_workers);
2896 }
2897 WARN_ON(fs_info->scrub_workers_refcnt < 0);
2898}
2899
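/*
 * Entry point for both scrub and dev-replace: validate size assumptions,
 * locate the device, set up the scrub context and worker threads, scrub the
 * superblocks (scrub only) and all chunks, then tear everything down and
 * report progress.
 */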
2900int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
2901 u64 end, struct btrfs_scrub_progress *progress,
2902 int readonly, int is_dev_replace)
2903{
2904 struct scrub_ctx *sctx;
2905 int ret;
2906 struct btrfs_device *dev;
2907
2908 if (btrfs_fs_closing(fs_info))
2909 return -EINVAL;
2910
	/*
	 * check some assumptions
	 */
2914 if (fs_info->chunk_root->nodesize != fs_info->chunk_root->leafsize) {
2915 btrfs_err(fs_info,
2916 "scrub: size assumption nodesize == leafsize (%d == %d) fails",
2917 fs_info->chunk_root->nodesize,
2918 fs_info->chunk_root->leafsize);
2919 return -EINVAL;
2920 }
2921
2922 if (fs_info->chunk_root->nodesize > BTRFS_STRIPE_LEN) {
		/*
		 * In this case scrub is unable to calculate the checksum the
		 * way scrub_checksum_tree_block() does, since it does not
		 * guarantee that the whole tree block is in one stripe.
		 */
2928 btrfs_err(fs_info,
2929 "scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails",
2930 fs_info->chunk_root->nodesize, BTRFS_STRIPE_LEN);
2931 return -EINVAL;
2932 }
2933
2934 if (fs_info->chunk_root->sectorsize != PAGE_SIZE) {
		/* not supported for data w/o checksums */
2936 btrfs_err(fs_info,
2937 "scrub: size assumption sectorsize != PAGE_SIZE "
2938 "(%d != %lu) fails",
2939 fs_info->chunk_root->sectorsize, PAGE_SIZE);
2940 return -EINVAL;
2941 }
2942
2943 if (fs_info->chunk_root->nodesize >
2944 PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK ||
2945 fs_info->chunk_root->sectorsize >
2946 PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK) {
		/*
		 * This would exhaust the array bounds of the pagev member in
		 * struct scrub_block.
		 */
2951 btrfs_err(fs_info, "scrub: size assumption nodesize and sectorsize "
2952 "<= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails",
2953 fs_info->chunk_root->nodesize,
2954 SCRUB_MAX_PAGES_PER_BLOCK,
2955 fs_info->chunk_root->sectorsize,
2956 SCRUB_MAX_PAGES_PER_BLOCK);
2957 return -EINVAL;
2958 }
2959
2960
2961 mutex_lock(&fs_info->fs_devices->device_list_mutex);
2962 dev = btrfs_find_device(fs_info, devid, NULL, NULL);
2963 if (!dev || (dev->missing && !is_dev_replace)) {
2964 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2965 return -ENODEV;
2966 }
2967
2968 mutex_lock(&fs_info->scrub_lock);
2969 if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) {
2970 mutex_unlock(&fs_info->scrub_lock);
2971 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2972 return -EIO;
2973 }
2974
2975 btrfs_dev_replace_lock(&fs_info->dev_replace);
2976 if (dev->scrub_device ||
2977 (!is_dev_replace &&
2978 btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
2979 btrfs_dev_replace_unlock(&fs_info->dev_replace);
2980 mutex_unlock(&fs_info->scrub_lock);
2981 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2982 return -EINPROGRESS;
2983 }
2984 btrfs_dev_replace_unlock(&fs_info->dev_replace);
2985
2986 ret = scrub_workers_get(fs_info, is_dev_replace);
2987 if (ret) {
2988 mutex_unlock(&fs_info->scrub_lock);
2989 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2990 return ret;
2991 }
2992
2993 sctx = scrub_setup_ctx(dev, is_dev_replace);
2994 if (IS_ERR(sctx)) {
2995 mutex_unlock(&fs_info->scrub_lock);
2996 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2997 scrub_workers_put(fs_info);
2998 return PTR_ERR(sctx);
2999 }
3000 sctx->readonly = readonly;
3001 dev->scrub_device = sctx;
3002 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3003
	/*
	 * Checking scrub_pause_req here before declaring the scrub running
	 * avoids racing with a transaction commit that is about to pause
	 * all scrubs.
	 */
3008 __scrub_blocked_if_needed(fs_info);
3009 atomic_inc(&fs_info->scrubs_running);
3010 mutex_unlock(&fs_info->scrub_lock);
3011
3012 if (!is_dev_replace) {
		/*
		 * Holding the device list mutex here serializes the super
		 * block scrub against super block writeout.
		 */
3017 mutex_lock(&fs_info->fs_devices->device_list_mutex);
3018 ret = scrub_supers(sctx, dev);
3019 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
3020 }
3021
3022 if (!ret)
3023 ret = scrub_enumerate_chunks(sctx, dev, start, end,
3024 is_dev_replace);
3025
3026 wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0);
3027 atomic_dec(&fs_info->scrubs_running);
3028 wake_up(&fs_info->scrub_pause_wait);
3029
3030 wait_event(sctx->list_wait, atomic_read(&sctx->workers_pending) == 0);
3031
3032 if (progress)
3033 memcpy(progress, &sctx->stat, sizeof(*progress));
3034
3035 mutex_lock(&fs_info->scrub_lock);
3036 dev->scrub_device = NULL;
3037 scrub_workers_put(fs_info);
3038 mutex_unlock(&fs_info->scrub_lock);
3039
3040 scrub_free_ctx(sctx);
3041
3042 return ret;
3043}
3044
3045void btrfs_scrub_pause(struct btrfs_root *root)
3046{
3047 struct btrfs_fs_info *fs_info = root->fs_info;
3048
3049 mutex_lock(&fs_info->scrub_lock);
3050 atomic_inc(&fs_info->scrub_pause_req);
3051 while (atomic_read(&fs_info->scrubs_paused) !=
3052 atomic_read(&fs_info->scrubs_running)) {
3053 mutex_unlock(&fs_info->scrub_lock);
3054 wait_event(fs_info->scrub_pause_wait,
3055 atomic_read(&fs_info->scrubs_paused) ==
3056 atomic_read(&fs_info->scrubs_running));
3057 mutex_lock(&fs_info->scrub_lock);
3058 }
3059 mutex_unlock(&fs_info->scrub_lock);
3060}
3061
3062void btrfs_scrub_continue(struct btrfs_root *root)
3063{
3064 struct btrfs_fs_info *fs_info = root->fs_info;
3065
3066 atomic_dec(&fs_info->scrub_pause_req);
3067 wake_up(&fs_info->scrub_pause_wait);
3068}
3069
3070int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
3071{
3072 mutex_lock(&fs_info->scrub_lock);
3073 if (!atomic_read(&fs_info->scrubs_running)) {
3074 mutex_unlock(&fs_info->scrub_lock);
3075 return -ENOTCONN;
3076 }
3077
3078 atomic_inc(&fs_info->scrub_cancel_req);
3079 while (atomic_read(&fs_info->scrubs_running)) {
3080 mutex_unlock(&fs_info->scrub_lock);
3081 wait_event(fs_info->scrub_pause_wait,
3082 atomic_read(&fs_info->scrubs_running) == 0);
3083 mutex_lock(&fs_info->scrub_lock);
3084 }
3085 atomic_dec(&fs_info->scrub_cancel_req);
3086 mutex_unlock(&fs_info->scrub_lock);
3087
3088 return 0;
3089}
3090
3091int btrfs_scrub_cancel_dev(struct btrfs_fs_info *fs_info,
3092 struct btrfs_device *dev)
3093{
3094 struct scrub_ctx *sctx;
3095
3096 mutex_lock(&fs_info->scrub_lock);
3097 sctx = dev->scrub_device;
3098 if (!sctx) {
3099 mutex_unlock(&fs_info->scrub_lock);
3100 return -ENOTCONN;
3101 }
3102 atomic_inc(&sctx->cancel_req);
3103 while (dev->scrub_device) {
3104 mutex_unlock(&fs_info->scrub_lock);
3105 wait_event(fs_info->scrub_pause_wait,
3106 dev->scrub_device == NULL);
3107 mutex_lock(&fs_info->scrub_lock);
3108 }
3109 mutex_unlock(&fs_info->scrub_lock);
3110
3111 return 0;
3112}
3113
3114int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
3115 struct btrfs_scrub_progress *progress)
3116{
3117 struct btrfs_device *dev;
3118 struct scrub_ctx *sctx = NULL;
3119
3120 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
3121 dev = btrfs_find_device(root->fs_info, devid, NULL, NULL);
3122 if (dev)
3123 sctx = dev->scrub_device;
3124 if (sctx)
3125 memcpy(progress, &sctx->stat, sizeof(*progress));
3126 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
3127
3128 return dev ? (sctx ? 0 : -ENOTCONN) : -ENODEV;
3129}
3130
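/*
 * For dev-replace, map a logical extent to the physical address and device
 * of the first stripe returned by btrfs_map_block(), so the caller reads
 * the data from that copy instead of from the device being replaced.
 */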
3131static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
3132 u64 extent_logical, u64 extent_len,
3133 u64 *extent_physical,
3134 struct btrfs_device **extent_dev,
3135 int *extent_mirror_num)
3136{
3137 u64 mapped_length;
3138 struct btrfs_bio *bbio = NULL;
3139 int ret;
3140
3141 mapped_length = extent_len;
3142 ret = btrfs_map_block(fs_info, READ, extent_logical,
3143 &mapped_length, &bbio, 0);
3144 if (ret || !bbio || mapped_length < extent_len ||
3145 !bbio->stripes[0].dev->bdev) {
3146 kfree(bbio);
3147 return;
3148 }
3149
3150 *extent_physical = bbio->stripes[0].physical;
3151 *extent_mirror_num = bbio->mirror_num;
3152 *extent_dev = bbio->stripes[0].dev;
3153 kfree(bbio);
3154}
3155
3156static int scrub_setup_wr_ctx(struct scrub_ctx *sctx,
3157 struct scrub_wr_ctx *wr_ctx,
3158 struct btrfs_fs_info *fs_info,
3159 struct btrfs_device *dev,
3160 int is_dev_replace)
3161{
3162 WARN_ON(wr_ctx->wr_curr_bio != NULL);
3163
3164 mutex_init(&wr_ctx->wr_lock);
3165 wr_ctx->wr_curr_bio = NULL;
3166 if (!is_dev_replace)
3167 return 0;
3168
3169 WARN_ON(!dev->bdev);
3170 wr_ctx->pages_per_wr_bio = min_t(int, SCRUB_PAGES_PER_WR_BIO,
3171 bio_get_nr_vecs(dev->bdev));
3172 wr_ctx->tgtdev = dev;
3173 atomic_set(&wr_ctx->flush_all_writes, 0);
3174 return 0;
3175}
3176
3177static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx)
3178{
3179 mutex_lock(&wr_ctx->wr_lock);
3180 kfree(wr_ctx->wr_curr_bio);
3181 wr_ctx->wr_curr_bio = NULL;
3182 mutex_unlock(&wr_ctx->wr_lock);
3183}
3184
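/*
 * Queue a worker that copies the pages of a nocow (checksum-less) extent
 * to the dev-replace target device.
 */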
3185static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
3186 int mirror_num, u64 physical_for_dev_replace)
3187{
3188 struct scrub_copy_nocow_ctx *nocow_ctx;
3189 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
3190
3191 nocow_ctx = kzalloc(sizeof(*nocow_ctx), GFP_NOFS);
3192 if (!nocow_ctx) {
3193 spin_lock(&sctx->stat_lock);
3194 sctx->stat.malloc_errors++;
3195 spin_unlock(&sctx->stat_lock);
3196 return -ENOMEM;
3197 }
3198
3199 scrub_pending_trans_workers_inc(sctx);
3200
3201 nocow_ctx->sctx = sctx;
3202 nocow_ctx->logical = logical;
3203 nocow_ctx->len = len;
3204 nocow_ctx->mirror_num = mirror_num;
3205 nocow_ctx->physical_for_dev_replace = physical_for_dev_replace;
3206 btrfs_init_work(&nocow_ctx->work, copy_nocow_pages_worker, NULL, NULL);
3207 INIT_LIST_HEAD(&nocow_ctx->inodes);
3208 btrfs_queue_work(fs_info->scrub_nocow_workers,
3209 &nocow_ctx->work);
3210
3211 return 0;
3212}
3213
3214static int record_inode_for_nocow(u64 inum, u64 offset, u64 root, void *ctx)
3215{
3216 struct scrub_copy_nocow_ctx *nocow_ctx = ctx;
3217 struct scrub_nocow_inode *nocow_inode;
3218
3219 nocow_inode = kzalloc(sizeof(*nocow_inode), GFP_NOFS);
3220 if (!nocow_inode)
3221 return -ENOMEM;
3222 nocow_inode->inum = inum;
3223 nocow_inode->offset = offset;
3224 nocow_inode->root = root;
3225 list_add_tail(&nocow_inode->list, &nocow_ctx->inodes);
3226 return 0;
3227}
3228
3229#define COPY_COMPLETE 1
3230
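/*
 * Worker for copy_nocow_pages(): find all inodes referencing the logical
 * range and copy the data of the first one that still fully covers it to
 * the dev-replace target. A failure is accounted as an uncorrectable read
 * error.
 */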
3231static void copy_nocow_pages_worker(struct btrfs_work *work)
3232{
3233 struct scrub_copy_nocow_ctx *nocow_ctx =
3234 container_of(work, struct scrub_copy_nocow_ctx, work);
3235 struct scrub_ctx *sctx = nocow_ctx->sctx;
3236 u64 logical = nocow_ctx->logical;
3237 u64 len = nocow_ctx->len;
3238 int mirror_num = nocow_ctx->mirror_num;
3239 u64 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
3240 int ret;
3241 struct btrfs_trans_handle *trans = NULL;
3242 struct btrfs_fs_info *fs_info;
3243 struct btrfs_path *path;
3244 struct btrfs_root *root;
3245 int not_written = 0;
3246
3247 fs_info = sctx->dev_root->fs_info;
3248 root = fs_info->extent_root;
3249
3250 path = btrfs_alloc_path();
3251 if (!path) {
3252 spin_lock(&sctx->stat_lock);
3253 sctx->stat.malloc_errors++;
3254 spin_unlock(&sctx->stat_lock);
3255 not_written = 1;
3256 goto out;
3257 }
3258
3259 trans = btrfs_join_transaction(root);
3260 if (IS_ERR(trans)) {
3261 not_written = 1;
3262 goto out;
3263 }
3264
3265 ret = iterate_inodes_from_logical(logical, fs_info, path,
3266 record_inode_for_nocow, nocow_ctx);
3267 if (ret != 0 && ret != -ENOENT) {
3268 btrfs_warn(fs_info, "iterate_inodes_from_logical() failed: log %llu, "
3269 "phys %llu, len %llu, mir %u, ret %d",
3270 logical, physical_for_dev_replace, len, mirror_num,
3271 ret);
3272 not_written = 1;
3273 goto out;
3274 }
3275
3276 btrfs_end_transaction(trans, root);
3277 trans = NULL;
3278 while (!list_empty(&nocow_ctx->inodes)) {
3279 struct scrub_nocow_inode *entry;
3280 entry = list_first_entry(&nocow_ctx->inodes,
3281 struct scrub_nocow_inode,
3282 list);
3283 list_del_init(&entry->list);
3284 ret = copy_nocow_pages_for_inode(entry->inum, entry->offset,
3285 entry->root, nocow_ctx);
3286 kfree(entry);
3287 if (ret == COPY_COMPLETE) {
3288 ret = 0;
3289 break;
3290 } else if (ret) {
3291 break;
3292 }
3293 }
3294out:
3295 while (!list_empty(&nocow_ctx->inodes)) {
3296 struct scrub_nocow_inode *entry;
3297 entry = list_first_entry(&nocow_ctx->inodes,
3298 struct scrub_nocow_inode,
3299 list);
3300 list_del_init(&entry->list);
3301 kfree(entry);
3302 }
3303 if (trans && !IS_ERR(trans))
3304 btrfs_end_transaction(trans, root);
3305 if (not_written)
3306 btrfs_dev_replace_stats_inc(&fs_info->dev_replace.
3307 num_uncorrectable_read_errors);
3308
3309 btrfs_free_path(path);
3310 kfree(nocow_ctx);
3311
3312 scrub_pending_trans_workers_dec(sctx);
3313}
3314
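/*
 * Copy one inode's range to the dev-replace target. The range is locked in
 * the extent io tree, verified to still map to the expected logical extent,
 * and each page is read (if necessary) and written out via
 * write_page_nocow().
 */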
3315static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
3316 struct scrub_copy_nocow_ctx *nocow_ctx)
3317{
3318 struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info;
3319 struct btrfs_key key;
3320 struct inode *inode;
3321 struct page *page;
3322 struct btrfs_root *local_root;
3323 struct btrfs_ordered_extent *ordered;
3324 struct extent_map *em;
3325 struct extent_state *cached_state = NULL;
3326 struct extent_io_tree *io_tree;
3327 u64 physical_for_dev_replace;
3328 u64 len = nocow_ctx->len;
3329 u64 lockstart = offset, lockend = offset + len - 1;
3330 unsigned long index;
3331 int srcu_index;
3332 int ret = 0;
3333 int err = 0;
3334
3335 key.objectid = root;
3336 key.type = BTRFS_ROOT_ITEM_KEY;
3337 key.offset = (u64)-1;
3338
3339 srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
3340
3341 local_root = btrfs_read_fs_root_no_name(fs_info, &key);
3342 if (IS_ERR(local_root)) {
3343 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
3344 return PTR_ERR(local_root);
3345 }
3346
3347 key.type = BTRFS_INODE_ITEM_KEY;
3348 key.objectid = inum;
3349 key.offset = 0;
3350 inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
3351 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
3352 if (IS_ERR(inode))
3353 return PTR_ERR(inode);
3354
	/* Avoid truncate/dio/punch hole on this inode while we copy. */
3356 mutex_lock(&inode->i_mutex);
3357 inode_dio_wait(inode);
3358
3359 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
3360 io_tree = &BTRFS_I(inode)->io_tree;
3361
3362 lock_extent_bits(io_tree, lockstart, lockend, 0, &cached_state);
3363 ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
3364 if (ordered) {
3365 btrfs_put_ordered_extent(ordered);
3366 goto out_unlock;
3367 }
3368
3369 em = btrfs_get_extent(inode, NULL, 0, lockstart, len, 0);
3370 if (IS_ERR(em)) {
3371 ret = PTR_ERR(em);
3372 goto out_unlock;
3373 }
3374
	/*
	 * This extent does not actually cover the logical extent anymore,
	 * move on to the next inode.
	 */
3379 if (em->block_start > nocow_ctx->logical ||
3380 em->block_start + em->block_len < nocow_ctx->logical + len) {
3381 free_extent_map(em);
3382 goto out_unlock;
3383 }
3384 free_extent_map(em);
3385
3386 while (len >= PAGE_CACHE_SIZE) {
3387 index = offset >> PAGE_CACHE_SHIFT;
3388again:
3389 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
3390 if (!page) {
3391 btrfs_err(fs_info, "find_or_create_page() failed");
3392 ret = -ENOMEM;
3393 goto out;
3394 }
3395
3396 if (PageUptodate(page)) {
3397 if (PageDirty(page))
3398 goto next_page;
3399 } else {
3400 ClearPageError(page);
3401 err = extent_read_full_page_nolock(io_tree, page,
3402 btrfs_get_extent,
3403 nocow_ctx->mirror_num);
3404 if (err) {
3405 ret = err;
3406 goto next_page;
3407 }
3408
3409 lock_page(page);
			/*
			 * If the page has been removed from the page cache,
			 * the data on it is meaningless, because it may be
			 * an old copy; the new data may have been written
			 * into a new page in the page cache.
			 */
3416 if (page->mapping != inode->i_mapping) {
3417 unlock_page(page);
3418 page_cache_release(page);
3419 goto again;
3420 }
3421 if (!PageUptodate(page)) {
3422 ret = -EIO;
3423 goto next_page;
3424 }
3425 }
3426 err = write_page_nocow(nocow_ctx->sctx,
3427 physical_for_dev_replace, page);
3428 if (err)
3429 ret = err;
3430next_page:
3431 unlock_page(page);
3432 page_cache_release(page);
3433
3434 if (ret)
3435 break;
3436
3437 offset += PAGE_CACHE_SIZE;
3438 physical_for_dev_replace += PAGE_CACHE_SIZE;
3439 len -= PAGE_CACHE_SIZE;
3440 }
3441 ret = COPY_COMPLETE;
3442out_unlock:
3443 unlock_extent_cached(io_tree, lockstart, lockend, &cached_state,
3444 GFP_NOFS);
3445out:
3446 mutex_unlock(&inode->i_mutex);
3447 iput(inode);
3448 return ret;
3449}
3450
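/*
 * Write a single page synchronously to the given physical offset on the
 * dev-replace target device, bypassing the normal write path.
 */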
3451static int write_page_nocow(struct scrub_ctx *sctx,
3452 u64 physical_for_dev_replace, struct page *page)
3453{
3454 struct bio *bio;
3455 struct btrfs_device *dev;
3456 int ret;
3457
3458 dev = sctx->wr_ctx.tgtdev;
3459 if (!dev)
3460 return -EIO;
3461 if (!dev->bdev) {
3462 printk_ratelimited(KERN_WARNING
3463 "BTRFS: scrub write_page_nocow(bdev == NULL) is unexpected!\n");
3464 return -EIO;
3465 }
3466 bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
3467 if (!bio) {
3468 spin_lock(&sctx->stat_lock);
3469 sctx->stat.malloc_errors++;
3470 spin_unlock(&sctx->stat_lock);
3471 return -ENOMEM;
3472 }
3473 bio->bi_iter.bi_size = 0;
3474 bio->bi_iter.bi_sector = physical_for_dev_replace >> 9;
3475 bio->bi_bdev = dev->bdev;
3476 ret = bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
3477 if (ret != PAGE_CACHE_SIZE) {
3478leave_with_eio:
3479 bio_put(bio);
3480 btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
3481 return -EIO;
3482 }
3483
3484 if (btrfsic_submit_bio_wait(WRITE_SYNC, bio))
3485 goto leave_with_eio;
3486
3487 bio_put(bio);
3488 return 0;
3489}
3490