1
2
3
4
5
6#include <linux/sched.h>
7#include <linux/sched/mm.h>
8#include <linux/bio.h>
9#include <linux/slab.h>
10#include <linux/blkdev.h>
11#include <linux/ratelimit.h>
12#include <linux/kthread.h>
13#include <linux/raid/pq.h>
14#include <linux/semaphore.h>
15#include <linux/uuid.h>
16#include <linux/list_sort.h>
17#include "misc.h"
18#include "ctree.h"
19#include "extent_map.h"
20#include "disk-io.h"
21#include "transaction.h"
22#include "print-tree.h"
23#include "volumes.h"
24#include "raid56.h"
25#include "async-thread.h"
26#include "check-integrity.h"
27#include "rcu-string.h"
28#include "dev-replace.h"
29#include "sysfs.h"
30#include "tree-checker.h"
31#include "space-info.h"
32#include "block-group.h"
33#include "discard.h"
34#include "zoned.h"
35
/*
 * Per-profile constants for each btrfs RAID level: stripe layout, device
 * count limits, how many device failures the profile tolerates, the number
 * of data copies and parity stripes, plus the block group flag and the
 * error code reported when the minimum device count is not met.
 *
 * Field notes (grounded in how the values differ between entries below):
 *  - devs_max == 0 means "no upper limit" (raid10/raid0/raid5/raid6)
 *  - nparity is non-zero only for the parity profiles raid5/raid6
 *  - dup is the only profile with dev_stripes == 2 (two copies on one device)
 */
const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
	[BTRFS_RAID_RAID10] = {
		.sub_stripes = 2,
		.dev_stripes = 1,
		.devs_max = 0,
		.devs_min = 2,
		.tolerated_failures = 1,
		.devs_increment = 2,
		.ncopies = 2,
		.nparity = 0,
		.raid_name = "raid10",
		.bg_flag = BTRFS_BLOCK_GROUP_RAID10,
		.mindev_error = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET,
	},
	[BTRFS_RAID_RAID1] = {
		.sub_stripes = 1,
		.dev_stripes = 1,
		.devs_max = 2,
		.devs_min = 2,
		.tolerated_failures = 1,
		.devs_increment = 2,
		.ncopies = 2,
		.nparity = 0,
		.raid_name = "raid1",
		.bg_flag = BTRFS_BLOCK_GROUP_RAID1,
		.mindev_error = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET,
	},
	[BTRFS_RAID_RAID1C3] = {
		.sub_stripes = 1,
		.dev_stripes = 1,
		.devs_max = 3,
		.devs_min = 3,
		.tolerated_failures = 2,
		.devs_increment = 3,
		.ncopies = 3,
		.nparity = 0,
		.raid_name = "raid1c3",
		.bg_flag = BTRFS_BLOCK_GROUP_RAID1C3,
		.mindev_error = BTRFS_ERROR_DEV_RAID1C3_MIN_NOT_MET,
	},
	[BTRFS_RAID_RAID1C4] = {
		.sub_stripes = 1,
		.dev_stripes = 1,
		.devs_max = 4,
		.devs_min = 4,
		.tolerated_failures = 3,
		.devs_increment = 4,
		.ncopies = 4,
		.nparity = 0,
		.raid_name = "raid1c4",
		.bg_flag = BTRFS_BLOCK_GROUP_RAID1C4,
		.mindev_error = BTRFS_ERROR_DEV_RAID1C4_MIN_NOT_MET,
	},
	[BTRFS_RAID_DUP] = {
		.sub_stripes = 1,
		.dev_stripes = 2,
		.devs_max = 1,
		.devs_min = 1,
		.tolerated_failures = 0,
		.devs_increment = 1,
		.ncopies = 2,
		.nparity = 0,
		.raid_name = "dup",
		.bg_flag = BTRFS_BLOCK_GROUP_DUP,
		.mindev_error = 0,
	},
	[BTRFS_RAID_RAID0] = {
		.sub_stripes = 1,
		.dev_stripes = 1,
		.devs_max = 0,
		.devs_min = 1,
		.tolerated_failures = 0,
		.devs_increment = 1,
		.ncopies = 1,
		.nparity = 0,
		.raid_name = "raid0",
		.bg_flag = BTRFS_BLOCK_GROUP_RAID0,
		.mindev_error = 0,
	},
	[BTRFS_RAID_SINGLE] = {
		.sub_stripes = 1,
		.dev_stripes = 1,
		.devs_max = 1,
		.devs_min = 1,
		.tolerated_failures = 0,
		.devs_increment = 1,
		.ncopies = 1,
		.nparity = 0,
		.raid_name = "single",
		.bg_flag = 0,	/* single has no dedicated block group flag */
		.mindev_error = 0,
	},
	[BTRFS_RAID_RAID5] = {
		.sub_stripes = 1,
		.dev_stripes = 1,
		.devs_max = 0,
		.devs_min = 2,
		.tolerated_failures = 1,
		.devs_increment = 1,
		.ncopies = 1,
		.nparity = 1,
		.raid_name = "raid5",
		.bg_flag = BTRFS_BLOCK_GROUP_RAID5,
		.mindev_error = BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET,
	},
	[BTRFS_RAID_RAID6] = {
		.sub_stripes = 1,
		.dev_stripes = 1,
		.devs_max = 0,
		.devs_min = 3,
		.tolerated_failures = 2,
		.devs_increment = 1,
		.ncopies = 1,
		.nparity = 2,
		.raid_name = "raid6",
		.bg_flag = BTRFS_BLOCK_GROUP_RAID6,
		.mindev_error = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,
	},
};
155
156
157
158
159
160enum btrfs_raid_types __attribute_const__ btrfs_bg_flags_to_raid_index(u64 flags)
161{
162 if (flags & BTRFS_BLOCK_GROUP_RAID10)
163 return BTRFS_RAID_RAID10;
164 else if (flags & BTRFS_BLOCK_GROUP_RAID1)
165 return BTRFS_RAID_RAID1;
166 else if (flags & BTRFS_BLOCK_GROUP_RAID1C3)
167 return BTRFS_RAID_RAID1C3;
168 else if (flags & BTRFS_BLOCK_GROUP_RAID1C4)
169 return BTRFS_RAID_RAID1C4;
170 else if (flags & BTRFS_BLOCK_GROUP_DUP)
171 return BTRFS_RAID_DUP;
172 else if (flags & BTRFS_BLOCK_GROUP_RAID0)
173 return BTRFS_RAID_RAID0;
174 else if (flags & BTRFS_BLOCK_GROUP_RAID5)
175 return BTRFS_RAID_RAID5;
176 else if (flags & BTRFS_BLOCK_GROUP_RAID6)
177 return BTRFS_RAID_RAID6;
178
179 return BTRFS_RAID_SINGLE;
180}
181
182const char *btrfs_bg_type_to_raid_name(u64 flags)
183{
184 const int index = btrfs_bg_flags_to_raid_index(flags);
185
186 if (index >= BTRFS_NR_RAID_TYPES)
187 return NULL;
188
189 return btrfs_raid_array[index].raid_name;
190}
191
192
193
194
195
/*
 * Fill @buf with a '|'-separated textual description of the block group
 * flags in @bg_flags, e.g. "data|raid1".  At most @size_buf bytes are
 * written; on overflow the output is simply truncated at the last flag that
 * fit.  The caller must provide a sufficiently large buffer.
 */
void btrfs_describe_block_groups(u64 bg_flags, char *buf, u32 size_buf)
{
	int i;
	int ret;
	char *bp = buf;		/* current write position */
	u64 flags = bg_flags;	/* bits not yet described */
	u32 size_bp = size_buf;	/* remaining space in buf */

	if (!flags) {
		strcpy(bp, "NONE");
		return;
	}

/*
 * If @flag is set in @flags, append "desc|" to the buffer, advance the
 * write position and clear the bit so leftovers can be detected below.
 * Bails out to out_overflow when the description does not fit.
 */
#define DESCRIBE_FLAG(flag, desc)					\
	do {								\
		if (flags & (flag)) {					\
			ret = snprintf(bp, size_bp, "%s|", (desc));	\
			if (ret < 0 || ret >= size_bp)			\
				goto out_overflow;			\
			size_bp -= ret;					\
			bp += ret;					\
			flags &= ~(flag);				\
		}							\
	} while (0)

	DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_DATA, "data");
	DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_SYSTEM, "system");
	DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_METADATA, "metadata");

	DESCRIBE_FLAG(BTRFS_AVAIL_ALLOC_BIT_SINGLE, "single");
	/* All RAID profile bits, names taken from the raid attribute table. */
	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
		DESCRIBE_FLAG(btrfs_raid_array[i].bg_flag,
			      btrfs_raid_array[i].raid_name);
#undef DESCRIBE_FLAG

	/* Any unrecognized leftover bits are printed in hex. */
	if (flags) {
		ret = snprintf(bp, size_bp, "0x%llx|", flags);
		size_bp -= ret;
	}

	/* If anything was written, overwrite the trailing '|' with NUL. */
	if (size_bp < size_buf)
		buf[size_buf - size_bp - 1] = '\0';

	/*
	 * On overflow we return whatever fit; the description ends at the
	 * last complete flag name.
	 */
out_overflow:;
}
245
246static int init_first_rw_device(struct btrfs_trans_handle *trans);
247static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info);
248static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
249static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
250static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
251 enum btrfs_map_op op,
252 u64 logical, u64 *length,
253 struct btrfs_bio **bbio_ret,
254 int mirror_num, int need_raid_map);
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
/* Serializes scanning/forgetting of devices and access to fs_uuids below. */
DEFINE_MUTEX(uuid_mutex);
/* Global list of all scanned btrfs_fs_devices, linked via ->fs_list. */
static LIST_HEAD(fs_uuids);

/* Accessor for the global list of scanned filesystems. */
struct list_head * __attribute_const__ btrfs_get_fs_uuids(void)
{
	return &fs_uuids;
}
362
363
364
365
366
367
368
369
370
371
372static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid,
373 const u8 *metadata_fsid)
374{
375 struct btrfs_fs_devices *fs_devs;
376
377 fs_devs = kzalloc(sizeof(*fs_devs), GFP_KERNEL);
378 if (!fs_devs)
379 return ERR_PTR(-ENOMEM);
380
381 mutex_init(&fs_devs->device_list_mutex);
382
383 INIT_LIST_HEAD(&fs_devs->devices);
384 INIT_LIST_HEAD(&fs_devs->alloc_list);
385 INIT_LIST_HEAD(&fs_devs->fs_list);
386 INIT_LIST_HEAD(&fs_devs->seed_list);
387 if (fsid)
388 memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE);
389
390 if (metadata_fsid)
391 memcpy(fs_devs->metadata_uuid, metadata_fsid, BTRFS_FSID_SIZE);
392 else if (fsid)
393 memcpy(fs_devs->metadata_uuid, fsid, BTRFS_FSID_SIZE);
394
395 return fs_devs;
396}
397
/*
 * Free a btrfs_device and everything it owns: its rcu-protected name, the
 * allocation state extent io tree, the preallocated flush bio and any zone
 * info.  The device must already be off all per-fs lists.
 */
void btrfs_free_device(struct btrfs_device *device)
{
	/* Must not be freed while still queued for post-commit work. */
	WARN_ON(!list_empty(&device->post_commit_list));
	rcu_string_free(device->name);
	extent_io_tree_release(&device->alloc_state);
	bio_put(device->flush_bio);
	btrfs_destroy_dev_zone_info(device);
	kfree(device);
}
407
408static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
409{
410 struct btrfs_device *device;
411 WARN_ON(fs_devices->opened);
412 while (!list_empty(&fs_devices->devices)) {
413 device = list_entry(fs_devices->devices.next,
414 struct btrfs_device, dev_list);
415 list_del(&device->dev_list);
416 btrfs_free_device(device);
417 }
418 kfree(fs_devices);
419}
420
421void __exit btrfs_cleanup_fs_uuids(void)
422{
423 struct btrfs_fs_devices *fs_devices;
424
425 while (!list_empty(&fs_uuids)) {
426 fs_devices = list_entry(fs_uuids.next,
427 struct btrfs_fs_devices, fs_list);
428 list_del(&fs_devices->fs_list);
429 free_fs_devices(fs_devices);
430 }
431}
432
433static noinline struct btrfs_fs_devices *find_fsid(
434 const u8 *fsid, const u8 *metadata_fsid)
435{
436 struct btrfs_fs_devices *fs_devices;
437
438 ASSERT(fsid);
439
440
441 list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
442 if (metadata_fsid) {
443 if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0
444 && memcmp(metadata_fsid, fs_devices->metadata_uuid,
445 BTRFS_FSID_SIZE) == 0)
446 return fs_devices;
447 } else {
448 if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
449 return fs_devices;
450 }
451 }
452 return NULL;
453}
454
/*
 * Look up the fs_devices for a scanned device whose super block carries the
 * METADATA_UUID incompat flag (fsid != metadata_uuid on disk).  Two special
 * fsid-change races are handled before falling back to an exact lookup.
 */
static struct btrfs_fs_devices *find_fsid_with_metadata_uuid(
		struct btrfs_super_block *disk_super)
{

	struct btrfs_fs_devices *fs_devices;

	/*
	 * Case 1: fs_devices is mid fsid-change and still has
	 * fsid == metadata_uuid, while the scanned disk's metadata_uuid
	 * matches that fsid.  I.e. the tracking structure was created from a
	 * device that had not observed the change yet.
	 */
	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
		if (fs_devices->fsid_change &&
		    memcmp(disk_super->metadata_uuid, fs_devices->fsid,
			   BTRFS_FSID_SIZE) == 0 &&
		    memcmp(fs_devices->fsid, fs_devices->metadata_uuid,
			   BTRFS_FSID_SIZE) == 0) {
			return fs_devices;
		}
	}

	/*
	 * Case 2: fs_devices is mid fsid-change with distinct
	 * fsid/metadata_uuid, and the scanned disk's metadata_uuid matches
	 * the tracked metadata_uuid.
	 */
	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
		if (fs_devices->fsid_change &&
		    memcmp(fs_devices->metadata_uuid,
			   fs_devices->fsid, BTRFS_FSID_SIZE) != 0 &&
		    memcmp(disk_super->metadata_uuid, fs_devices->metadata_uuid,
			   BTRFS_FSID_SIZE) == 0) {
			return fs_devices;
		}
	}

	/* No in-progress change matched: require both uuids to match. */
	return find_fsid(disk_super->fsid, disk_super->metadata_uuid);
}
494
495
/*
 * Open a block device by path and read its primary btrfs super block.
 *
 * @device_path: path of the device to open
 * @flags:	 FMODE_* flags passed to blkdev_get_by_path()
 * @holder:	 exclusive-open holder cookie
 * @flush:	 if non-zero, write back and wait on the bdev's page cache
 *		 before reading
 * @bdev:	 out: the opened block device, NULL on failure
 * @disk_super:	 out: the super block; caller releases it with
 *		 btrfs_release_disk_super()
 *
 * Return: 0 on success, negative errno on failure (the bdev is put and
 * *bdev is NULLed on every error path).
 */
static int
btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
		      int flush, struct block_device **bdev,
		      struct btrfs_super_block **disk_super)
{
	int ret;

	*bdev = blkdev_get_by_path(device_path, flags, holder);

	if (IS_ERR(*bdev)) {
		ret = PTR_ERR(*bdev);
		goto error;
	}

	if (flush)
		filemap_write_and_wait((*bdev)->bd_inode->i_mapping);
	ret = set_blocksize(*bdev, BTRFS_BDEV_BLOCKSIZE);
	if (ret) {
		blkdev_put(*bdev, flags);
		goto error;
	}
	/* Drop stale page cache after changing the block size. */
	invalidate_bdev(*bdev);
	*disk_super = btrfs_read_dev_super(*bdev);
	if (IS_ERR(*disk_super)) {
		ret = PTR_ERR(*disk_super);
		blkdev_put(*bdev, flags);
		goto error;
	}

	return 0;

error:
	*bdev = NULL;
	return ret;
}
531
532static bool device_path_matched(const char *path, struct btrfs_device *device)
533{
534 int found;
535
536 rcu_read_lock();
537 found = strcmp(rcu_str_deref(device->name), path);
538 rcu_read_unlock();
539
540 return found == 0;
541}
542
543
544
545
546
547
548
549
550
551
552
553
/*
 * Search and remove stale (not mounted) devices.
 *
 * @path:        optional; when given, only the stale device with this path
 *               is removed
 * @skip_device: optional; this device is never removed
 *
 * Return: 0 on success (always 0 when @path is NULL),
 *         -EBUSY when @path matched a device of a mounted (opened) fs,
 *         -ENOENT when @path matched nothing.
 */
static int btrfs_free_stale_devices(const char *path,
				     struct btrfs_device *skip_device)
{
	struct btrfs_fs_devices *fs_devices, *tmp_fs_devices;
	struct btrfs_device *device, *tmp_device;
	int ret = 0;

	lockdep_assert_held(&uuid_mutex);

	/* Default to "not found" when looking for a specific path. */
	if (path)
		ret = -ENOENT;

	list_for_each_entry_safe(fs_devices, tmp_fs_devices, &fs_uuids, fs_list) {

		mutex_lock(&fs_devices->device_list_mutex);
		list_for_each_entry_safe(device, tmp_device,
					 &fs_devices->devices, dev_list) {
			if (skip_device && skip_device == device)
				continue;
			if (path && !device->name)
				continue;
			if (path && !device_path_matched(path, device))
				continue;
			if (fs_devices->opened) {
				/* Mounted fs: cannot remove, report busy. */
				if (path && ret != 0)
					ret = -EBUSY;
				break;
			}

			/* Delete the stale device. */
			fs_devices->num_devices--;
			list_del(&device->dev_list);
			btrfs_free_device(device);

			ret = 0;
		}
		mutex_unlock(&fs_devices->device_list_mutex);

		/* Drop the fs_devices entry once its last device is gone. */
		if (fs_devices->num_devices == 0) {
			btrfs_sysfs_remove_fsid(fs_devices);
			list_del(&fs_devices->fs_list);
			free_fs_devices(fs_devices);
		}
	}

	return ret;
}
602
603
604
605
606
607
/*
 * Open the block device backing @device, verify its super block against the
 * expected devid/uuid, and wire it into @fs_devices (open/rw device counts,
 * alloc list, seeding and rotational state).
 *
 * Return: 0 on success, -EINVAL on verification failure, or the error from
 * btrfs_get_bdev_and_sb().
 */
static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
			struct btrfs_device *device, fmode_t flags,
			void *holder)
{
	struct request_queue *q;
	struct block_device *bdev;
	struct btrfs_super_block *disk_super;
	u64 devid;
	int ret;

	/* Already open, or nothing to open. */
	if (device->bdev)
		return -EINVAL;
	if (!device->name)
		return -EINVAL;

	ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1,
				    &bdev, &disk_super);
	if (ret)
		return ret;

	/* The on-disk identity must match what was registered at scan time. */
	devid = btrfs_stack_device_id(&disk_super->dev_item);
	if (devid != device->devid)
		goto error_free_page;

	if (memcmp(device->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE))
		goto error_free_page;

	device->generation = btrfs_super_generation(disk_super);

	if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) {
		/* Seeding together with a changed metadata uuid is invalid. */
		if (btrfs_super_incompat_flags(disk_super) &
		    BTRFS_FEATURE_INCOMPAT_METADATA_UUID) {
			pr_err(
		"BTRFS: Invalid seeding and uuid-changed device detected\n");
			goto error_free_page;
		}

		/* Seed devices are read-only members of the fs. */
		clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
		fs_devices->seeding = true;
	} else {
		if (bdev_read_only(bdev))
			clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
		else
			set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
	}

	/* One rotational member marks the whole fs as rotating. */
	q = bdev_get_queue(bdev);
	if (!blk_queue_nonrot(q))
		fs_devices->rotating = true;

	device->bdev = bdev;
	clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
	device->mode = flags;

	fs_devices->open_devices++;
	/* Replace targets are not counted as allocatable rw devices. */
	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
	    device->devid != BTRFS_DEV_REPLACE_DEVID) {
		fs_devices->rw_devices++;
		list_add_tail(&device->dev_alloc_list, &fs_devices->alloc_list);
	}
	btrfs_release_disk_super(disk_super);

	return 0;

error_free_page:
	btrfs_release_disk_super(disk_super);
	blkdev_put(bdev, flags);

	return -EINVAL;
}
678
679
680
681
682
683
684
/*
 * Look up the fs_devices for a scanned device that has CHANGING_FSID_V2 set
 * but no METADATA_UUID flag.  Match fs_devices whose metadata_uuid differs
 * from its fsid yet equals the scanned disk's fsid, and that is not itself
 * marked as mid fsid-change.  Fall back to a plain fsid lookup.
 */
static struct btrfs_fs_devices *find_fsid_inprogress(
					struct btrfs_super_block *disk_super)
{
	struct btrfs_fs_devices *fs_devices;

	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
		if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
			   BTRFS_FSID_SIZE) != 0 &&
		    memcmp(fs_devices->metadata_uuid, disk_super->fsid,
			   BTRFS_FSID_SIZE) == 0 && !fs_devices->fsid_change) {
			return fs_devices;
		}
	}

	return find_fsid(disk_super->fsid, NULL);
}
701
702
/*
 * Look up the fs_devices for a scanned device that has both the
 * CHANGING_FSID_V2 flag and the METADATA_UUID incompat flag set, i.e. its
 * fsid change was interrupted midway.  Two forms of the race are matched.
 */
static struct btrfs_fs_devices *find_fsid_changed(
					struct btrfs_super_block *disk_super)
{
	struct btrfs_fs_devices *fs_devices;

	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
		/*
		 * Case 1: fs_devices already tracks distinct fsid and
		 * metadata_uuid; the scanned disk agrees on metadata_uuid but
		 * still carries the old fsid.
		 */
		if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
			   BTRFS_FSID_SIZE) != 0 &&
		    memcmp(fs_devices->metadata_uuid, disk_super->metadata_uuid,
			   BTRFS_FSID_SIZE) == 0 &&
		    memcmp(fs_devices->fsid, disk_super->fsid,
			   BTRFS_FSID_SIZE) != 0)
			return fs_devices;

		/*
		 * Case 2: fs_devices has fsid == metadata_uuid (change not yet
		 * observed) and that value equals the scanned disk's
		 * metadata_uuid.
		 */
		if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
			   BTRFS_FSID_SIZE) == 0 &&
		    memcmp(fs_devices->fsid, disk_super->metadata_uuid,
			   BTRFS_FSID_SIZE) == 0)
			return fs_devices;
	}

	return NULL;
}
737
/*
 * Look up the fs_devices for a scanned device whose fsid change was
 * reverted: the fs_devices is marked mid fsid-change, tracks distinct
 * fsid/metadata_uuid, and its metadata_uuid equals the scanned disk's
 * (restored) fsid.
 */
static struct btrfs_fs_devices *find_fsid_reverted_metadata(
				struct btrfs_super_block *disk_super)
{
	struct btrfs_fs_devices *fs_devices;

	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
		if (memcmp(fs_devices->fsid, fs_devices->metadata_uuid,
			   BTRFS_FSID_SIZE) != 0 &&
		    memcmp(fs_devices->metadata_uuid, disk_super->fsid,
			   BTRFS_FSID_SIZE) == 0 &&
		    fs_devices->fsid_change)
			return fs_devices;
	}

	return NULL;
}
763
764
765
766
767
768
769
/*
 * Register a scanned device: find (or create) the fs_devices it belongs to
 * and add or update the btrfs_device entry for it.
 *
 * @path:             path under which the device was scanned
 * @disk_super:       the device's primary super block
 * @new_device_added: out, set to true when a brand new device entry was
 *                    created (caller then prunes stale duplicates)
 *
 * Return: the added/updated device, or an ERR_PTR on failure
 * (-EBUSY when the fs is mounted and the device is unknown, -EEXIST for a
 * duplicate/older copy, -ENOMEM on allocation failure).
 *
 * Caller holds uuid_mutex (all find_fsid* helpers walk fs_uuids).
 */
static noinline struct btrfs_device *device_list_add(const char *path,
			   struct btrfs_super_block *disk_super,
			   bool *new_device_added)
{
	struct btrfs_device *device;
	struct btrfs_fs_devices *fs_devices = NULL;
	struct rcu_string *name;
	u64 found_transid = btrfs_super_generation(disk_super);
	u64 devid = btrfs_stack_device_id(&disk_super->dev_item);
	bool has_metadata_uuid = (btrfs_super_incompat_flags(disk_super) &
		BTRFS_FEATURE_INCOMPAT_METADATA_UUID);
	bool fsid_change_in_progress = (btrfs_super_flags(disk_super) &
					BTRFS_SUPER_FLAG_CHANGING_FSID_V2);

	/* Pick the lookup strategy matching the device's fsid-change state. */
	if (fsid_change_in_progress) {
		if (!has_metadata_uuid)
			fs_devices = find_fsid_inprogress(disk_super);
		else
			fs_devices = find_fsid_changed(disk_super);
	} else if (has_metadata_uuid) {
		fs_devices = find_fsid_with_metadata_uuid(disk_super);
	} else {
		fs_devices = find_fsid_reverted_metadata(disk_super);
		if (!fs_devices)
			fs_devices = find_fsid(disk_super->fsid, NULL);
	}


	if (!fs_devices) {
		/* First device of this filesystem ever scanned. */
		if (has_metadata_uuid)
			fs_devices = alloc_fs_devices(disk_super->fsid,
						      disk_super->metadata_uuid);
		else
			fs_devices = alloc_fs_devices(disk_super->fsid, NULL);

		if (IS_ERR(fs_devices))
			return ERR_CAST(fs_devices);

		fs_devices->fsid_change = fsid_change_in_progress;

		mutex_lock(&fs_devices->device_list_mutex);
		list_add(&fs_devices->fs_list, &fs_uuids);

		device = NULL;
	} else {
		mutex_lock(&fs_devices->device_list_mutex);
		device = btrfs_find_device(fs_devices, devid,
				disk_super->dev_item.uuid, NULL);

		/*
		 * If this device belongs to an fs_devices that was mid
		 * fsid-change and we now see a newer generation, adopt the
		 * scanned fsid/metadata_uuid and clear the in-progress flag.
		 */
		if (fs_devices->fsid_change &&
		    found_transid > fs_devices->latest_generation) {
			memcpy(fs_devices->fsid, disk_super->fsid,
					BTRFS_FSID_SIZE);

			if (has_metadata_uuid)
				memcpy(fs_devices->metadata_uuid,
				       disk_super->metadata_uuid,
				       BTRFS_FSID_SIZE);
			else
				memcpy(fs_devices->metadata_uuid,
				       disk_super->fsid, BTRFS_FSID_SIZE);

			fs_devices->fsid_change = false;
		}
	}

	if (!device) {
		/* New devices cannot join an fs that is already mounted. */
		if (fs_devices->opened) {
			mutex_unlock(&fs_devices->device_list_mutex);
			return ERR_PTR(-EBUSY);
		}

		device = btrfs_alloc_device(NULL, &devid,
					    disk_super->dev_item.uuid);
		if (IS_ERR(device)) {
			mutex_unlock(&fs_devices->device_list_mutex);
			/* The fs_devices entry can safely stay around. */
			return device;
		}

		name = rcu_string_strdup(path, GFP_NOFS);
		if (!name) {
			btrfs_free_device(device);
			mutex_unlock(&fs_devices->device_list_mutex);
			return ERR_PTR(-ENOMEM);
		}
		rcu_assign_pointer(device->name, name);

		list_add_rcu(&device->dev_list, &fs_devices->devices);
		fs_devices->num_devices++;

		device->fs_devices = fs_devices;
		*new_device_added = true;

		if (disk_super->label[0])
			pr_info(
	"BTRFS: device label %s devid %llu transid %llu %s scanned by %s (%d)\n",
				disk_super->label, devid, found_transid, path,
				current->comm, task_pid_nr(current));
		else
			pr_info(
	"BTRFS: device fsid %pU devid %llu transid %llu %s scanned by %s (%d)\n",
				disk_super->fsid, devid, found_transid, path,
				current->comm, task_pid_nr(current));

	} else if (!device->name || strcmp(device->name->str, path)) {
		/*
		 * The known device was scanned again under a different path.
		 * This can be a genuine path change (e.g. a new symlink or a
		 * re-enumerated device node) or a second disk carrying the
		 * same devid/uuid (e.g. a dd copy), in which case we must not
		 * blindly take the new path.
		 */
		if (!fs_devices->opened && found_transid < device->generation) {
			/*
			 * Not mounted and more than one disk with the same
			 * uuid and devid exists: keep the copy with the
			 * larger generation (or the last one scanned when
			 * generations are equal).
			 */
			mutex_unlock(&fs_devices->device_list_mutex);
			return ERR_PTR(-EEXIST);
		}

		/*
		 * If the device is open, verify the new path resolves to the
		 * very same block device before renaming; otherwise it is a
		 * duplicate and gets rejected.
		 */
		if (device->bdev) {
			int error;
			dev_t path_dev;

			error = lookup_bdev(path, &path_dev);
			if (error) {
				mutex_unlock(&fs_devices->device_list_mutex);
				return ERR_PTR(error);
			}

			if (device->bdev->bd_dev != path_dev) {
				mutex_unlock(&fs_devices->device_list_mutex);
				/*
				 * Different underlying device with identical
				 * identity: warn and refuse.
				 */
				btrfs_warn_in_rcu(NULL,
	"duplicate device %s devid %llu generation %llu scanned by %s (%d)",
						path, devid, found_transid,
						current->comm,
						task_pid_nr(current));
				return ERR_PTR(-EEXIST);
			}
			btrfs_info_in_rcu(device->fs_info,
	"devid %llu device path %s changed to %s scanned by %s (%d)",
					  devid, rcu_str_deref(device->name),
					  path, current->comm,
					  task_pid_nr(current));
		}

		name = rcu_string_strdup(path, GFP_NOFS);
		if (!name) {
			mutex_unlock(&fs_devices->device_list_mutex);
			return ERR_PTR(-ENOMEM);
		}
		rcu_string_free(device->name);
		rcu_assign_pointer(device->name, name);
		/* A previously missing device has reappeared. */
		if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) {
			fs_devices->missing_devices--;
			clear_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
		}
	}

	/*
	 * Track the highest generation seen while unmounted; once mounted the
	 * generation is owned by the running filesystem and left alone.
	 */
	if (!fs_devices->opened) {
		device->generation = found_transid;
		fs_devices->latest_generation = max_t(u64, found_transid,
						fs_devices->latest_generation);
	}

	fs_devices->total_devices = btrfs_super_num_devices(disk_super);

	mutex_unlock(&fs_devices->device_list_mutex);
	return device;
}
985
/*
 * Create a shallow clone of @orig: a new fs_devices with the same fsid and
 * freshly allocated btrfs_device entries copying each original device's
 * devid/uuid/name.  The clones have no open bdev.
 *
 * Return: the clone, or an ERR_PTR on allocation failure.
 */
static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
{
	struct btrfs_fs_devices *fs_devices;
	struct btrfs_device *device;
	struct btrfs_device *orig_dev;
	int ret = 0;

	lockdep_assert_held(&uuid_mutex);

	fs_devices = alloc_fs_devices(orig->fsid, NULL);
	if (IS_ERR(fs_devices))
		return fs_devices;

	fs_devices->total_devices = orig->total_devices;

	list_for_each_entry(orig_dev, &orig->devices, dev_list) {
		struct rcu_string *name;

		device = btrfs_alloc_device(NULL, &orig_dev->devid,
					    orig_dev->uuid);
		if (IS_ERR(device)) {
			ret = PTR_ERR(device);
			goto error;
		}

		/*
		 * Copying the name without rcu_read_lock() is fine here: the
		 * uuid_mutex (asserted above) keeps the original list stable.
		 */
		if (orig_dev->name) {
			name = rcu_string_strdup(orig_dev->name->str,
					GFP_KERNEL);
			if (!name) {
				btrfs_free_device(device);
				ret = -ENOMEM;
				goto error;
			}
			rcu_assign_pointer(device->name, name);
		}

		list_add(&device->dev_list, &fs_devices->devices);
		device->fs_devices = fs_devices;
		fs_devices->num_devices++;
	}
	return fs_devices;
error:
	free_fs_devices(fs_devices);
	return ERR_PTR(ret);
}
1035
/*
 * Drop every device of @fs_devices that is not referenced by the fs
 * metadata (IN_FS_METADATA not set), closing its bdev and releasing it.
 * While walking, track in *latest_dev the present, non-missing,
 * non-replace-target device with the highest generation.
 */
static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices,
				      struct btrfs_device **latest_dev)
{
	struct btrfs_device *device, *next;

	list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
		if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state)) {
			/* Device is in use: keep it, maybe as the latest. */
			if (!test_bit(BTRFS_DEV_STATE_REPLACE_TGT,
				      &device->dev_state) &&
			    !test_bit(BTRFS_DEV_STATE_MISSING,
				      &device->dev_state) &&
			    (!*latest_dev ||
			     device->generation > (*latest_dev)->generation)) {
				*latest_dev = device;
			}
			continue;
		}

		/*
		 * The replace target is handled by the dev-replace code, not
		 * here, so never free it from this path.
		 */
		if (device->devid == BTRFS_DEV_REPLACE_DEVID)
			continue;

		if (device->bdev) {
			blkdev_put(device->bdev, device->mode);
			device->bdev = NULL;
			fs_devices->open_devices--;
		}
		if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
			list_del_init(&device->dev_alloc_list);
			clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
			fs_devices->rw_devices--;
		}
		list_del_init(&device->dev_list);
		fs_devices->num_devices--;
		btrfs_free_device(device);
	}

}
1078
1079
1080
1081
1082
/*
 * After reading the chunk tree, release devices that are not part of the fs
 * metadata, in both the main fs_devices and all of its seed filesystems,
 * and point latest_bdev at the surviving device with the highest generation.
 *
 * NOTE(review): if no device survives with IN_FS_METADATA set, latest_dev
 * stays NULL and the dereference below would crash — presumably callers
 * guarantee at least one metadata device exists here; TODO confirm.
 */
void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices)
{
	struct btrfs_device *latest_dev = NULL;
	struct btrfs_fs_devices *seed_dev;

	mutex_lock(&uuid_mutex);
	__btrfs_free_extra_devids(fs_devices, &latest_dev);

	list_for_each_entry(seed_dev, &fs_devices->seed_list, seed_list)
		__btrfs_free_extra_devids(seed_dev, &latest_dev);

	fs_devices->latest_bdev = latest_dev->bdev;

	mutex_unlock(&uuid_mutex);
}
1098
1099static void btrfs_close_bdev(struct btrfs_device *device)
1100{
1101 if (!device->bdev)
1102 return;
1103
1104 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
1105 sync_blockdev(device->bdev);
1106 invalidate_bdev(device->bdev);
1107 }
1108
1109 blkdev_put(device->bdev, device->mode);
1110}
1111
/*
 * Detach @device from its running filesystem: drop it from the alloc list,
 * fix up the rw/open/missing counters, close the bdev and reset per-mount
 * state so the device can be reopened cleanly later.
 */
static void btrfs_close_one_device(struct btrfs_device *device)
{
	struct btrfs_fs_devices *fs_devices = device->fs_devices;

	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
	    device->devid != BTRFS_DEV_REPLACE_DEVID) {
		list_del_init(&device->dev_alloc_list);
		fs_devices->rw_devices--;
	}

	/* The replace target loses its target role on close. */
	if (device->devid == BTRFS_DEV_REPLACE_DEVID)
		clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);

	if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
		fs_devices->missing_devices--;

	btrfs_close_bdev(device);
	if (device->bdev) {
		fs_devices->open_devices--;
		device->bdev = NULL;
	}
	clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
	btrfs_destroy_dev_zone_info(device);

	device->fs_info = NULL;
	atomic_set(&device->dev_stats_ccnt, 0);
	extent_io_tree_release(&device->alloc_state);

	/*
	 * Reset the flush error record: it is per-mount state, and stale
	 * values must not leak into a subsequent mount of this device.
	 */
	device->last_flush_error = 0;

	/* Verify the device is fully quiesced and unlinked. */
	ASSERT(!test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state));
	ASSERT(!test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state));
	ASSERT(list_empty(&device->dev_alloc_list));
	ASSERT(list_empty(&device->post_commit_list));
	ASSERT(atomic_read(&device->reada_in_flight) == 0);
}
1160
/*
 * Drop one reference on an opened fs_devices; when the last opener goes
 * away, close every member device and reset the open-state fields.
 */
static void close_fs_devices(struct btrfs_fs_devices *fs_devices)
{
	struct btrfs_device *device, *tmp;

	lockdep_assert_held(&uuid_mutex);

	/* Still opened by someone else: just drop the reference. */
	if (--fs_devices->opened > 0)
		return;

	list_for_each_entry_safe(device, tmp, &fs_devices->devices, dev_list)
		btrfs_close_one_device(device);

	/* Every device should have been closed by now. */
	WARN_ON(fs_devices->open_devices);
	WARN_ON(fs_devices->rw_devices);
	fs_devices->opened = 0;
	fs_devices->seeding = false;
	fs_devices->fs_info = NULL;
}
1179
/*
 * Close @fs_devices and, if that was the last opener, also close and free
 * all of its seed filesystems.
 */
void btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
{
	LIST_HEAD(list);
	struct btrfs_fs_devices *tmp;

	mutex_lock(&uuid_mutex);
	close_fs_devices(fs_devices);
	/* Fully closed: take over the seed list for teardown below. */
	if (!fs_devices->opened)
		list_splice_init(&fs_devices->seed_list, &list);

	list_for_each_entry_safe(fs_devices, tmp, &list, seed_list) {
		close_fs_devices(fs_devices);
		list_del(&fs_devices->seed_list);
		free_fs_devices(fs_devices);
	}
	mutex_unlock(&uuid_mutex);
}
1197
/*
 * Open every device of @fs_devices (exclusively), dropping devices whose
 * open fails with -ENODATA and tracking the highest-generation device.
 *
 * Return: 0 on success, -EINVAL when no device could be opened.
 */
static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
				fmode_t flags, void *holder)
{
	struct btrfs_device *device;
	struct btrfs_device *latest_dev = NULL;
	struct btrfs_device *tmp_device;

	flags |= FMODE_EXCL;

	list_for_each_entry_safe(device, tmp_device, &fs_devices->devices,
				 dev_list) {
		int ret;

		ret = btrfs_open_one_device(fs_devices, device, flags, holder);
		if (ret == 0 &&
		    (!latest_dev || device->generation > latest_dev->generation)) {
			latest_dev = device;
		} else if (ret == -ENODATA) {
			/* No btrfs super block found: forget this device. */
			fs_devices->num_devices--;
			list_del(&device->dev_list);
			btrfs_free_device(device);
		}
	}
	if (fs_devices->open_devices == 0)
		return -EINVAL;

	fs_devices->opened = 1;
	fs_devices->latest_bdev = latest_dev->bdev;
	fs_devices->total_rw_bytes = 0;
	fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_REGULAR;
	fs_devices->read_policy = BTRFS_READ_POLICY_PID;

	return 0;
}
1232
1233static int devid_cmp(void *priv, const struct list_head *a,
1234 const struct list_head *b)
1235{
1236 const struct btrfs_device *dev1, *dev2;
1237
1238 dev1 = list_entry(a, struct btrfs_device, dev_list);
1239 dev2 = list_entry(b, struct btrfs_device, dev_list);
1240
1241 if (dev1->devid < dev2->devid)
1242 return -1;
1243 else if (dev1->devid > dev2->devid)
1244 return 1;
1245 return 0;
1246}
1247
/*
 * Open a scanned filesystem's devices for mounting.  If already opened,
 * only bump the open count; otherwise sort the devices by devid and open
 * them all.
 *
 * Caller holds uuid_mutex, which provides the exclusion here (the
 * device_list_mutex is deliberately not taken around the opens).
 *
 * Return: 0 on success or the error from open_fs_devices().
 */
int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
		       fmode_t flags, void *holder)
{
	int ret;

	lockdep_assert_held(&uuid_mutex);

	if (fs_devices->opened) {
		/* Already open: just take another reference. */
		fs_devices->opened++;
		ret = 0;
	} else {
		list_sort(NULL, &fs_devices->devices, devid_cmp);
		ret = open_fs_devices(fs_devices, flags, holder);
	}

	return ret;
}
1272
/*
 * Drop the page reference backing a super block returned by
 * btrfs_read_disk_super() (the super lives inside a page cache page).
 */
void btrfs_release_disk_super(struct btrfs_super_block *super)
{
	put_page(virt_to_page(super));
}
1279
/*
 * Read the super block at byte offset @bytenr of @bdev via the page cache
 * and validate its magic and recorded bytenr against @bytenr_orig (these
 * differ on zoned devices where the super block may live in a log zone).
 *
 * Return: pointer into the page holding the super block (release with
 * btrfs_release_disk_super()), or ERR_PTR(-EINVAL) on any validation
 * failure.
 */
static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev,
						       u64 bytenr, u64 bytenr_orig)
{
	struct btrfs_super_block *disk_super;
	struct page *page;
	void *p;
	pgoff_t index;

	/* The super block must lie fully inside the device. */
	if (bytenr + PAGE_SIZE >= i_size_read(bdev->bd_inode))
		return ERR_PTR(-EINVAL);

	/* Single-page mapping below requires the super to fit in one page. */
	if (sizeof(*disk_super) > PAGE_SIZE)
		return ERR_PTR(-EINVAL);

	/* And it must not straddle a page boundary. */
	index = bytenr >> PAGE_SHIFT;
	if ((bytenr + sizeof(*disk_super) - 1) >> PAGE_SHIFT != index)
		return ERR_PTR(-EINVAL);

	/* Pull the containing page through the bdev's page cache. */
	page = read_cache_page_gfp(bdev->bd_inode->i_mapping, index, GFP_KERNEL);

	if (IS_ERR(page))
		return ERR_CAST(page);

	p = page_address(page);

	/* The super block sits at the in-page offset of @bytenr. */
	disk_super = p + offset_in_page(bytenr);

	if (btrfs_super_bytenr(disk_super) != bytenr_orig ||
	    btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
		btrfs_release_disk_super(p);
		return ERR_PTR(-EINVAL);
	}

	/* Force NUL termination of the label if it fills the whole field. */
	if (disk_super->label[0] && disk_super->label[BTRFS_LABEL_SIZE - 1])
		disk_super->label[BTRFS_LABEL_SIZE - 1] = 0;

	return disk_super;
}
1323
1324int btrfs_forget_devices(const char *path)
1325{
1326 int ret;
1327
1328 mutex_lock(&uuid_mutex);
1329 ret = btrfs_free_stale_devices(strlen(path) ? path : NULL, NULL);
1330 mutex_unlock(&uuid_mutex);
1331
1332 return ret;
1333}
1334
1335
1336
1337
1338
1339
1340struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
1341 void *holder)
1342{
1343 struct btrfs_super_block *disk_super;
1344 bool new_device_added = false;
1345 struct btrfs_device *device = NULL;
1346 struct block_device *bdev;
1347 u64 bytenr, bytenr_orig;
1348 int ret;
1349
1350 lockdep_assert_held(&uuid_mutex);
1351
1352
1353
1354
1355
1356
1357
1358 flags |= FMODE_EXCL;
1359
1360 bdev = blkdev_get_by_path(path, flags, holder);
1361 if (IS_ERR(bdev))
1362 return ERR_CAST(bdev);
1363
1364 bytenr_orig = btrfs_sb_offset(0);
1365 ret = btrfs_sb_log_location_bdev(bdev, 0, READ, &bytenr);
1366 if (ret)
1367 return ERR_PTR(ret);
1368
1369 disk_super = btrfs_read_disk_super(bdev, bytenr, bytenr_orig);
1370 if (IS_ERR(disk_super)) {
1371 device = ERR_CAST(disk_super);
1372 goto error_bdev_put;
1373 }
1374
1375 device = device_list_add(path, disk_super, &new_device_added);
1376 if (!IS_ERR(device)) {
1377 if (new_device_added)
1378 btrfs_free_stale_devices(path, device);
1379 }
1380
1381 btrfs_release_disk_super(disk_super);
1382
1383error_bdev_put:
1384 blkdev_put(bdev, flags);
1385
1386 return device;
1387}
1388
1389
1390
1391
1392
/*
 * Check whether [*start, *start + len) overlaps a range already marked
 * CHUNK_ALLOCATED in the device's alloc_state tree.  On overlap, *start is
 * advanced to just past the allocated range and true is returned so the
 * caller can retry its search from there.
 */
static bool contains_pending_extent(struct btrfs_device *device, u64 *start,
				    u64 len)
{
	u64 physical_start, physical_end;

	lockdep_assert_held(&device->fs_info->chunk_mutex);

	if (!find_first_extent_bit(&device->alloc_state, *start,
				   &physical_start, &physical_end,
				   CHUNK_ALLOCATED, NULL)) {
		/* Either endpoint falling inside the other range = overlap. */
		if (in_range(physical_start, *start, len) ||
		    in_range(*start, physical_start,
			     physical_end - physical_start)) {
			*start = physical_end + 1;
			return true;
		}
	}
	return false;
}
1413
/*
 * Clamp a device-extent search start to the minimum allowed offset for the
 * device's chunk allocation policy.
 */
static u64 dev_extent_search_start(struct btrfs_device *device, u64 start)
{
	switch (device->fs_devices->chunk_alloc_policy) {
	case BTRFS_CHUNK_ALLOC_REGULAR:
		/*
		 * Never allocate in the first 1MiB: it holds the primary
		 * super block and space traditionally used by boot loaders.
		 */
		return max_t(u64, start, SZ_1M);
	case BTRFS_CHUNK_ALLOC_ZONED:
		/*
		 * Zoned devices allocate whole zones, so round the start up
		 * to the next zone boundary.
		 */
		return ALIGN(start, device->zone_info->zone_size);
	default:
		BUG();
	}
}
1435
/*
 * Shrink/advance a candidate hole on a zoned device so it only covers
 * zones that can actually be allocated and are empty.
 *
 * @hole_start and @hole_size are updated in place.  Returns true if the
 * hole was changed in any way, false if it was left untouched.
 */
static bool dev_extent_hole_check_zoned(struct btrfs_device *device,
					u64 *hole_start, u64 *hole_size,
					u64 num_bytes)
{
	u64 zone_size = device->zone_info->zone_size;
	u64 pos;
	int ret;
	bool changed = false;

	ASSERT(IS_ALIGNED(*hole_start, zone_size));

	while (*hole_size > 0) {
		/* First allocatable position inside the hole, if any. */
		pos = btrfs_find_allocatable_zones(device, *hole_start,
						   *hole_start + *hole_size,
						   num_bytes);
		if (pos != *hole_start) {
			/* Trim the unallocatable prefix off the hole. */
			*hole_size = *hole_start + *hole_size - pos;
			*hole_start = pos;
			changed = true;
			if (*hole_size < num_bytes)
				break;
		}

		ret = btrfs_ensure_empty_zones(device, pos, num_bytes);

		/* Range is ready for allocation. */
		if (!ret)
			return changed;

		/* Requested range fell outside the device's zones. */
		if (ret == -ERANGE) {
			*hole_start += *hole_size;
			*hole_size = 0;
			return true;
		}

		/* Zone(s) not empty: skip one zone and try again. */
		*hole_start += zone_size;
		*hole_size -= zone_size;
		changed = true;
	}

	return changed;
}
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
/*
 * Check if the hole [*hole_start, *hole_start + *hole_size) is usable
 * for a new device extent of @num_bytes.
 *
 * @device:	the device owning the hole
 * @hole_start:	start of the hole, updated in place
 * @hole_size:	size of the hole, updated in place
 * @num_bytes:	size we need
 *
 * The hole is first clamped against already-allocated device extents
 * (contains_pending_extent()), then validated against policy-specific
 * constraints — zoned devices additionally need allocatable, empty
 * zones.  Returns true when the hole's position or size was changed.
 */
static bool dev_extent_hole_check(struct btrfs_device *device, u64 *hole_start,
				  u64 *hole_size, u64 num_bytes)
{
	bool changed = false;
	u64 hole_end = *hole_start + *hole_size;

	for (;;) {
		/*
		 * If the hole's start was pushed forward past a pending
		 * extent, shrink the size accordingly (or zero it when
		 * the start moved beyond the original end).
		 */
		if (contains_pending_extent(device, hole_start, *hole_size)) {
			if (hole_end >= *hole_start)
				*hole_size = hole_end - *hole_start;
			else
				*hole_size = 0;
			changed = true;
		}

		switch (device->fs_devices->chunk_alloc_policy) {
		case BTRFS_CHUNK_ALLOC_REGULAR:
			/* No extra constraints for regular devices. */
			break;
		case BTRFS_CHUNK_ALLOC_ZONED:
			if (dev_extent_hole_check_zoned(device, hole_start,
							hole_size, num_bytes)) {
				changed = true;
				/*
				 * The adjusted hole may again overlap a
				 * pending extent, so loop to re-check.
				 */
				continue;
			}
			break;
		default:
			BUG();
		}

		break;
	}

	return changed;
}
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
/*
 * Find a free region of at least @num_bytes on @device, starting the
 * search at @search_start.
 *
 * Walks the DEV_EXTENT items of the device and looks for gaps between
 * them (and after the last one, up to device->total_bytes).  Every
 * candidate hole is validated/adjusted with dev_extent_hole_check().
 *
 * On success (0), *start is the hole's start and *len (if non-NULL) its
 * size.  Returns -ENOSPC when no hole of @num_bytes exists; *start/*len
 * then still describe the largest hole found.  Other negative errnos
 * indicate btree search errors.
 *
 * NOTE(review): the search uses the commit root (search_commit_root
 * below), so results reflect the last committed state — confirm callers
 * account for extents changed in the current transaction.
 */
static int find_free_dev_extent_start(struct btrfs_device *device,
				      u64 num_bytes, u64 search_start, u64 *start,
				      u64 *len)
{
	struct btrfs_fs_info *fs_info = device->fs_info;
	struct btrfs_root *root = fs_info->dev_root;
	struct btrfs_key key;
	struct btrfs_dev_extent *dev_extent;
	struct btrfs_path *path;
	u64 hole_size;
	u64 max_hole_start;
	u64 max_hole_size;
	u64 extent_end;
	u64 search_end = device->total_bytes;
	int ret;
	int slot;
	struct extent_buffer *l;

	/* Apply the policy-specific minimum start (1MiB / zone aligned). */
	search_start = dev_extent_search_start(device, search_start);

	/* Zoned devices must allocate in zone-size multiples. */
	WARN_ON(device->zone_info &&
		!IS_ALIGNED(num_bytes, device->zone_info->zone_size));

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	max_hole_start = search_start;
	max_hole_size = 0;

again:
	/* Replace targets are managed by the replace code, not here. */
	if (search_start >= search_end ||
	    test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
		ret = -ENOSPC;
		goto out;
	}

	path->reada = READA_FORWARD;
	path->search_commit_root = 1;
	path->skip_locking = 1;

	key.objectid = device->devid;
	key.offset = search_start;
	key.type = BTRFS_DEV_EXTENT_KEY;

	ret = btrfs_search_backwards(root, &key, path);
	if (ret < 0)
		goto out;

	while (1) {
		l = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(l)) {
			ret = btrfs_next_leaf(root, path);
			if (ret == 0)
				continue;
			if (ret < 0)
				goto out;

			break;
		}
		btrfs_item_key_to_cpu(l, &key, slot);

		if (key.objectid < device->devid)
			goto next;

		if (key.objectid > device->devid)
			break;

		if (key.type != BTRFS_DEV_EXTENT_KEY)
			goto next;

		/* Gap between search_start and this extent's start. */
		if (key.offset > search_start) {
			hole_size = key.offset - search_start;
			dev_extent_hole_check(device, &search_start, &hole_size,
					      num_bytes);

			if (hole_size > max_hole_size) {
				max_hole_start = search_start;
				max_hole_size = hole_size;
			}

			/*
			 * A hole big enough was found: max_hole_start/size
			 * already describe it, so return immediately.
			 */
			if (hole_size >= num_bytes) {
				ret = 0;
				goto out;
			}
		}

		/* Advance past this device extent. */
		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
		extent_end = key.offset + btrfs_dev_extent_length(l,
								  dev_extent);
		if (extent_end > search_start)
			search_start = extent_end;
next:
		path->slots[0]++;
		cond_resched();
	}

	/*
	 * Consider the trailing space after the last device extent.  When
	 * the device is being shrunk, search_end can be smaller than
	 * search_start, hence the check.
	 */
	if (search_end > search_start) {
		hole_size = search_end - search_start;
		if (dev_extent_hole_check(device, &search_start, &hole_size,
					  num_bytes)) {
			/* Hole moved: redo the whole search from there. */
			btrfs_release_path(path);
			goto again;
		}

		if (hole_size > max_hole_size) {
			max_hole_start = search_start;
			max_hole_size = hole_size;
		}
	}

	/* No hole of the requested size was found. */
	if (max_hole_size < num_bytes)
		ret = -ENOSPC;
	else
		ret = 0;

out:
	btrfs_free_path(path);
	*start = max_hole_start;
	if (len)
		*len = max_hole_size;
	return ret;
}
1700
/*
 * Convenience wrapper: find a free device extent of @num_bytes,
 * searching the whole device starting at offset 0.
 */
int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
			 u64 *start, u64 *len)
{
	return find_free_dev_extent_start(device, num_bytes, 0, start, len);
}
1707
/*
 * Delete the device extent item covering byte @start on @device and
 * return its length in *dev_extent_len.
 *
 * If no item has the exact key, the previous DEV_EXTENT item must
 * contain @start (BUG_ON otherwise); the search is then repeated with
 * that item's exact key so btrfs_del_item() operates on the right slot.
 *
 * Returns 0 on success (and marks the transaction as having freed
 * block groups), a positive value when no covering item exists, or a
 * negative errno on error.
 */
static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
				 struct btrfs_device *device,
				 u64 start, u64 *dev_extent_len)
{
	struct btrfs_fs_info *fs_info = device->fs_info;
	struct btrfs_root *root = fs_info->dev_root;
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *leaf = NULL;
	struct btrfs_dev_extent *extent = NULL;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = device->devid;
	key.offset = start;
	key.type = BTRFS_DEV_EXTENT_KEY;
again:
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret > 0) {
		/* No exact match: step back to the previous dev extent. */
		ret = btrfs_previous_item(root, path, key.objectid,
					  BTRFS_DEV_EXTENT_KEY);
		if (ret)
			goto out;
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		extent = btrfs_item_ptr(leaf, path->slots[0],
					struct btrfs_dev_extent);
		/* The previous extent must cover @start. */
		BUG_ON(found_key.offset > start || found_key.offset +
		       btrfs_dev_extent_length(leaf, extent) < start);
		/* Re-search with the exact key before deleting. */
		key = found_key;
		btrfs_release_path(path);
		goto again;
	} else if (ret == 0) {
		leaf = path->nodes[0];
		extent = btrfs_item_ptr(leaf, path->slots[0],
					struct btrfs_dev_extent);
	} else {
		goto out;
	}

	*dev_extent_len = btrfs_dev_extent_length(leaf, extent);

	ret = btrfs_del_item(trans, root, path);
	if (ret == 0)
		set_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags);
out:
	btrfs_free_path(path);
	return ret;
}
1761
1762static u64 find_next_chunk(struct btrfs_fs_info *fs_info)
1763{
1764 struct extent_map_tree *em_tree;
1765 struct extent_map *em;
1766 struct rb_node *n;
1767 u64 ret = 0;
1768
1769 em_tree = &fs_info->mapping_tree;
1770 read_lock(&em_tree->lock);
1771 n = rb_last(&em_tree->map.rb_root);
1772 if (n) {
1773 em = rb_entry(n, struct extent_map, rb_node);
1774 ret = em->start + em->len;
1775 }
1776 read_unlock(&em_tree->lock);
1777
1778 return ret;
1779}
1780
/*
 * Find the next available device id for a new device item.
 *
 * Searches the chunk tree for the last DEV_ITEM and stores its devid + 1
 * in *devid_ret (or 1 when no device item exists yet).
 *
 * Returns 0 on success, a negative errno on search failure, or -EUCLEAN
 * when a key with offset (u64)-1 is unexpectedly found — that devid is
 * impossible, so the chunk tree must be corrupted.
 */
static noinline int find_next_devid(struct btrfs_fs_info *fs_info,
				    u64 *devid_ret)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.type = BTRFS_DEV_ITEM_KEY;
	key.offset = (u64)-1;

	ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
	if (ret < 0)
		goto error;

	if (ret == 0) {
		/* An exact match on devid (u64)-1 cannot legitimately exist. */
		btrfs_err(fs_info, "corrupted chunk tree devid -1 matched");
		ret = -EUCLEAN;
		goto error;
	}

	ret = btrfs_previous_item(fs_info->chunk_root, path,
				  BTRFS_DEV_ITEMS_OBJECTID,
				  BTRFS_DEV_ITEM_KEY);
	if (ret) {
		/* No device items at all: start numbering at 1. */
		*devid_ret = 1;
	} else {
		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				      path->slots[0]);
		*devid_ret = found_key.offset + 1;
	}
	ret = 0;
error:
	btrfs_free_path(path);
	return ret;
}
1823
1824
1825
1826
1827
/*
 * Insert a DEV_ITEM for @device into the chunk tree, describing its
 * geometry (sizes, alignment), uuid and the filesystem's metadata uuid.
 * Called when a new device is added to the filesystem.
 */
static int btrfs_add_dev_item(struct btrfs_trans_handle *trans,
			      struct btrfs_device *device)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_dev_item *dev_item;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	unsigned long ptr;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.type = BTRFS_DEV_ITEM_KEY;
	key.offset = device->devid;

	ret = btrfs_insert_empty_item(trans, trans->fs_info->chunk_root, path,
				      &key, sizeof(*dev_item));
	if (ret)
		goto out;

	leaf = path->nodes[0];
	dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);

	btrfs_set_device_id(leaf, dev_item, device->devid);
	btrfs_set_device_generation(leaf, dev_item, 0);
	btrfs_set_device_type(leaf, dev_item, device->type);
	btrfs_set_device_io_align(leaf, dev_item, device->io_align);
	btrfs_set_device_io_width(leaf, dev_item, device->io_width);
	btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
	btrfs_set_device_total_bytes(leaf, dev_item,
				     btrfs_device_get_disk_total_bytes(device));
	btrfs_set_device_bytes_used(leaf, dev_item,
				    btrfs_device_get_bytes_used(device));
	/* Group/seek/bandwidth/start_offset are unused here; write zeros. */
	btrfs_set_device_group(leaf, dev_item, 0);
	btrfs_set_device_seek_speed(leaf, dev_item, 0);
	btrfs_set_device_bandwidth(leaf, dev_item, 0);
	btrfs_set_device_start_offset(leaf, dev_item, 0);

	ptr = btrfs_device_uuid(dev_item);
	write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
	ptr = btrfs_device_fsid(dev_item);
	write_extent_buffer(leaf, trans->fs_info->fs_devices->metadata_uuid,
			    ptr, BTRFS_FSID_SIZE);
	btrfs_mark_buffer_dirty(leaf);

	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}
1881
1882
1883
1884
1885
1886static void update_dev_time(struct block_device *bdev)
1887{
1888 struct inode *inode = bdev->bd_inode;
1889 struct timespec64 now;
1890
1891
1892 if (!inode)
1893 return;
1894
1895 now = current_time(inode);
1896 generic_update_time(inode, &now, S_MTIME | S_CTIME);
1897}
1898
/*
 * Delete the DEV_ITEM of @device from the chunk tree, in its own
 * transaction.  The transaction is committed on success so the removal
 * is persistent before the caller continues tearing down the device;
 * on failure the transaction is aborted and ended without committing.
 */
static int btrfs_rm_dev_item(struct btrfs_device *device)
{
	struct btrfs_root *root = device->fs_info->chunk_root;
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_trans_handle *trans;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);
	}
	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.type = BTRFS_DEV_ITEM_KEY;
	key.offset = device->devid;

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret) {
		/* A positive return means the item was not found. */
		if (ret > 0)
			ret = -ENOENT;
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		goto out;
	}

	ret = btrfs_del_item(trans, root, path);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
	}

out:
	btrfs_free_path(path);
	/* Only commit when the delete succeeded; errors already ended trans. */
	if (!ret)
		ret = btrfs_commit_transaction(trans);
	return ret;
}
1941
1942
1943
1944
1945
1946
1947static int btrfs_check_raid_min_devices(struct btrfs_fs_info *fs_info,
1948 u64 num_devices)
1949{
1950 u64 all_avail;
1951 unsigned seq;
1952 int i;
1953
1954 do {
1955 seq = read_seqbegin(&fs_info->profiles_lock);
1956
1957 all_avail = fs_info->avail_data_alloc_bits |
1958 fs_info->avail_system_alloc_bits |
1959 fs_info->avail_metadata_alloc_bits;
1960 } while (read_seqretry(&fs_info->profiles_lock, seq));
1961
1962 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
1963 if (!(all_avail & btrfs_raid_array[i].bg_flag))
1964 continue;
1965
1966 if (num_devices < btrfs_raid_array[i].devs_min)
1967 return btrfs_raid_array[i].mindev_error;
1968 }
1969
1970 return 0;
1971}
1972
1973static struct btrfs_device * btrfs_find_next_active_device(
1974 struct btrfs_fs_devices *fs_devs, struct btrfs_device *device)
1975{
1976 struct btrfs_device *next_device;
1977
1978 list_for_each_entry(next_device, &fs_devs->devices, dev_list) {
1979 if (next_device != device &&
1980 !test_bit(BTRFS_DEV_STATE_MISSING, &next_device->dev_state)
1981 && next_device->bdev)
1982 return next_device;
1983 }
1984
1985 return NULL;
1986}
1987
1988
1989
1990
1991
1992
1993
/*
 * Replace @device with another active device wherever it is cached as
 * "the" device: the VFS super block's s_bdev and the fs_devices
 * latest_bdev.
 *
 * When @next_device is NULL, any other active (present, not missing)
 * device is picked; one must exist (ASSERT).  Used when a device is
 * removed or replaced.
 */
void __cold btrfs_assign_next_active_device(struct btrfs_device *device,
					    struct btrfs_device *next_device)
{
	struct btrfs_fs_info *fs_info = device->fs_info;

	if (!next_device)
		next_device = btrfs_find_next_active_device(fs_info->fs_devices,
							    device);
	ASSERT(next_device);

	if (fs_info->sb->s_bdev &&
	    (fs_info->sb->s_bdev == device->bdev))
		fs_info->sb->s_bdev = next_device->bdev;

	if (fs_info->fs_devices->latest_bdev == device->bdev)
		fs_info->fs_devices->latest_bdev = next_device->bdev;
}
2011
2012
2013
2014
2015
2016static u64 btrfs_num_devices(struct btrfs_fs_info *fs_info)
2017{
2018 u64 num_devices = fs_info->fs_devices->num_devices;
2019
2020 down_read(&fs_info->dev_replace.rwsem);
2021 if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
2022 ASSERT(num_devices > 1);
2023 num_devices--;
2024 }
2025 up_read(&fs_info->dev_replace.rwsem);
2026
2027 return num_devices;
2028}
2029
/*
 * Wipe the btrfs magic from all super block copies on @bdev so the
 * device is no longer recognized as part of a btrfs filesystem.
 *
 * On zoned devices the super block log zones are reset instead of
 * rewriting in place.  Afterwards a KOBJ_CHANGE uevent is emitted so
 * userspace (e.g. udev) re-probes the device, and the device inode
 * timestamps are bumped.
 */
void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
			       struct block_device *bdev,
			       const char *device_path)
{
	struct btrfs_super_block *disk_super;
	int copy_num;

	if (!bdev)
		return;

	for (copy_num = 0; copy_num < BTRFS_SUPER_MIRROR_MAX; copy_num++) {
		struct page *page;
		int ret;

		disk_super = btrfs_read_dev_one_super(bdev, copy_num);
		if (IS_ERR(disk_super))
			continue;

		if (bdev_is_zoned(bdev)) {
			/* Zoned: reset the log zone instead of rewriting. */
			btrfs_reset_sb_log_zones(bdev, copy_num);
			continue;
		}

		/* Clearing the magic is enough to invalidate the copy. */
		memset(&disk_super->magic, 0, sizeof(disk_super->magic));

		page = virt_to_page(disk_super);
		set_page_dirty(page);
		lock_page(page);
		/* write_one_page() unlocks the page for us. */
		ret = write_one_page(page);
		if (ret)
			btrfs_warn(fs_info,
				"error clearing superblock number %d (%d)",
				copy_num, ret);
		btrfs_release_disk_super(disk_super);

	}

	/* Notify udev that device has changed. */
	btrfs_kobject_uevent(bdev, KOBJ_CHANGE);

	/* Update ctime/mtime for device path for libblkid. */
	update_dev_time(bdev);
}
2074
/*
 * Remove a device from the filesystem, identified either by @devid or
 * by @device_path (which may be the literal string "missing").
 *
 * The full teardown: validate RAID minimum device counts, refuse
 * devices pinned by swapfiles or acting as replace targets, shrink the
 * device to 0 to relocate its data, delete its DEV_ITEM, unlink it from
 * the device lists and update the super block counters, then wipe its
 * super blocks when it was writeable.
 *
 * On success *bdev and *mode are set to the removed device's block
 * device and open mode — presumably so the caller can release the
 * block device afterwards (it is not closed here); verify against the
 * callers.
 */
int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
		    u64 devid, struct block_device **bdev, fmode_t *mode)
{
	struct btrfs_device *device;
	struct btrfs_fs_devices *cur_devices;
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	u64 num_devices;
	int ret = 0;

	mutex_lock(&uuid_mutex);

	num_devices = btrfs_num_devices(fs_info);

	/* All in-use RAID profiles must survive losing one device. */
	ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1);
	if (ret)
		goto out;

	device = btrfs_find_device_by_devspec(fs_info, devid, device_path);

	if (IS_ERR(device)) {
		if (PTR_ERR(device) == -ENOENT &&
		    device_path && strcmp(device_path, "missing") == 0)
			ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
		else
			ret = PTR_ERR(device);
		goto out;
	}

	if (btrfs_pinned_by_swapfile(fs_info, device)) {
		btrfs_warn_in_rcu(fs_info,
		  "cannot remove device %s (devid %llu) due to active swapfile",
				  rcu_str_deref(device->name), device->devid);
		ret = -ETXTBSY;
		goto out;
	}

	if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
		ret = BTRFS_ERROR_DEV_TGT_REPLACE;
		goto out;
	}

	/* Cannot remove the only writeable device. */
	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
	    fs_info->fs_devices->rw_devices == 1) {
		ret = BTRFS_ERROR_DEV_ONLY_WRITABLE;
		goto out;
	}

	/* Stop new allocations to this device before relocating its data. */
	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
		mutex_lock(&fs_info->chunk_mutex);
		list_del_init(&device->dev_alloc_list);
		device->fs_devices->rw_devices--;
		mutex_unlock(&fs_info->chunk_mutex);
	}

	/*
	 * Shrinking to 0 relocates all data off the device; this can
	 * block for a long time, so drop uuid_mutex around it.
	 */
	mutex_unlock(&uuid_mutex);
	ret = btrfs_shrink_device(device, 0);
	if (!ret)
		btrfs_reada_remove_dev(device);
	mutex_lock(&uuid_mutex);
	if (ret)
		goto error_undo;

	/* The device is now empty: remove its DEV_ITEM from the chunk tree. */
	ret = btrfs_rm_dev_item(device);
	if (ret)
		goto error_undo;

	clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
	btrfs_scrub_cancel_dev(device);

	/*
	 * Unlink the device from the in-memory lists and fix up counters.
	 * cur_devices may differ from fs_devices when the device belongs
	 * to a seed filesystem.
	 */
	cur_devices = device->fs_devices;
	mutex_lock(&fs_devices->device_list_mutex);
	list_del_rcu(&device->dev_list);

	cur_devices->num_devices--;
	cur_devices->total_devices--;
	/* Update total_devices of the parent fs_devices if it's seed */
	if (cur_devices != fs_devices)
		fs_devices->total_devices--;

	if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
		cur_devices->missing_devices--;

	btrfs_assign_next_active_device(device, NULL);

	if (device->bdev) {
		cur_devices->open_devices--;
		/* remove sysfs entry */
		btrfs_sysfs_remove_device(device);
	}

	num_devices = btrfs_super_num_devices(fs_info->super_copy) - 1;
	btrfs_set_super_num_devices(fs_info->super_copy, num_devices);
	mutex_unlock(&fs_devices->device_list_mutex);

	/*
	 * Only wipe the on-disk super blocks when the device was
	 * writeable; also flush/invalidate its page cache.
	 */
	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
		btrfs_scratch_superblocks(fs_info, device->bdev,
					  device->name->str);
		if (device->bdev) {
			sync_blockdev(device->bdev);
			invalidate_bdev(device->bdev);
		}
	}

	*bdev = device->bdev;
	*mode = device->mode;
	/* Wait for RCU readers of the device lists before freeing. */
	synchronize_rcu();
	btrfs_free_device(device);

	/* Last device of a (seed) fs_devices: tear the whole set down. */
	if (cur_devices->open_devices == 0) {
		list_del_init(&cur_devices->seed_list);
		close_fs_devices(cur_devices);
		free_fs_devices(cur_devices);
	}

out:
	mutex_unlock(&uuid_mutex);
	return ret;

error_undo:
	/* Re-enable allocations on the device; removal did not happen. */
	btrfs_reada_undo_remove_dev(device);
	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
		mutex_lock(&fs_info->chunk_mutex);
		list_add(&device->dev_alloc_list,
			 &fs_devices->alloc_list);
		device->fs_devices->rw_devices++;
		mutex_unlock(&fs_info->chunk_mutex);
	}
	goto out;
}
2234
/*
 * Unlink the device-replace source device @srcdev from its fs_devices
 * lists and adjust the counters (num/missing/rw/open devices).
 *
 * Caller must hold the device_list_mutex of fs_info's fs_devices
 * (asserted).  Note that srcdev->fs_devices can be a seed fs_devices,
 * i.e. different from fs_info->fs_devices.
 */
void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev)
{
	struct btrfs_fs_devices *fs_devices;

	lockdep_assert_held(&srcdev->fs_info->fs_devices->device_list_mutex);

	/*
	 * Use the fs_devices the device actually belongs to — for a seed
	 * device this is the seed's own fs_devices, not the sprout's.
	 */
	fs_devices = srcdev->fs_devices;

	list_del_rcu(&srcdev->dev_list);
	list_del(&srcdev->dev_alloc_list);
	fs_devices->num_devices--;
	if (test_bit(BTRFS_DEV_STATE_MISSING, &srcdev->dev_state))
		fs_devices->missing_devices--;

	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &srcdev->dev_state))
		fs_devices->rw_devices--;

	if (srcdev->bdev)
		fs_devices->open_devices--;
}
2261
/*
 * Close and free the device-replace source device @srcdev after it was
 * unlinked by btrfs_rm_dev_replace_remove_srcdev().  If it was the last
 * device of a seed fs_devices, tear the whole seed fs_devices down.
 */
void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev)
{
	struct btrfs_fs_devices *fs_devices = srcdev->fs_devices;

	mutex_lock(&uuid_mutex);

	btrfs_close_bdev(srcdev);
	/* Wait for RCU readers of the device lists before freeing. */
	synchronize_rcu();
	btrfs_free_device(srcdev);

	/* If this is no longer the sprout's seed, unregister it entirely. */
	if (!fs_devices->num_devices) {
		/*
		 * Only a seed fs_devices can end up empty here — its last
		 * device was just replaced (ASSERT below).
		 */
		ASSERT(fs_devices->seeding);

		list_del_init(&fs_devices->seed_list);
		close_fs_devices(fs_devices);
		free_fs_devices(fs_devices);
	}
	mutex_unlock(&uuid_mutex);
}
2288
/*
 * Tear down the device-replace target device @tgtdev: remove it from
 * sysfs and the device lists, pick a new active device if it was the
 * cached one, wipe its super blocks, and close and free it.
 */
void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev)
{
	struct btrfs_fs_devices *fs_devices = tgtdev->fs_info->fs_devices;

	mutex_lock(&fs_devices->device_list_mutex);

	btrfs_sysfs_remove_device(tgtdev);

	if (tgtdev->bdev)
		fs_devices->open_devices--;

	fs_devices->num_devices--;

	btrfs_assign_next_active_device(tgtdev, NULL);

	list_del_rcu(&tgtdev->dev_list);

	mutex_unlock(&fs_devices->device_list_mutex);

	/*
	 * Scratch the super blocks outside device_list_mutex: writing to
	 * the device does not need the lock, the device is already
	 * unlinked from the lists.
	 */
	btrfs_scratch_superblocks(tgtdev->fs_info, tgtdev->bdev,
				  tgtdev->name->str);

	btrfs_close_bdev(tgtdev);
	/* Wait for RCU readers of the device lists before freeing. */
	synchronize_rcu();
	btrfs_free_device(tgtdev);
}
2322
/*
 * Open the block device at @device_path read-only, read its super
 * block, and look up the matching btrfs_device of this filesystem by
 * devid + device uuid (+ metadata_uuid when the METADATA_UUID incompat
 * flag is set, otherwise the fsid).
 *
 * Returns the device, or ERR_PTR(-ENOENT) when the device is not part
 * of this filesystem, or an ERR_PTR from opening/reading the device.
 * The block device reference is always released before returning.
 */
static struct btrfs_device *btrfs_find_device_by_path(
		struct btrfs_fs_info *fs_info, const char *device_path)
{
	int ret = 0;
	struct btrfs_super_block *disk_super;
	u64 devid;
	u8 *dev_uuid;
	struct block_device *bdev;
	struct btrfs_device *device;

	ret = btrfs_get_bdev_and_sb(device_path, FMODE_READ,
				    fs_info->bdev_holder, 0, &bdev, &disk_super);
	if (ret)
		return ERR_PTR(ret);

	devid = btrfs_stack_device_id(&disk_super->dev_item);
	dev_uuid = disk_super->dev_item.uuid;
	if (btrfs_fs_incompat(fs_info, METADATA_UUID))
		device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
					   disk_super->metadata_uuid);
	else
		device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
					   disk_super->fsid);

	btrfs_release_disk_super(disk_super);
	if (!device)
		device = ERR_PTR(-ENOENT);
	blkdev_put(bdev, FMODE_READ);
	return device;
}
2353
2354
2355
2356
/*
 * Look up a device of this filesystem either by @devid (when non-zero)
 * or by @device_path.  The special path "missing" selects the first
 * device that is recorded in the metadata but has no backing bdev.
 *
 * Returns the device, ERR_PTR(-ENOENT) when nothing matches, or
 * ERR_PTR(-EINVAL) when neither a devid nor a non-empty path is given.
 */
struct btrfs_device *btrfs_find_device_by_devspec(
		struct btrfs_fs_info *fs_info, u64 devid,
		const char *device_path)
{
	struct btrfs_device *device;

	if (devid) {
		device = btrfs_find_device(fs_info->fs_devices, devid, NULL,
					   NULL);
		if (!device)
			return ERR_PTR(-ENOENT);
		return device;
	}

	if (!device_path || !device_path[0])
		return ERR_PTR(-EINVAL);

	if (strcmp(device_path, "missing") == 0) {
		/* Find first missing device */
		list_for_each_entry(device, &fs_info->fs_devices->devices,
				    dev_list) {
			if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
				     &device->dev_state) && !device->bdev)
				return device;
		}
		return ERR_PTR(-ENOENT);
	}

	return btrfs_find_device_by_path(fs_info, device_path);
}
2387
2388
2389
2390
/*
 * Turn a seeding filesystem into a sprout: move all current devices
 * into a new "seed" fs_devices hanging off the main one, generate a
 * fresh fsid for the writable sprout, and clear the SEEDING flag in
 * the in-memory super block copy.
 *
 * Must be called with uuid_mutex held (asserted).  Returns 0 on
 * success, -EINVAL when the filesystem is not seeding, or a negative
 * errno from the allocations.
 */
static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
{
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	struct btrfs_fs_devices *old_devices;
	struct btrfs_fs_devices *seed_devices;
	struct btrfs_super_block *disk_super = fs_info->super_copy;
	struct btrfs_device *device;
	u64 super_flags;

	lockdep_assert_held(&uuid_mutex);
	if (!fs_devices->seeding)
		return -EINVAL;

	/*
	 * Private copy of the fs_devices, not linked into fs_uuids; it
	 * will receive all current devices below.
	 */
	seed_devices = alloc_fs_devices(NULL, NULL);
	if (IS_ERR(seed_devices))
		return PTR_ERR(seed_devices);

	/*
	 * A clone of the current fs_devices stays on the global fs_uuids
	 * list under the old (seed) fsid, so the seed can still be found
	 * by scanning.
	 */
	old_devices = clone_fs_devices(fs_devices);
	if (IS_ERR(old_devices)) {
		kfree(seed_devices);
		return PTR_ERR(old_devices);
	}

	list_add(&old_devices->fs_list, &fs_uuids);

	/*
	 * seed_devices inherits the old state, then gets fresh list
	 * heads and its own mutex; the device lists are spliced over
	 * under RCU.
	 */
	memcpy(seed_devices, fs_devices, sizeof(*seed_devices));
	seed_devices->opened = 1;
	INIT_LIST_HEAD(&seed_devices->devices);
	INIT_LIST_HEAD(&seed_devices->alloc_list);
	mutex_init(&seed_devices->device_list_mutex);

	mutex_lock(&fs_devices->device_list_mutex);
	list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
			     synchronize_rcu);
	list_for_each_entry(device, &seed_devices->devices, dev_list)
		device->fs_devices = seed_devices;

	/* The sprout itself starts empty and writable. */
	fs_devices->seeding = false;
	fs_devices->num_devices = 0;
	fs_devices->open_devices = 0;
	fs_devices->missing_devices = 0;
	fs_devices->rotating = false;
	list_add(&seed_devices->seed_list, &fs_devices->seed_list);

	/* New fsid for the sprout; metadata uuid matches it. */
	generate_random_uuid(fs_devices->fsid);
	memcpy(fs_devices->metadata_uuid, fs_devices->fsid, BTRFS_FSID_SIZE);
	memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
	mutex_unlock(&fs_devices->device_list_mutex);

	super_flags = btrfs_super_flags(disk_super) &
		      ~BTRFS_SUPER_FLAG_SEEDING;
	btrfs_set_super_flags(disk_super, super_flags);

	return 0;
}
2456
2457
2458
2459
/*
 * After sprouting, walk all DEV_ITEMs in the chunk tree and, for
 * devices that still belong to a seed fs_devices, store the device's
 * current generation into the item.
 */
static int btrfs_finish_sprout(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_root *root = fs_info->chunk_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_dev_item *dev_item;
	struct btrfs_device *device;
	struct btrfs_key key;
	u8 fs_uuid[BTRFS_FSID_SIZE];
	u8 dev_uuid[BTRFS_UUID_SIZE];
	u64 devid;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.offset = 0;
	key.type = BTRFS_DEV_ITEM_KEY;

	while (1) {
		ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
		if (ret < 0)
			goto error;

		leaf = path->nodes[0];
next_slot:
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret > 0)
				break;
			if (ret < 0)
				goto error;
			leaf = path->nodes[0];
			btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
			/* Restart the search from the key we just read. */
			btrfs_release_path(path);
			continue;
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != BTRFS_DEV_ITEMS_OBJECTID ||
		    key.type != BTRFS_DEV_ITEM_KEY)
			break;

		dev_item = btrfs_item_ptr(leaf, path->slots[0],
					  struct btrfs_dev_item);
		devid = btrfs_device_id(leaf, dev_item);
		read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
				   BTRFS_UUID_SIZE);
		read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
				   BTRFS_FSID_SIZE);
		device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
					   fs_uuid);
		/* Every on-disk device item must have an in-memory device. */
		BUG_ON(!device);

		if (device->fs_devices->seeding) {
			btrfs_set_device_generation(leaf, dev_item,
						    device->generation);
			btrfs_mark_buffer_dirty(leaf);
		}

		path->slots[0]++;
		goto next_slot;
	}
	ret = 0;
error:
	btrfs_free_path(path);
	return ret;
}
2531
/*
 * Add the device at @device_path to the mounted filesystem.
 *
 * Opens the device exclusively, allocates and initializes the
 * btrfs_device, links it into the device lists, updates super block
 * totals, and persists the DEV_ITEM in a transaction.  When the
 * filesystem is a seed, this first sprouts it (btrfs_prepare_sprout /
 * btrfs_finish_sprout) and afterwards relocates the system chunks.
 *
 * Returns 0 on success or a negative errno; on failure all state
 * changes are unwound via the error_* labels.
 */
int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path)
{
	struct btrfs_root *root = fs_info->dev_root;
	struct request_queue *q;
	struct btrfs_trans_handle *trans;
	struct btrfs_device *device;
	struct block_device *bdev;
	struct super_block *sb = fs_info->sb;
	struct rcu_string *name;
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	u64 orig_super_total_bytes;
	u64 orig_super_num_devices;
	int seeding_dev = 0;
	int ret = 0;
	bool locked = false;

	/* A read-only filesystem can only gain a device by sprouting. */
	if (sb_rdonly(sb) && !fs_devices->seeding)
		return -EROFS;

	bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
				  fs_info->bdev_holder);
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);

	if (!btrfs_check_device_zone_type(fs_info, bdev)) {
		ret = -EINVAL;
		goto error;
	}

	if (fs_devices->seeding) {
		seeding_dev = 1;
		down_write(&sb->s_umount);
		mutex_lock(&uuid_mutex);
		locked = true;
	}

	sync_blockdev(bdev);

	/* Reject a device that is already part of this filesystem. */
	rcu_read_lock();
	list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
		if (device->bdev == bdev) {
			ret = -EEXIST;
			rcu_read_unlock();
			goto error;
		}
	}
	rcu_read_unlock();

	device = btrfs_alloc_device(fs_info, NULL, NULL);
	if (IS_ERR(device)) {
		/* we can safely leave the fs_devices entry around */
		ret = PTR_ERR(device);
		goto error;
	}

	name = rcu_string_strdup(device_path, GFP_KERNEL);
	if (!name) {
		ret = -ENOMEM;
		goto error_free_device;
	}
	rcu_assign_pointer(device->name, name);

	device->fs_info = fs_info;
	device->bdev = bdev;

	ret = btrfs_get_dev_zone_info(device);
	if (ret)
		goto error_free_device;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto error_free_zone;
	}

	/* Initialize geometry from the filesystem's sector size. */
	q = bdev_get_queue(bdev);
	set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
	device->generation = trans->transid;
	device->io_width = fs_info->sectorsize;
	device->io_align = fs_info->sectorsize;
	device->sector_size = fs_info->sectorsize;
	device->total_bytes = round_down(i_size_read(bdev->bd_inode),
					 fs_info->sectorsize);
	device->disk_total_bytes = device->total_bytes;
	device->commit_total_bytes = device->total_bytes;
	set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
	clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
	device->mode = FMODE_EXCL;
	device->dev_stats_valid = 1;
	set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);

	if (seeding_dev) {
		/* Make the fs writable and split off the seed devices. */
		btrfs_clear_sb_rdonly(sb);
		ret = btrfs_prepare_sprout(fs_info);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto error_trans;
		}
	}

	device->fs_devices = fs_devices;

	/* Link the device in and account its space. */
	mutex_lock(&fs_devices->device_list_mutex);
	mutex_lock(&fs_info->chunk_mutex);
	list_add_rcu(&device->dev_list, &fs_devices->devices);
	list_add(&device->dev_alloc_list, &fs_devices->alloc_list);
	fs_devices->num_devices++;
	fs_devices->open_devices++;
	fs_devices->rw_devices++;
	fs_devices->total_devices++;
	fs_devices->total_rw_bytes += device->total_bytes;

	atomic64_add(device->total_bytes, &fs_info->free_chunk_space);

	if (!blk_queue_nonrot(q))
		fs_devices->rotating = true;

	orig_super_total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
	btrfs_set_super_total_bytes(fs_info->super_copy,
		round_down(orig_super_total_bytes + device->total_bytes,
			   fs_info->sectorsize));

	orig_super_num_devices = btrfs_super_num_devices(fs_info->super_copy);
	btrfs_set_super_num_devices(fs_info->super_copy,
				    orig_super_num_devices + 1);

	/*
	 * Force allocation attempts in all space infos now that more
	 * space is available.
	 */
	btrfs_clear_space_info_full(fs_info);

	mutex_unlock(&fs_info->chunk_mutex);

	/* Add sysfs device entry */
	btrfs_sysfs_add_device(device);

	mutex_unlock(&fs_devices->device_list_mutex);

	if (seeding_dev) {
		/* The sprout needs its own initial chunks on this device. */
		mutex_lock(&fs_info->chunk_mutex);
		ret = init_first_rw_device(trans);
		mutex_unlock(&fs_info->chunk_mutex);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto error_sysfs;
		}
	}

	ret = btrfs_add_dev_item(trans, device);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto error_sysfs;
	}

	if (seeding_dev) {
		ret = btrfs_finish_sprout(trans);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto error_sysfs;
		}

		/*
		 * The sprout got a new fsid in btrfs_prepare_sprout();
		 * refresh the sysfs fsid directory accordingly.
		 */
		btrfs_sysfs_update_sprout_fsid(fs_devices);
	}

	ret = btrfs_commit_transaction(trans);

	if (seeding_dev) {
		mutex_unlock(&uuid_mutex);
		up_write(&sb->s_umount);
		locked = false;

		if (ret) /* transaction commit */
			return ret;

		/* System chunks of the seed must be rewritten on the sprout. */
		ret = btrfs_relocate_sys_chunks(fs_info);
		if (ret < 0)
			btrfs_handle_fs_error(fs_info, ret,
				    "Failed to relocate sys chunks after device initialization. This can be fixed using the \"btrfs balance\" command.");
		trans = btrfs_attach_transaction(root);
		if (IS_ERR(trans)) {
			if (PTR_ERR(trans) == -ENOENT)
				return 0;
			ret = PTR_ERR(trans);
			trans = NULL;
			goto error_sysfs;
		}
		ret = btrfs_commit_transaction(trans);
	}

	/*
	 * The device is now part of the filesystem: drop any stand-alone
	 * scan registration for this path and update the device inode
	 * timestamps.
	 */
	btrfs_forget_devices(device_path);

	/* Update ctime/mtime for blkid or udev */
	update_dev_time(bdev);

	return ret;

error_sysfs:
	/* Unwind the list insertion and all counter/super block updates. */
	btrfs_sysfs_remove_device(device);
	mutex_lock(&fs_info->fs_devices->device_list_mutex);
	mutex_lock(&fs_info->chunk_mutex);
	list_del_rcu(&device->dev_list);
	list_del(&device->dev_alloc_list);
	fs_info->fs_devices->num_devices--;
	fs_info->fs_devices->open_devices--;
	fs_info->fs_devices->rw_devices--;
	fs_info->fs_devices->total_devices--;
	fs_info->fs_devices->total_rw_bytes -= device->total_bytes;
	atomic64_sub(device->total_bytes, &fs_info->free_chunk_space);
	btrfs_set_super_total_bytes(fs_info->super_copy,
				    orig_super_total_bytes);
	btrfs_set_super_num_devices(fs_info->super_copy,
				    orig_super_num_devices);
	mutex_unlock(&fs_info->chunk_mutex);
	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
error_trans:
	if (seeding_dev)
		btrfs_set_sb_rdonly(sb);
	if (trans)
		btrfs_end_transaction(trans);
error_free_zone:
	btrfs_destroy_dev_zone_info(device);
error_free_device:
	btrfs_free_device(device);
error:
	blkdev_put(bdev, FMODE_EXCL);
	if (locked) {
		mutex_unlock(&uuid_mutex);
		up_write(&sb->s_umount);
	}
	return ret;
}
2775
2776static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
2777 struct btrfs_device *device)
2778{
2779 int ret;
2780 struct btrfs_path *path;
2781 struct btrfs_root *root = device->fs_info->chunk_root;
2782 struct btrfs_dev_item *dev_item;
2783 struct extent_buffer *leaf;
2784 struct btrfs_key key;
2785
2786 path = btrfs_alloc_path();
2787 if (!path)
2788 return -ENOMEM;
2789
2790 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2791 key.type = BTRFS_DEV_ITEM_KEY;
2792 key.offset = device->devid;
2793
2794 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2795 if (ret < 0)
2796 goto out;
2797
2798 if (ret > 0) {
2799 ret = -ENOENT;
2800 goto out;
2801 }
2802
2803 leaf = path->nodes[0];
2804 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
2805
2806 btrfs_set_device_id(leaf, dev_item, device->devid);
2807 btrfs_set_device_type(leaf, dev_item, device->type);
2808 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
2809 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
2810 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
2811 btrfs_set_device_total_bytes(leaf, dev_item,
2812 btrfs_device_get_disk_total_bytes(device));
2813 btrfs_set_device_bytes_used(leaf, dev_item,
2814 btrfs_device_get_bytes_used(device));
2815 btrfs_mark_buffer_dirty(leaf);
2816
2817out:
2818 btrfs_free_path(path);
2819 return ret;
2820}
2821
/*
 * Grow @device to @new_size (rounded down to the filesystem sector size).
 *
 * Updates the in-memory device sizes, the superblock's total_bytes and the
 * fs_devices accounting, queues the device on the transaction's post-commit
 * update list, and finally persists the new size via btrfs_update_device().
 *
 * Returns 0 on success, -EACCES if the device is not writeable, -EINVAL if
 * @new_size does not actually grow the device or the device is a replace
 * target, or an error from btrfs_update_device().
 */
int btrfs_grow_device(struct btrfs_trans_handle *trans,
		      struct btrfs_device *device, u64 new_size)
{
	struct btrfs_fs_info *fs_info = device->fs_info;
	struct btrfs_super_block *super_copy = fs_info->super_copy;
	u64 old_total;
	u64 diff;

	if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
		return -EACCES;

	new_size = round_down(new_size, fs_info->sectorsize);

	mutex_lock(&fs_info->chunk_mutex);
	old_total = btrfs_super_total_bytes(super_copy);
	/*
	 * diff may wrap when new_size <= total_bytes, but in that case we
	 * return -EINVAL below before diff is ever used.
	 */
	diff = round_down(new_size - device->total_bytes, fs_info->sectorsize);

	if (new_size <= device->total_bytes ||
	    test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
		mutex_unlock(&fs_info->chunk_mutex);
		return -EINVAL;
	}

	btrfs_set_super_total_bytes(super_copy,
			round_down(old_total + diff, fs_info->sectorsize));
	device->fs_devices->total_rw_bytes += diff;

	/* In-memory sizes first; the dev item is written out below. */
	btrfs_device_set_total_bytes(device, new_size);
	btrfs_device_set_disk_total_bytes(device, new_size);
	btrfs_clear_space_info_full(device->fs_info);
	/* Queue for the per-transaction device update if not already queued. */
	if (list_empty(&device->post_commit_list))
		list_add_tail(&device->post_commit_list,
			      &trans->transaction->dev_update_list);
	mutex_unlock(&fs_info->chunk_mutex);

	return btrfs_update_device(trans, device);
}
2859
2860static int btrfs_free_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
2861{
2862 struct btrfs_fs_info *fs_info = trans->fs_info;
2863 struct btrfs_root *root = fs_info->chunk_root;
2864 int ret;
2865 struct btrfs_path *path;
2866 struct btrfs_key key;
2867
2868 path = btrfs_alloc_path();
2869 if (!path)
2870 return -ENOMEM;
2871
2872 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
2873 key.offset = chunk_offset;
2874 key.type = BTRFS_CHUNK_ITEM_KEY;
2875
2876 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2877 if (ret < 0)
2878 goto out;
2879 else if (ret > 0) {
2880 btrfs_handle_fs_error(fs_info, -ENOENT,
2881 "Failed lookup while freeing chunk.");
2882 ret = -ENOENT;
2883 goto out;
2884 }
2885
2886 ret = btrfs_del_item(trans, root, path);
2887 if (ret < 0)
2888 btrfs_handle_fs_error(fs_info, ret,
2889 "Failed to delete chunk item.");
2890out:
2891 btrfs_free_path(path);
2892 return ret;
2893}
2894
/*
 * Remove the chunk at @chunk_offset from the superblock's sys_chunk_array.
 *
 * The array is a packed sequence of (btrfs_disk_key, btrfs_chunk) pairs.
 * When the matching entry is found, the remainder of the array is shifted
 * down over it and the recorded array size is shrunk.
 *
 * Caller must hold fs_info->chunk_mutex (asserted below).  Returns 0 on
 * success (including when no matching entry exists) or -EIO if the array
 * contains something other than a chunk item.
 */
static int btrfs_del_sys_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
{
	struct btrfs_super_block *super_copy = fs_info->super_copy;
	struct btrfs_disk_key *disk_key;
	struct btrfs_chunk *chunk;
	u8 *ptr;
	int ret = 0;
	u32 num_stripes;
	u32 array_size;
	u32 len = 0;
	u32 cur;
	struct btrfs_key key;

	lockdep_assert_held(&fs_info->chunk_mutex);
	array_size = btrfs_super_sys_array_size(super_copy);

	ptr = super_copy->sys_chunk_array;
	cur = 0;

	while (cur < array_size) {
		disk_key = (struct btrfs_disk_key *)ptr;
		btrfs_disk_key_to_cpu(&key, disk_key);

		len = sizeof(*disk_key);

		if (key.type == BTRFS_CHUNK_ITEM_KEY) {
			/* Entry size depends on the chunk's stripe count. */
			chunk = (struct btrfs_chunk *)(ptr + len);
			num_stripes = btrfs_stack_chunk_num_stripes(chunk);
			len += btrfs_chunk_item_size(num_stripes);
		} else {
			/* The array may only hold chunk items. */
			ret = -EIO;
			break;
		}
		if (key.objectid == BTRFS_FIRST_CHUNK_TREE_OBJECTID &&
		    key.offset == chunk_offset) {
			memmove(ptr, ptr + len, array_size - (cur + len));
			array_size -= len;
			btrfs_set_super_sys_array_size(super_copy, array_size);
		} else {
			/*
			 * Advance only when nothing was removed: after the
			 * memmove above the next entry sits at the current
			 * position and is examined on the next pass.
			 */
			ptr += len;
			cur += len;
		}
	}
	return ret;
}
2940
2941
2942
2943
2944
2945
2946
2947
2948struct extent_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info,
2949 u64 logical, u64 length)
2950{
2951 struct extent_map_tree *em_tree;
2952 struct extent_map *em;
2953
2954 em_tree = &fs_info->mapping_tree;
2955 read_lock(&em_tree->lock);
2956 em = lookup_extent_mapping(em_tree, logical, length);
2957 read_unlock(&em_tree->lock);
2958
2959 if (!em) {
2960 btrfs_crit(fs_info, "unable to find logical %llu length %llu",
2961 logical, length);
2962 return ERR_PTR(-EINVAL);
2963 }
2964
2965 if (em->start > logical || em->start + em->len < logical) {
2966 btrfs_crit(fs_info,
2967 "found a bad mapping, wanted %llu-%llu, found %llu-%llu",
2968 logical, length, em->start, em->start + em->len);
2969 free_extent_map(em);
2970 return ERR_PTR(-EINVAL);
2971 }
2972
2973
2974 return em;
2975}
2976
2977static int remove_chunk_item(struct btrfs_trans_handle *trans,
2978 struct map_lookup *map, u64 chunk_offset)
2979{
2980 int i;
2981
2982
2983
2984
2985
2986
2987 lockdep_assert_held(&trans->fs_info->chunk_mutex);
2988
2989 for (i = 0; i < map->num_stripes; i++) {
2990 int ret;
2991
2992 ret = btrfs_update_device(trans, map->stripes[i].dev);
2993 if (ret)
2994 return ret;
2995 }
2996
2997 return btrfs_free_chunk(trans, chunk_offset);
2998}
2999
/*
 * Remove the chunk at @chunk_offset: free its device extents, delete the
 * chunk item (and, for SYSTEM chunks, the superblock array copy), update
 * the per-device items and finally remove the block group.
 */
int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct extent_map *em;
	struct map_lookup *map;
	u64 dev_extent_len = 0;
	int i, ret = 0;
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;

	em = btrfs_get_chunk_map(fs_info, chunk_offset, 1);
	if (IS_ERR(em)) {
		/*
		 * The chunk map must exist for a chunk we were asked to
		 * remove; failure to find it indicates corruption.
		 */
		ASSERT(0);
		return PTR_ERR(em);
	}
	map = em->map_lookup;

	/*
	 * Hold the device list mutex while releasing the device extent of
	 * every stripe, so the device set is stable while we adjust
	 * bytes_used and free_chunk_space below.
	 */
	mutex_lock(&fs_devices->device_list_mutex);
	for (i = 0; i < map->num_stripes; i++) {
		struct btrfs_device *device = map->stripes[i].dev;
		ret = btrfs_free_dev_extent(trans, device,
					    map->stripes[i].physical,
					    &dev_extent_len);
		if (ret) {
			mutex_unlock(&fs_devices->device_list_mutex);
			btrfs_abort_transaction(trans, ret);
			goto out;
		}

		if (device->bytes_used > 0) {
			mutex_lock(&fs_info->chunk_mutex);
			btrfs_device_set_bytes_used(device,
					device->bytes_used - dev_extent_len);
			atomic64_add(dev_extent_len, &fs_info->free_chunk_space);
			btrfs_clear_space_info_full(fs_info);
			mutex_unlock(&fs_info->chunk_mutex);
		}
	}
	mutex_unlock(&fs_devices->device_list_mutex);

	/*
	 * Take chunk_mutex for the chunk item removal so chunk allocation
	 * cannot race with us.  removing_chunk marks this transaction as
	 * entitled to the reserves set up by check_system_chunk(); the flag
	 * and the mutex are dropped together once the items are gone, or in
	 * the "out" path on error.
	 */
	trans->removing_chunk = true;
	mutex_lock(&fs_info->chunk_mutex);

	check_system_chunk(trans, map->type);

	ret = remove_chunk_item(trans, map, chunk_offset);
	/*
	 * Removing the chunk item may need to COW system-chunk metadata.
	 * If that fails with -ENOSPC, allocate a fresh SYSTEM block group
	 * and retry the removal once before giving up.
	 */
	if (ret == -ENOSPC) {
		const u64 sys_flags = btrfs_system_alloc_profile(fs_info);
		struct btrfs_block_group *sys_bg;

		sys_bg = btrfs_alloc_chunk(trans, sys_flags);
		if (IS_ERR(sys_bg)) {
			ret = PTR_ERR(sys_bg);
			btrfs_abort_transaction(trans, ret);
			goto out;
		}

		ret = btrfs_chunk_alloc_add_chunk_item(trans, sys_bg);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}

		ret = remove_chunk_item(trans, map, chunk_offset);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
	} else if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	trace_btrfs_chunk_free(fs_info, map, chunk_offset, em->len);

	if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
		/* SYSTEM chunks are also mirrored in the superblock array. */
		ret = btrfs_del_sys_chunk(fs_info, chunk_offset);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
	}

	mutex_unlock(&fs_info->chunk_mutex);
	trans->removing_chunk = false;

	/*
	 * Release the chunk metadata reservation before removing the block
	 * group, which no longer needs it.
	 */
	btrfs_trans_release_chunk_metadata(trans);

	ret = btrfs_remove_block_group(trans, chunk_offset, em);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

out:
	/* Error paths may arrive here with chunk_mutex still held. */
	if (trans->removing_chunk) {
		mutex_unlock(&fs_info->chunk_mutex);
		trans->removing_chunk = false;
	}
	/* Drop the reference taken by btrfs_get_chunk_map(). */
	free_extent_map(em);
	return ret;
}
3156
/*
 * Relocate all data out of the chunk at @chunk_offset and then remove the
 * now-empty chunk and its block group.
 *
 * Caller must hold fs_info->reclaim_bgs_lock (asserted below), which
 * serializes chunk relocation and removal.
 */
int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
{
	struct btrfs_root *root = fs_info->chunk_root;
	struct btrfs_trans_handle *trans;
	struct btrfs_block_group *block_group;
	u64 length;
	int ret;

	lockdep_assert_held(&fs_info->reclaim_bgs_lock);

	/* Pause scrub while the block group's contents are moved away. */
	btrfs_scrub_pause(fs_info);
	ret = btrfs_relocate_block_group(fs_info, chunk_offset);
	btrfs_scrub_continue(fs_info);
	if (ret)
		return ret;

	block_group = btrfs_lookup_block_group(fs_info, chunk_offset);
	if (!block_group)
		return -ENOENT;
	/* No point discarding a block group that is about to be removed. */
	btrfs_discard_cancel_work(&fs_info->discard_ctl, block_group);
	length = block_group->length;
	btrfs_put_block_group(block_group);

	/*
	 * On zoned filesystems, discard resets the underlying zone so it
	 * can be reused.  A failure here is logged but not fatal.
	 */
	if (btrfs_is_zoned(fs_info)) {
		ret = btrfs_discard_extent(fs_info, chunk_offset, length, NULL);
		if (ret)
			btrfs_info(fs_info,
				"failed to reset zone %llu after relocation",
				chunk_offset);
	}

	trans = btrfs_start_trans_remove_block_group(root->fs_info,
						     chunk_offset);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		btrfs_handle_fs_error(root->fs_info, ret, NULL);
		return ret;
	}

	/* The chunk is empty now; delete it and its block group. */
	ret = btrfs_remove_chunk(trans, chunk_offset);
	btrfs_end_transaction(trans);
	return ret;
}
3223
/*
 * Relocate every SYSTEM chunk, walking the chunk tree from the highest to
 * the lowest offset.  Chunks that fail with -ENOSPC are counted and the
 * whole walk is retried once; anything still failing yields -ENOSPC.
 */
static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *chunk_root = fs_info->chunk_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_chunk *chunk;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u64 chunk_type;
	bool retried = false;
	int failed = 0;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

again:
	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
	key.offset = (u64)-1;
	key.type = BTRFS_CHUNK_ITEM_KEY;

	while (1) {
		/*
		 * reclaim_bgs_lock is required by btrfs_relocate_chunk()
		 * and must be released on every exit from this iteration.
		 */
		mutex_lock(&fs_info->reclaim_bgs_lock);
		ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
		if (ret < 0) {
			mutex_unlock(&fs_info->reclaim_bgs_lock);
			goto error;
		}
		/* key.offset is (u64)-1, so an exact match cannot happen. */
		BUG_ON(ret == 0);

		ret = btrfs_previous_item(chunk_root, path, key.objectid,
					  key.type);
		/* Non-zero means we are done (or failed): drop the lock. */
		if (ret)
			mutex_unlock(&fs_info->reclaim_bgs_lock);
		if (ret < 0)
			goto error;
		if (ret > 0)
			break;

		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		chunk = btrfs_item_ptr(leaf, path->slots[0],
				       struct btrfs_chunk);
		chunk_type = btrfs_chunk_type(leaf, chunk);
		/* Release the path before relocating, which starts a transaction. */
		btrfs_release_path(path);

		if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) {
			ret = btrfs_relocate_chunk(fs_info, found_key.offset);
			if (ret == -ENOSPC)
				failed++;
			else
				BUG_ON(ret);
		}
		mutex_unlock(&fs_info->reclaim_bgs_lock);

		if (found_key.offset == 0)
			break;
		key.offset = found_key.offset - 1;
	}
	ret = 0;
	if (failed && !retried) {
		/* One more full pass for the chunks that hit -ENOSPC. */
		failed = 0;
		retried = true;
		goto again;
	} else if (WARN_ON(failed && retried)) {
		ret = -ENOSPC;
	}
error:
	btrfs_free_path(path);
	return ret;
}
3297
3298
3299
3300
3301
3302
3303static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info,
3304 u64 chunk_offset)
3305{
3306 struct btrfs_block_group *cache;
3307 u64 bytes_used;
3308 u64 chunk_type;
3309
3310 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
3311 ASSERT(cache);
3312 chunk_type = cache->flags;
3313 btrfs_put_block_group(cache);
3314
3315 if (!(chunk_type & BTRFS_BLOCK_GROUP_DATA))
3316 return 0;
3317
3318 spin_lock(&fs_info->data_sinfo->lock);
3319 bytes_used = fs_info->data_sinfo->bytes_used;
3320 spin_unlock(&fs_info->data_sinfo->lock);
3321
3322 if (!bytes_used) {
3323 struct btrfs_trans_handle *trans;
3324 int ret;
3325
3326 trans = btrfs_join_transaction(fs_info->tree_root);
3327 if (IS_ERR(trans))
3328 return PTR_ERR(trans);
3329
3330 ret = btrfs_force_chunk_alloc(trans, BTRFS_BLOCK_GROUP_DATA);
3331 btrfs_end_transaction(trans);
3332 if (ret < 0)
3333 return ret;
3334 return 1;
3335 }
3336
3337 return 0;
3338}
3339
3340static int insert_balance_item(struct btrfs_fs_info *fs_info,
3341 struct btrfs_balance_control *bctl)
3342{
3343 struct btrfs_root *root = fs_info->tree_root;
3344 struct btrfs_trans_handle *trans;
3345 struct btrfs_balance_item *item;
3346 struct btrfs_disk_balance_args disk_bargs;
3347 struct btrfs_path *path;
3348 struct extent_buffer *leaf;
3349 struct btrfs_key key;
3350 int ret, err;
3351
3352 path = btrfs_alloc_path();
3353 if (!path)
3354 return -ENOMEM;
3355
3356 trans = btrfs_start_transaction(root, 0);
3357 if (IS_ERR(trans)) {
3358 btrfs_free_path(path);
3359 return PTR_ERR(trans);
3360 }
3361
3362 key.objectid = BTRFS_BALANCE_OBJECTID;
3363 key.type = BTRFS_TEMPORARY_ITEM_KEY;
3364 key.offset = 0;
3365
3366 ret = btrfs_insert_empty_item(trans, root, path, &key,
3367 sizeof(*item));
3368 if (ret)
3369 goto out;
3370
3371 leaf = path->nodes[0];
3372 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
3373
3374 memzero_extent_buffer(leaf, (unsigned long)item, sizeof(*item));
3375
3376 btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->data);
3377 btrfs_set_balance_data(leaf, item, &disk_bargs);
3378 btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->meta);
3379 btrfs_set_balance_meta(leaf, item, &disk_bargs);
3380 btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->sys);
3381 btrfs_set_balance_sys(leaf, item, &disk_bargs);
3382
3383 btrfs_set_balance_flags(leaf, item, bctl->flags);
3384
3385 btrfs_mark_buffer_dirty(leaf);
3386out:
3387 btrfs_free_path(path);
3388 err = btrfs_commit_transaction(trans);
3389 if (err && !ret)
3390 ret = err;
3391 return ret;
3392}
3393
3394static int del_balance_item(struct btrfs_fs_info *fs_info)
3395{
3396 struct btrfs_root *root = fs_info->tree_root;
3397 struct btrfs_trans_handle *trans;
3398 struct btrfs_path *path;
3399 struct btrfs_key key;
3400 int ret, err;
3401
3402 path = btrfs_alloc_path();
3403 if (!path)
3404 return -ENOMEM;
3405
3406 trans = btrfs_start_transaction_fallback_global_rsv(root, 0);
3407 if (IS_ERR(trans)) {
3408 btrfs_free_path(path);
3409 return PTR_ERR(trans);
3410 }
3411
3412 key.objectid = BTRFS_BALANCE_OBJECTID;
3413 key.type = BTRFS_TEMPORARY_ITEM_KEY;
3414 key.offset = 0;
3415
3416 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3417 if (ret < 0)
3418 goto out;
3419 if (ret > 0) {
3420 ret = -ENOENT;
3421 goto out;
3422 }
3423
3424 ret = btrfs_del_item(trans, root, path);
3425out:
3426 btrfs_free_path(path);
3427 err = btrfs_commit_transaction(trans);
3428 if (err && !ret)
3429 ret = err;
3430 return ret;
3431}
3432
3433
3434
3435
3436
3437static void update_balance_args(struct btrfs_balance_control *bctl)
3438{
3439
3440
3441
3442 if (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)
3443 bctl->data.flags |= BTRFS_BALANCE_ARGS_SOFT;
3444 if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)
3445 bctl->sys.flags |= BTRFS_BALANCE_ARGS_SOFT;
3446 if (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)
3447 bctl->meta.flags |= BTRFS_BALANCE_ARGS_SOFT;
3448
3449
3450
3451
3452
3453
3454
3455
3456 if (!(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE) &&
3457 !(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
3458 !(bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
3459 bctl->data.flags |= BTRFS_BALANCE_ARGS_USAGE;
3460 bctl->data.usage = 90;
3461 }
3462 if (!(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE) &&
3463 !(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
3464 !(bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
3465 bctl->sys.flags |= BTRFS_BALANCE_ARGS_USAGE;
3466 bctl->sys.usage = 90;
3467 }
3468 if (!(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE) &&
3469 !(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
3470 !(bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
3471 bctl->meta.flags |= BTRFS_BALANCE_ARGS_USAGE;
3472 bctl->meta.usage = 90;
3473 }
3474}
3475
3476
3477
3478
3479static void reset_balance_state(struct btrfs_fs_info *fs_info)
3480{
3481 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3482 int ret;
3483
3484 BUG_ON(!fs_info->balance_ctl);
3485
3486 spin_lock(&fs_info->balance_lock);
3487 fs_info->balance_ctl = NULL;
3488 spin_unlock(&fs_info->balance_lock);
3489
3490 kfree(bctl);
3491 ret = del_balance_item(fs_info);
3492 if (ret)
3493 btrfs_handle_fs_error(fs_info, ret, NULL);
3494}
3495
3496
3497
3498
3499
3500static int chunk_profiles_filter(u64 chunk_type,
3501 struct btrfs_balance_args *bargs)
3502{
3503 chunk_type = chunk_to_extended(chunk_type) &
3504 BTRFS_EXTENDED_PROFILE_MASK;
3505
3506 if (bargs->profiles & chunk_type)
3507 return 0;
3508
3509 return 1;
3510}
3511
3512static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
3513 struct btrfs_balance_args *bargs)
3514{
3515 struct btrfs_block_group *cache;
3516 u64 chunk_used;
3517 u64 user_thresh_min;
3518 u64 user_thresh_max;
3519 int ret = 1;
3520
3521 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
3522 chunk_used = cache->used;
3523
3524 if (bargs->usage_min == 0)
3525 user_thresh_min = 0;
3526 else
3527 user_thresh_min = div_factor_fine(cache->length,
3528 bargs->usage_min);
3529
3530 if (bargs->usage_max == 0)
3531 user_thresh_max = 1;
3532 else if (bargs->usage_max > 100)
3533 user_thresh_max = cache->length;
3534 else
3535 user_thresh_max = div_factor_fine(cache->length,
3536 bargs->usage_max);
3537
3538 if (user_thresh_min <= chunk_used && chunk_used < user_thresh_max)
3539 ret = 0;
3540
3541 btrfs_put_block_group(cache);
3542 return ret;
3543}
3544
3545static int chunk_usage_filter(struct btrfs_fs_info *fs_info,
3546 u64 chunk_offset, struct btrfs_balance_args *bargs)
3547{
3548 struct btrfs_block_group *cache;
3549 u64 chunk_used, user_thresh;
3550 int ret = 1;
3551
3552 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
3553 chunk_used = cache->used;
3554
3555 if (bargs->usage_min == 0)
3556 user_thresh = 1;
3557 else if (bargs->usage > 100)
3558 user_thresh = cache->length;
3559 else
3560 user_thresh = div_factor_fine(cache->length, bargs->usage);
3561
3562 if (chunk_used < user_thresh)
3563 ret = 0;
3564
3565 btrfs_put_block_group(cache);
3566 return ret;
3567}
3568
3569static int chunk_devid_filter(struct extent_buffer *leaf,
3570 struct btrfs_chunk *chunk,
3571 struct btrfs_balance_args *bargs)
3572{
3573 struct btrfs_stripe *stripe;
3574 int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
3575 int i;
3576
3577 for (i = 0; i < num_stripes; i++) {
3578 stripe = btrfs_stripe_nr(chunk, i);
3579 if (btrfs_stripe_devid(leaf, stripe) == bargs->devid)
3580 return 0;
3581 }
3582
3583 return 1;
3584}
3585
3586static u64 calc_data_stripes(u64 type, int num_stripes)
3587{
3588 const int index = btrfs_bg_flags_to_raid_index(type);
3589 const int ncopies = btrfs_raid_array[index].ncopies;
3590 const int nparity = btrfs_raid_array[index].nparity;
3591
3592 return (num_stripes - nparity) / ncopies;
3593}
3594
3595
3596static int chunk_drange_filter(struct extent_buffer *leaf,
3597 struct btrfs_chunk *chunk,
3598 struct btrfs_balance_args *bargs)
3599{
3600 struct btrfs_stripe *stripe;
3601 int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
3602 u64 stripe_offset;
3603 u64 stripe_length;
3604 u64 type;
3605 int factor;
3606 int i;
3607
3608 if (!(bargs->flags & BTRFS_BALANCE_ARGS_DEVID))
3609 return 0;
3610
3611 type = btrfs_chunk_type(leaf, chunk);
3612 factor = calc_data_stripes(type, num_stripes);
3613
3614 for (i = 0; i < num_stripes; i++) {
3615 stripe = btrfs_stripe_nr(chunk, i);
3616 if (btrfs_stripe_devid(leaf, stripe) != bargs->devid)
3617 continue;
3618
3619 stripe_offset = btrfs_stripe_offset(leaf, stripe);
3620 stripe_length = btrfs_chunk_length(leaf, chunk);
3621 stripe_length = div_u64(stripe_length, factor);
3622
3623 if (stripe_offset < bargs->pend &&
3624 stripe_offset + stripe_length > bargs->pstart)
3625 return 0;
3626 }
3627
3628 return 1;
3629}
3630
3631
3632static int chunk_vrange_filter(struct extent_buffer *leaf,
3633 struct btrfs_chunk *chunk,
3634 u64 chunk_offset,
3635 struct btrfs_balance_args *bargs)
3636{
3637 if (chunk_offset < bargs->vend &&
3638 chunk_offset + btrfs_chunk_length(leaf, chunk) > bargs->vstart)
3639
3640 return 0;
3641
3642 return 1;
3643}
3644
3645static int chunk_stripes_range_filter(struct extent_buffer *leaf,
3646 struct btrfs_chunk *chunk,
3647 struct btrfs_balance_args *bargs)
3648{
3649 int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
3650
3651 if (bargs->stripes_min <= num_stripes
3652 && num_stripes <= bargs->stripes_max)
3653 return 0;
3654
3655 return 1;
3656}
3657
3658static int chunk_soft_convert_filter(u64 chunk_type,
3659 struct btrfs_balance_args *bargs)
3660{
3661 if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT))
3662 return 0;
3663
3664 chunk_type = chunk_to_extended(chunk_type) &
3665 BTRFS_EXTENDED_PROFILE_MASK;
3666
3667 if (bargs->target == chunk_type)
3668 return 1;
3669
3670 return 0;
3671}
3672
/*
 * Run the chunk at @chunk_offset through all configured balance filters.
 *
 * Returns 1 if the chunk should be balanced, 0 if it should be skipped.
 * May decrement the limit/limit_max counters of the matching arg set, so
 * the count-limiting filters must stay last.
 */
static int should_balance_chunk(struct extent_buffer *leaf,
				struct btrfs_chunk *chunk, u64 chunk_offset)
{
	struct btrfs_fs_info *fs_info = leaf->fs_info;
	struct btrfs_balance_control *bctl = fs_info->balance_ctl;
	struct btrfs_balance_args *bargs = NULL;
	u64 chunk_type = btrfs_chunk_type(leaf, chunk);

	/* type filter */
	if (!((chunk_type & BTRFS_BLOCK_GROUP_TYPE_MASK) &
	      (bctl->flags & BTRFS_BALANCE_TYPE_MASK))) {
		return 0;
	}

	/* Pick the arg set matching the chunk's block group type. */
	if (chunk_type & BTRFS_BLOCK_GROUP_DATA)
		bargs = &bctl->data;
	else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
		bargs = &bctl->sys;
	else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA)
		bargs = &bctl->meta;

	/* profiles filter */
	if ((bargs->flags & BTRFS_BALANCE_ARGS_PROFILES) &&
	    chunk_profiles_filter(chunk_type, bargs)) {
		return 0;
	}

	/* usage filter (single threshold or range, mutually exclusive) */
	if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE) &&
	    chunk_usage_filter(fs_info, chunk_offset, bargs)) {
		return 0;
	} else if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
	    chunk_usage_range_filter(fs_info, chunk_offset, bargs)) {
		return 0;
	}

	/* devid filter */
	if ((bargs->flags & BTRFS_BALANCE_ARGS_DEVID) &&
	    chunk_devid_filter(leaf, chunk, bargs)) {
		return 0;
	}

	/* drange filter, only makes sense together with the devid filter */
	if ((bargs->flags & BTRFS_BALANCE_ARGS_DRANGE) &&
	    chunk_drange_filter(leaf, chunk, bargs)) {
		return 0;
	}

	/* vrange filter */
	if ((bargs->flags & BTRFS_BALANCE_ARGS_VRANGE) &&
	    chunk_vrange_filter(leaf, chunk, chunk_offset, bargs)) {
		return 0;
	}

	/* stripes range filter */
	if ((bargs->flags & BTRFS_BALANCE_ARGS_STRIPES_RANGE) &&
	    chunk_stripes_range_filter(leaf, chunk, bargs)) {
		return 0;
	}

	/* soft profile conversion filter */
	if ((bargs->flags & BTRFS_BALANCE_ARGS_SOFT) &&
	    chunk_soft_convert_filter(chunk_type, bargs)) {
		return 0;
	}

	/*
	 * Count-limiting filters: these mutate bargs, so they must run
	 * only after all other filters have accepted the chunk.
	 */
	if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT)) {
		if (bargs->limit == 0)
			return 0;
		else
			bargs->limit--;
	} else if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT_RANGE)) {
		/*
		 * Same as the plain limit; limit_min cannot be enforced
		 * here since the total number of matching chunks is not
		 * known at this point (it is handled by the caller).
		 */
		if (bargs->limit_max == 0)
			return 0;
		else
			bargs->limit_max--;
	}

	return 1;
}
3761
/*
 * Main balance loop.  Walks the chunk tree from the highest to the lowest
 * offset in two passes: a counting pass that only tallies how many chunks
 * pass the filters (bctl->stat.expected), then a second pass that actually
 * relocates the matching chunks.
 *
 * Returns 0 on success, -ECANCELED when a pause/cancel was requested,
 * -ENOSPC if any chunk could not be relocated for lack of space, or
 * another negative errno.
 */
static int __btrfs_balance(struct btrfs_fs_info *fs_info)
{
	struct btrfs_balance_control *bctl = fs_info->balance_ctl;
	struct btrfs_root *chunk_root = fs_info->chunk_root;
	u64 chunk_type;
	struct btrfs_chunk *chunk;
	struct btrfs_path *path = NULL;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *leaf;
	int slot;
	int ret;
	int enospc_errors = 0;
	bool counting = true;
	/* Save the configured limits; the counting pass consumes them. */
	u64 limit_data = bctl->data.limit;
	u64 limit_meta = bctl->meta.limit;
	u64 limit_sys = bctl->sys.limit;
	u32 count_data = 0;
	u32 count_meta = 0;
	u32 count_sys = 0;
	int chunk_reserved = 0;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto error;
	}

	/* Zero out the statistics before the counting pass. */
	spin_lock(&fs_info->balance_lock);
	memset(&bctl->stat, 0, sizeof(bctl->stat));
	spin_unlock(&fs_info->balance_lock);
again:
	if (!counting) {
		/*
		 * Restore the limits decremented by should_balance_chunk()
		 * during the counting pass before the real pass starts.
		 */
		bctl->data.limit = limit_data;
		bctl->meta.limit = limit_meta;
		bctl->sys.limit = limit_sys;
	}
	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
	key.offset = (u64)-1;
	key.type = BTRFS_CHUNK_ITEM_KEY;

	while (1) {
		if ((!counting && atomic_read(&fs_info->balance_pause_req)) ||
		    atomic_read(&fs_info->balance_cancel_req)) {
			ret = -ECANCELED;
			goto error;
		}

		/* Needed by btrfs_relocate_chunk(); released on every path. */
		mutex_lock(&fs_info->reclaim_bgs_lock);
		ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
		if (ret < 0) {
			mutex_unlock(&fs_info->reclaim_bgs_lock);
			goto error;
		}

		/*
		 * key.offset is (u64)-1, so an exact match is impossible
		 * unless the tree is corrupted.
		 */
		if (ret == 0)
			BUG();

		ret = btrfs_previous_item(chunk_root, path, 0,
					  BTRFS_CHUNK_ITEM_KEY);
		if (ret) {
			mutex_unlock(&fs_info->reclaim_bgs_lock);
			ret = 0;
			break;
		}

		leaf = path->nodes[0];
		slot = path->slots[0];
		btrfs_item_key_to_cpu(leaf, &found_key, slot);

		if (found_key.objectid != key.objectid) {
			mutex_unlock(&fs_info->reclaim_bgs_lock);
			break;
		}

		chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
		chunk_type = btrfs_chunk_type(leaf, chunk);

		if (!counting) {
			spin_lock(&fs_info->balance_lock);
			bctl->stat.considered++;
			spin_unlock(&fs_info->balance_lock);
		}

		ret = should_balance_chunk(leaf, chunk, found_key.offset);

		btrfs_release_path(path);
		if (!ret) {
			mutex_unlock(&fs_info->reclaim_bgs_lock);
			goto loop;
		}

		if (counting) {
			/* First pass: just count, never relocate. */
			mutex_unlock(&fs_info->reclaim_bgs_lock);
			spin_lock(&fs_info->balance_lock);
			bctl->stat.expected++;
			spin_unlock(&fs_info->balance_lock);

			if (chunk_type & BTRFS_BLOCK_GROUP_DATA)
				count_data++;
			else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
				count_sys++;
			else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA)
				count_meta++;

			goto loop;
		}

		/*
		 * Apply limit_min: skip chunks until the per-type counter
		 * from the counting pass drops below the configured minimum.
		 */
		if (((chunk_type & BTRFS_BLOCK_GROUP_DATA) &&
			count_data < bctl->data.limit_min)
				|| ((chunk_type & BTRFS_BLOCK_GROUP_METADATA) &&
			count_meta < bctl->meta.limit_min)
				|| ((chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) &&
			count_sys < bctl->sys.limit_min)) {
			mutex_unlock(&fs_info->reclaim_bgs_lock);
			goto loop;
		}

		if (!chunk_reserved) {
			/*
			 * We may be relocating the only data chunk we have,
			 * which could potentially end up losing the data
			 * raid profile, so pre-allocate an empty one if
			 * needed (done at most once per balance run).
			 */
			ret = btrfs_may_alloc_data_chunk(fs_info,
							 found_key.offset);
			if (ret < 0) {
				mutex_unlock(&fs_info->reclaim_bgs_lock);
				goto error;
			} else if (ret == 1) {
				chunk_reserved = 1;
			}
		}

		ret = btrfs_relocate_chunk(fs_info, found_key.offset);
		mutex_unlock(&fs_info->reclaim_bgs_lock);
		if (ret == -ENOSPC) {
			enospc_errors++;
		} else if (ret == -ETXTBSY) {
			btrfs_info(fs_info,
	   "skipping relocation of block group %llu due to active swapfile",
				   found_key.offset);
			ret = 0;
		} else if (ret) {
			goto error;
		} else {
			spin_lock(&fs_info->balance_lock);
			bctl->stat.completed++;
			spin_unlock(&fs_info->balance_lock);
		}
loop:
		if (found_key.offset == 0)
			break;
		key.offset = found_key.offset - 1;
	}

	if (counting) {
		/* Counting pass done; rewind and do the real pass. */
		btrfs_release_path(path);
		counting = false;
		goto again;
	}
error:
	btrfs_free_path(path);
	if (enospc_errors) {
		btrfs_info(fs_info, "%d enospc errors during balance",
			   enospc_errors);
		if (!ret)
			ret = -ENOSPC;
	}

	return ret;
}
3949
3950
3951
3952
3953
3954
3955static int alloc_profile_is_valid(u64 flags, int extended)
3956{
3957 u64 mask = (extended ? BTRFS_EXTENDED_PROFILE_MASK :
3958 BTRFS_BLOCK_GROUP_PROFILE_MASK);
3959
3960 flags &= ~BTRFS_BLOCK_GROUP_TYPE_MASK;
3961
3962
3963 if (flags & ~mask)
3964 return 0;
3965
3966
3967 if (flags == 0)
3968 return !extended;
3969
3970 return has_single_bit_set(flags);
3971}
3972
3973static inline int balance_need_close(struct btrfs_fs_info *fs_info)
3974{
3975
3976 return atomic_read(&fs_info->balance_cancel_req) ||
3977 (atomic_read(&fs_info->balance_pause_req) == 0 &&
3978 atomic_read(&fs_info->balance_cancel_req) == 0);
3979}
3980
3981
3982
3983
3984
3985static inline int validate_convert_profile(struct btrfs_fs_info *fs_info,
3986 const struct btrfs_balance_args *bargs,
3987 u64 allowed, const char *type)
3988{
3989 if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT))
3990 return true;
3991
3992 if (fs_info->sectorsize < PAGE_SIZE &&
3993 bargs->target & BTRFS_BLOCK_GROUP_RAID56_MASK) {
3994 btrfs_err(fs_info,
3995 "RAID56 is not yet supported for sectorsize %u with page size %lu",
3996 fs_info->sectorsize, PAGE_SIZE);
3997 return false;
3998 }
3999
4000 if (alloc_profile_is_valid(bargs->target, 1) &&
4001 (bargs->target & ~allowed) == 0)
4002 return true;
4003
4004 btrfs_err(fs_info, "balance: invalid convert %s profile %s",
4005 type, btrfs_bg_type_to_raid_name(bargs->target));
4006 return false;
4007}
4008
4009
4010
4011
4012
4013
/*
 * Render the filters of one balance_args structure into a human readable
 * string, e.g. "convert=raid1,soft,usage=90".
 *
 * @bargs:    the args to describe
 * @buf:      output buffer
 * @size_buf: size of @buf in bytes; on overflow the output is truncated
 *            after the last item that fit completely
 */
static void describe_balance_args(struct btrfs_balance_args *bargs, char *buf,
				  u32 size_buf)
{
	int ret;
	u32 size_bp = size_buf;		/* bytes remaining in @buf */
	char *bp = buf;			/* current write position */
	u64 flags = bargs->flags;
	char tmp_buf[128] = {'\0'};

	if (!flags)
		return;

	/*
	 * Each CHECK_APPEND_* helper snprintf()s one item into the buffer
	 * and advances bp/size_bp; on error or truncation it jumps to
	 * out_overflow, keeping whatever was appended so far.
	 */
#define CHECK_APPEND_NOARG(a)						\
	do {								\
		ret = snprintf(bp, size_bp, (a));			\
		if (ret < 0 || ret >= size_bp)				\
			goto out_overflow;				\
		size_bp -= ret;						\
		bp += ret;						\
	} while (0)

#define CHECK_APPEND_1ARG(a, v1)					\
	do {								\
		ret = snprintf(bp, size_bp, (a), (v1));			\
		if (ret < 0 || ret >= size_bp)				\
			goto out_overflow;				\
		size_bp -= ret;						\
		bp += ret;						\
	} while (0)

#define CHECK_APPEND_2ARG(a, v1, v2)					\
	do {								\
		ret = snprintf(bp, size_bp, (a), (v1), (v2));		\
		if (ret < 0 || ret >= size_bp)				\
			goto out_overflow;				\
		size_bp -= ret;						\
		bp += ret;						\
	} while (0)

	if (flags & BTRFS_BALANCE_ARGS_CONVERT)
		CHECK_APPEND_1ARG("convert=%s,",
				  btrfs_bg_type_to_raid_name(bargs->target));

	if (flags & BTRFS_BALANCE_ARGS_SOFT)
		CHECK_APPEND_NOARG("soft,");

	if (flags & BTRFS_BALANCE_ARGS_PROFILES) {
		btrfs_describe_block_groups(bargs->profiles, tmp_buf,
					    sizeof(tmp_buf));
		CHECK_APPEND_1ARG("profiles=%s,", tmp_buf);
	}

	if (flags & BTRFS_BALANCE_ARGS_USAGE)
		CHECK_APPEND_1ARG("usage=%llu,", bargs->usage);

	if (flags & BTRFS_BALANCE_ARGS_USAGE_RANGE)
		CHECK_APPEND_2ARG("usage=%u..%u,",
				  bargs->usage_min, bargs->usage_max);

	if (flags & BTRFS_BALANCE_ARGS_DEVID)
		CHECK_APPEND_1ARG("devid=%llu,", bargs->devid);

	if (flags & BTRFS_BALANCE_ARGS_DRANGE)
		CHECK_APPEND_2ARG("drange=%llu..%llu,",
				  bargs->pstart, bargs->pend);

	if (flags & BTRFS_BALANCE_ARGS_VRANGE)
		CHECK_APPEND_2ARG("vrange=%llu..%llu,",
				  bargs->vstart, bargs->vend);

	if (flags & BTRFS_BALANCE_ARGS_LIMIT)
		CHECK_APPEND_1ARG("limit=%llu,", bargs->limit);

	if (flags & BTRFS_BALANCE_ARGS_LIMIT_RANGE)
		CHECK_APPEND_2ARG("limit=%u..%u,",
				  bargs->limit_min, bargs->limit_max);

	if (flags & BTRFS_BALANCE_ARGS_STRIPES_RANGE)
		CHECK_APPEND_2ARG("stripes=%u..%u,",
				  bargs->stripes_min, bargs->stripes_max);

#undef CHECK_APPEND_2ARG
#undef CHECK_APPEND_1ARG
#undef CHECK_APPEND_NOARG

out_overflow:
	/* Each item above ends with ','; overwrite the trailing one */
	if (size_bp < size_buf)
		buf[size_buf - size_bp - 1] = '\0';
	else
		buf[0] = '\0';
}
4106
/*
 * Log a message describing the balance that is about to start or resume,
 * rendered as the equivalent "btrfs balance" command line options.
 *
 * Called from btrfs_balance() with fs_info->balance_mutex held, so
 * fs_info->balance_ctl is stable while we format it.
 */
static void describe_balance_start_or_resume(struct btrfs_fs_info *fs_info)
{
	u32 size_buf = 1024;
	char tmp_buf[192] = {'\0'};
	char *buf;
	char *bp;			/* current write position */
	u32 size_bp = size_buf;		/* bytes remaining in @buf */
	int ret;
	struct btrfs_balance_control *bctl = fs_info->balance_ctl;

	buf = kzalloc(size_buf, GFP_KERNEL);
	if (!buf)
		return;

	bp = buf;

	/* Append one formatted item; bail out on error or truncation */
#define CHECK_APPEND_1ARG(a, v1)					\
	do {								\
		ret = snprintf(bp, size_bp, (a), (v1));			\
		if (ret < 0 || ret >= size_bp)				\
			goto out_overflow;				\
		size_bp -= ret;						\
		bp += ret;						\
	} while (0)

	if (bctl->flags & BTRFS_BALANCE_FORCE)
		CHECK_APPEND_1ARG("%s", "-f ");

	if (bctl->flags & BTRFS_BALANCE_DATA) {
		describe_balance_args(&bctl->data, tmp_buf, sizeof(tmp_buf));
		CHECK_APPEND_1ARG("-d%s ", tmp_buf);
	}

	if (bctl->flags & BTRFS_BALANCE_METADATA) {
		describe_balance_args(&bctl->meta, tmp_buf, sizeof(tmp_buf));
		CHECK_APPEND_1ARG("-m%s ", tmp_buf);
	}

	if (bctl->flags & BTRFS_BALANCE_SYSTEM) {
		describe_balance_args(&bctl->sys, tmp_buf, sizeof(tmp_buf));
		CHECK_APPEND_1ARG("-s%s ", tmp_buf);
	}

#undef CHECK_APPEND_1ARG

out_overflow:
	/* Strip the trailing space; buf was zeroed so it stays terminated */
	if (size_bp < size_buf)
		buf[size_buf - size_bp - 1] = '\0';
	btrfs_info(fs_info, "balance: %s %s",
		   (bctl->flags & BTRFS_BALANCE_RESUME) ?
		   "resume" : "start", buf);

	kfree(buf);
}
4162
4163
4164
4165
/*
 * Main entry point of the balance operation.
 *
 * Caller must hold fs_info->balance_mutex; it is dropped around the actual
 * relocation work (__btrfs_balance()) and re-taken afterwards.
 *
 * @fs_info:	the filesystem to balance
 * @bctl:	balance control describing filters and convert targets;
 *		ownership is taken (stored in fs_info->balance_ctl, or freed
 *		on the error path)
 * @bargs:	if non-NULL, filled with the resulting balance state for the
 *		ioctl caller
 *
 * Returns 0 on success, -ECANCELED/-EINTR when canceled, or a negative
 * errno on failure.
 */
int btrfs_balance(struct btrfs_fs_info *fs_info,
		  struct btrfs_balance_control *bctl,
		  struct btrfs_ioctl_balance_args *bargs)
{
	u64 meta_target, data_target;
	u64 allowed;
	int mixed = 0;
	int ret;
	u64 num_devices;
	unsigned seq;
	bool reducing_redundancy;
	int i;

	if (btrfs_fs_closing(fs_info) ||
	    atomic_read(&fs_info->balance_pause_req) ||
	    btrfs_should_cancel_balance(fs_info)) {
		ret = -EINVAL;
		goto out;
	}

	allowed = btrfs_super_incompat_flags(fs_info->super_copy);
	if (allowed & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
		mixed = 1;

	/*
	 * In case of mixed groups both data and meta must be balanced and
	 * identical options have to be given for both of them.
	 */
	allowed = BTRFS_BALANCE_DATA | BTRFS_BALANCE_METADATA;
	if (mixed && (bctl->flags & allowed)) {
		if (!(bctl->flags & BTRFS_BALANCE_DATA) ||
		    !(bctl->flags & BTRFS_BALANCE_METADATA) ||
		    memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) {
			btrfs_err(fs_info,
	"balance: mixed groups data and metadata options must be the same");
			ret = -EINVAL;
			goto out;
		}
	}

	/*
	 * rw_devices will not change while balance runs; device add/remove/
	 * replace are exclusive operations.
	 */
	num_devices = fs_info->fs_devices->rw_devices;

	/*
	 * SINGLE profile on-disk has no profile bit, but in-memory we have a
	 * special bit for it, to make it easier to distinguish.  Thus we need
	 * to set it manually, or balance would refuse the profile.
	 */
	allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
	for (i = 0; i < ARRAY_SIZE(btrfs_raid_array); i++)
		if (num_devices >= btrfs_raid_array[i].devs_min)
			allowed |= btrfs_raid_array[i].bg_flag;

	if (!validate_convert_profile(fs_info, &bctl->data, allowed, "data") ||
	    !validate_convert_profile(fs_info, &bctl->meta, allowed, "metadata") ||
	    !validate_convert_profile(fs_info, &bctl->sys, allowed, "system")) {
		ret = -EINVAL;
		goto out;
	}

	/*
	 * Allow reducing metadata or system integrity only if --force is
	 * given; the "redundant" profiles are those with extra copies or
	 * failure tolerance.
	 */
	allowed = 0;
	for (i = 0; i < ARRAY_SIZE(btrfs_raid_array); i++) {
		if (btrfs_raid_array[i].ncopies >= 2 ||
		    btrfs_raid_array[i].tolerated_failures >= 1)
			allowed |= btrfs_raid_array[i].bg_flag;
	}
	/* Sample a consistent view of the avail_*_alloc_bits via seqlock */
	do {
		seq = read_seqbegin(&fs_info->profiles_lock);

		if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
		     (fs_info->avail_system_alloc_bits & allowed) &&
		     !(bctl->sys.target & allowed)) ||
		    ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
		     (fs_info->avail_metadata_alloc_bits & allowed) &&
		     !(bctl->meta.target & allowed)))
			reducing_redundancy = true;
		else
			reducing_redundancy = false;

		/* If we're not converting, the target field is uninitialized */
		meta_target = (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
			bctl->meta.target : fs_info->avail_metadata_alloc_bits;
		data_target = (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
			bctl->data.target : fs_info->avail_data_alloc_bits;
	} while (read_seqretry(&fs_info->profiles_lock, seq));

	if (reducing_redundancy) {
		if (bctl->flags & BTRFS_BALANCE_FORCE) {
			btrfs_info(fs_info,
				"balance: force reducing metadata redundancy");
		} else {
			btrfs_err(fs_info,
	"balance: reduces metadata redundancy, use --force if you want this");
			ret = -EINVAL;
			goto out;
		}
	}

	if (btrfs_get_num_tolerated_disk_barrier_failures(meta_target) <
		btrfs_get_num_tolerated_disk_barrier_failures(data_target)) {
		btrfs_warn(fs_info,
	"balance: metadata profile %s has lower redundancy than data profile %s",
			   btrfs_bg_type_to_raid_name(meta_target),
			   btrfs_bg_type_to_raid_name(data_target));
	}

	ret = insert_balance_item(fs_info, bctl);
	if (ret && ret != -EEXIST)
		goto out;

	if (!(bctl->flags & BTRFS_BALANCE_RESUME)) {
		/* Fresh balance: the on-disk item must not have existed */
		BUG_ON(ret == -EEXIST);
		BUG_ON(fs_info->balance_ctl);
		spin_lock(&fs_info->balance_lock);
		fs_info->balance_ctl = bctl;
		spin_unlock(&fs_info->balance_lock);
	} else {
		/* Resume: the on-disk item must already exist */
		BUG_ON(ret != -EEXIST);
		spin_lock(&fs_info->balance_lock);
		update_balance_args(bctl);
		spin_unlock(&fs_info->balance_lock);
	}

	ASSERT(!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
	set_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);
	describe_balance_start_or_resume(fs_info);
	/* Drop the mutex for the long-running relocation work */
	mutex_unlock(&fs_info->balance_mutex);

	ret = __btrfs_balance(fs_info);

	mutex_lock(&fs_info->balance_mutex);
	if (ret == -ECANCELED && atomic_read(&fs_info->balance_pause_req))
		btrfs_info(fs_info, "balance: paused");
	/*
	 * Balance can be canceled by:
	 *
	 * - Regular cancel request: ret == -ECANCELED and balance_cancel_req
	 *   is elevated.
	 *
	 * - Fatal signal to the "btrfs" process: depending on where the
	 *   signal is caught the result is -EINTR or -ECANCELED, with
	 *   balance_cancel_req == 0.
	 *
	 * So here we only check the return value to catch a canceled balance.
	 */
	else if (ret == -ECANCELED || ret == -EINTR)
		btrfs_info(fs_info, "balance: canceled");
	else
		btrfs_info(fs_info, "balance: ended with status: %d", ret);

	clear_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);

	if (bargs) {
		memset(bargs, 0, sizeof(*bargs));
		btrfs_update_ioctl_balance_args(fs_info, bargs);
	}

	/* Tear down the balance state unless it merely paused */
	if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
	    balance_need_close(fs_info)) {
		reset_balance_state(fs_info);
		btrfs_exclop_finish(fs_info);
	}

	wake_up(&fs_info->balance_wait_q);

	return ret;
out:
	if (bctl->flags & BTRFS_BALANCE_RESUME)
		reset_balance_state(fs_info);
	else
		kfree(bctl);
	btrfs_exclop_finish(fs_info);

	return ret;
}
4351
4352static int balance_kthread(void *data)
4353{
4354 struct btrfs_fs_info *fs_info = data;
4355 int ret = 0;
4356
4357 mutex_lock(&fs_info->balance_mutex);
4358 if (fs_info->balance_ctl)
4359 ret = btrfs_balance(fs_info, fs_info->balance_ctl, NULL);
4360 mutex_unlock(&fs_info->balance_mutex);
4361
4362 return ret;
4363}
4364
/*
 * Resume a previously paused balance by spawning a kernel thread that
 * re-enters btrfs_balance().
 *
 * Returns 0 if there is nothing to resume or the resume was skipped (the
 * skip_balance mount option), otherwise the error from kthread_run().
 */
int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
{
	struct task_struct *tsk;

	mutex_lock(&fs_info->balance_mutex);
	if (!fs_info->balance_ctl) {
		mutex_unlock(&fs_info->balance_mutex);
		return 0;
	}
	mutex_unlock(&fs_info->balance_mutex);

	if (btrfs_test_opt(fs_info, SKIP_BALANCE)) {
		btrfs_info(fs_info, "balance: resume skipped");
		return 0;
	}

	/*
	 * Mark the control as a resume so that btrfs_balance() takes the
	 * resume path (the balance item already exists on disk) instead of
	 * starting a fresh balance.
	 */
	spin_lock(&fs_info->balance_lock);
	fs_info->balance_ctl->flags |= BTRFS_BALANCE_RESUME;
	spin_unlock(&fs_info->balance_lock);

	tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
	return PTR_ERR_OR_ZERO(tsk);
}
4393
/*
 * Recover a paused balance from the balance item stored in the tree root
 * (called during mount).  Re-creates fs_info->balance_ctl in paused/resume
 * state; the balance itself is resumed later.
 *
 * Returns 0 when there is no balance item or recovery succeeded, otherwise
 * a negative errno.
 */
int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
{
	struct btrfs_balance_control *bctl;
	struct btrfs_balance_item *item;
	struct btrfs_disk_balance_args disk_bargs;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = BTRFS_BALANCE_OBJECTID;
	key.type = BTRFS_TEMPORARY_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	if (ret > 0) {
		/* No balance item: nothing to recover */
		ret = 0;
		goto out;
	}

	bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
	if (!bctl) {
		ret = -ENOMEM;
		goto out;
	}

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);

	/* Rebuild the in-memory control from the on-disk item */
	bctl->flags = btrfs_balance_flags(leaf, item);
	bctl->flags |= BTRFS_BALANCE_RESUME;

	btrfs_balance_data(leaf, item, &disk_bargs);
	btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs);
	btrfs_balance_meta(leaf, item, &disk_bargs);
	btrfs_disk_balance_args_to_cpu(&bctl->meta, &disk_bargs);
	btrfs_balance_sys(leaf, item, &disk_bargs);
	btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);

	/*
	 * Take the exclusive op status for balance; it stays held while the
	 * balance is paused until user intervention (resume/cancel/umount).
	 * If the status cannot be acquired, warn but do not fail: the
	 * balance remains paused with fs_info::balance_ctl set up, and can
	 * be resumed manually.
	 */
	if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE))
		btrfs_warn(fs_info,
	"balance: cannot set exclusive op status, resume manually");

	btrfs_release_path(path);

	mutex_lock(&fs_info->balance_mutex);
	BUG_ON(fs_info->balance_ctl);
	spin_lock(&fs_info->balance_lock);
	fs_info->balance_ctl = bctl;
	spin_unlock(&fs_info->balance_lock);
	mutex_unlock(&fs_info->balance_mutex);
out:
	btrfs_free_path(path);
	return ret;
}
4465
/*
 * Request that a running balance pauses and wait until it has stopped.
 *
 * Returns 0 when the balance was running and is now paused, -ENOTCONN when
 * no balance is in progress (or one exists but is not running).
 */
int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
{
	int ret = 0;

	mutex_lock(&fs_info->balance_mutex);
	if (!fs_info->balance_ctl) {
		mutex_unlock(&fs_info->balance_mutex);
		return -ENOTCONN;
	}

	if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
		atomic_inc(&fs_info->balance_pause_req);
		/* Drop the mutex so the balance thread can make progress */
		mutex_unlock(&fs_info->balance_mutex);

		/* Wait for the balance thread to notice the pause request */
		wait_event(fs_info->balance_wait_q,
			   !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));

		mutex_lock(&fs_info->balance_mutex);
		/* The balance must not be running any more at this point */
		BUG_ON(test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
		atomic_dec(&fs_info->balance_pause_req);
	} else {
		ret = -ENOTCONN;
	}

	mutex_unlock(&fs_info->balance_mutex);
	return ret;
}
4494
/*
 * Cancel a running or paused balance and tear down its state (including the
 * on-disk balance item).
 *
 * Returns 0 on success, -ENOTCONN when no balance exists, -EROFS on a
 * read-only filesystem.
 */
int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
{
	mutex_lock(&fs_info->balance_mutex);
	if (!fs_info->balance_ctl) {
		mutex_unlock(&fs_info->balance_mutex);
		return -ENOTCONN;
	}

	/*
	 * A paused balance with its item stored on disk can be resumed at
	 * mount time if the mount is read-write.  On a read-only filesystem
	 * we must not cancel, as that would delete the item.
	 */
	if (sb_rdonly(fs_info->sb)) {
		mutex_unlock(&fs_info->balance_mutex);
		return -EROFS;
	}

	atomic_inc(&fs_info->balance_cancel_req);
	/*
	 * If the balance is running, just wait for it to finish: the balance
	 * item is deleted in btrfs_balance() in that case.
	 */
	if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
		mutex_unlock(&fs_info->balance_mutex);
		wait_event(fs_info->balance_wait_q,
			   !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
		mutex_lock(&fs_info->balance_mutex);
	} else {
		mutex_unlock(&fs_info->balance_mutex);
		/*
		 * Lock released to allow other waiters to continue; we'll
		 * re-examine the status again below.
		 */
		mutex_lock(&fs_info->balance_mutex);

		if (fs_info->balance_ctl) {
			reset_balance_state(fs_info);
			btrfs_exclop_finish(fs_info);
			btrfs_info(fs_info, "balance: canceled");
		}
	}

	/* No balance state may survive the cancel */
	BUG_ON(fs_info->balance_ctl ||
		test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
	atomic_dec(&fs_info->balance_cancel_req);
	mutex_unlock(&fs_info->balance_mutex);
	return 0;
}
4544
/*
 * Worker thread that scans all root items in the tree root and adds uuid
 * tree entries for subvolumes whose uuid or received_uuid is set.
 *
 * Always returns 0; failures are only logged.  Releases
 * fs_info->uuid_tree_rescan_sem on exit so waiters can proceed.
 */
int btrfs_uuid_scan_kthread(void *data)
{
	struct btrfs_fs_info *fs_info = data;
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_key key;
	struct btrfs_path *path = NULL;
	int ret = 0;
	struct extent_buffer *eb;
	int slot;
	struct btrfs_root_item root_item;
	u32 item_size;
	struct btrfs_trans_handle *trans = NULL;
	bool closing = false;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	key.objectid = 0;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = 0;

	while (1) {
		/* Bail out early if the filesystem is going down */
		if (btrfs_fs_closing(fs_info)) {
			closing = true;
			break;
		}
		ret = btrfs_search_forward(root, &key, path,
					   BTRFS_OLDEST_GENERATION);
		if (ret) {
			if (ret > 0)
				ret = 0;	/* end of tree reached */
			break;
		}

		/* Only subvolume roots and the fs tree carry uuids */
		if (key.type != BTRFS_ROOT_ITEM_KEY ||
		    (key.objectid < BTRFS_FIRST_FREE_OBJECTID &&
		     key.objectid != BTRFS_FS_TREE_OBJECTID) ||
		    key.objectid > BTRFS_LAST_FREE_OBJECTID)
			goto skip;

		eb = path->nodes[0];
		slot = path->slots[0];
		item_size = btrfs_item_size_nr(eb, slot);
		if (item_size < sizeof(root_item))
			goto skip;

		read_extent_buffer(eb, &root_item,
				   btrfs_item_ptr_offset(eb, slot),
				   (int)sizeof(root_item));
		if (btrfs_root_refs(&root_item) == 0)
			goto skip;

		if (!btrfs_is_empty_uuid(root_item.uuid) ||
		    !btrfs_is_empty_uuid(root_item.received_uuid)) {
			if (trans)
				goto update_tree;

			btrfs_release_path(path);
			/*
			 * 1 - subvol uuid item
			 * 1 - received_subvol uuid item
			 */
			trans = btrfs_start_transaction(fs_info->uuid_root, 2);
			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
				break;
			}
			/* Path was released: restart the search at this key */
			continue;
		} else {
			goto skip;
		}
update_tree:
		btrfs_release_path(path);
		if (!btrfs_is_empty_uuid(root_item.uuid)) {
			ret = btrfs_uuid_tree_add(trans, root_item.uuid,
						  BTRFS_UUID_KEY_SUBVOL,
						  key.objectid);
			if (ret < 0) {
				btrfs_warn(fs_info, "uuid_tree_add failed %d",
					   ret);
				break;
			}
		}

		if (!btrfs_is_empty_uuid(root_item.received_uuid)) {
			ret = btrfs_uuid_tree_add(trans,
						  root_item.received_uuid,
						 BTRFS_UUID_KEY_RECEIVED_SUBVOL,
						  key.objectid);
			if (ret < 0) {
				btrfs_warn(fs_info, "uuid_tree_add failed %d",
					   ret);
				break;
			}
		}

skip:
		btrfs_release_path(path);
		if (trans) {
			ret = btrfs_end_transaction(trans);
			trans = NULL;
			if (ret)
				break;
		}

		/* Advance the key to the next possible root item */
		if (key.offset < (u64)-1) {
			key.offset++;
		} else if (key.type < BTRFS_ROOT_ITEM_KEY) {
			key.offset = 0;
			key.type = BTRFS_ROOT_ITEM_KEY;
		} else if (key.objectid < (u64)-1) {
			key.offset = 0;
			key.type = BTRFS_ROOT_ITEM_KEY;
			key.objectid++;
		} else {
			break;
		}
		cond_resched();
	}

out:
	btrfs_free_path(path);
	/* trans may hold an ERR_PTR from a failed btrfs_start_transaction */
	if (trans && !IS_ERR(trans))
		btrfs_end_transaction(trans);
	if (ret)
		btrfs_warn(fs_info, "btrfs_uuid_scan_kthread failed %d", ret);
	else if (!closing)
		set_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags);
	up(&fs_info->uuid_tree_rescan_sem);
	return 0;
}
4679
/*
 * Create the uuid tree and kick off btrfs_uuid_scan_kthread() to populate
 * it from the existing subvolume root items.
 *
 * Returns 0 on success or a negative errno.
 */
int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root *uuid_root;
	struct task_struct *task;
	int ret;

	/*
	 * 1 - root node
	 * 1 - root item
	 */
	trans = btrfs_start_transaction(tree_root, 2);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	uuid_root = btrfs_create_tree(trans, BTRFS_UUID_TREE_OBJECTID);
	if (IS_ERR(uuid_root)) {
		ret = PTR_ERR(uuid_root);
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		return ret;
	}

	fs_info->uuid_root = uuid_root;

	ret = btrfs_commit_transaction(trans);
	if (ret)
		return ret;

	down(&fs_info->uuid_tree_rescan_sem);
	task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid");
	if (IS_ERR(task)) {
		/* Scan thread failed to start; the uuid tree stays empty */
		btrfs_warn(fs_info, "failed to start uuid_scan task");
		up(&fs_info->uuid_tree_rescan_sem);
		return PTR_ERR(task);
	}

	return 0;
}
4721
4722
4723
4724
4725
4726
/*
 * Shrink @device down to @new_size (rounded down to a sectorsize multiple):
 * relocate all chunks that lie beyond the new size, then update the device
 * item and the superblock total.
 *
 * Returns 0 on success or a negative errno; on failure the in-memory size
 * accounting is rolled back.
 */
int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
{
	struct btrfs_fs_info *fs_info = device->fs_info;
	struct btrfs_root *root = fs_info->dev_root;
	struct btrfs_trans_handle *trans;
	struct btrfs_dev_extent *dev_extent = NULL;
	struct btrfs_path *path;
	u64 length;
	u64 chunk_offset;
	int ret;
	int slot;
	int failed = 0;		/* chunks whose relocation hit -ENOSPC */
	bool retried = false;
	struct extent_buffer *l;
	struct btrfs_key key;
	struct btrfs_super_block *super_copy = fs_info->super_copy;
	u64 old_total = btrfs_super_total_bytes(super_copy);
	u64 old_size = btrfs_device_get_total_bytes(device);
	u64 diff;
	u64 start;

	new_size = round_down(new_size, fs_info->sectorsize);
	start = new_size;
	diff = round_down(old_size - new_size, fs_info->sectorsize);

	/* A device-replace target must not be shrunk */
	if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
		return -EINVAL;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = READA_BACK;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);
	}

	mutex_lock(&fs_info->chunk_mutex);

	/* Shrink the in-memory size first so no new allocation lands there */
	btrfs_device_set_total_bytes(device, new_size);
	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
		device->fs_devices->total_rw_bytes -= diff;
		atomic64_sub(diff, &fs_info->free_chunk_space);
	}

	/*
	 * If there are pending chunk allocations in the shrunk range, commit
	 * the transaction so they reach disk and the relocation loop below
	 * can see and move them.
	 */
	if (contains_pending_extent(device, &start, diff)) {
		mutex_unlock(&fs_info->chunk_mutex);
		ret = btrfs_commit_transaction(trans);
		if (ret)
			goto done;
	} else {
		mutex_unlock(&fs_info->chunk_mutex);
		btrfs_end_transaction(trans);
	}

again:
	/* Walk this device's dev extents from the end backwards */
	key.objectid = device->devid;
	key.offset = (u64)-1;
	key.type = BTRFS_DEV_EXTENT_KEY;

	do {
		mutex_lock(&fs_info->reclaim_bgs_lock);
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0) {
			mutex_unlock(&fs_info->reclaim_bgs_lock);
			goto done;
		}

		ret = btrfs_previous_item(root, path, 0, key.type);
		if (ret) {
			mutex_unlock(&fs_info->reclaim_bgs_lock);
			if (ret < 0)
				goto done;
			/* No dev extent items left: nothing to relocate */
			ret = 0;
			btrfs_release_path(path);
			break;
		}

		l = path->nodes[0];
		slot = path->slots[0];
		btrfs_item_key_to_cpu(l, &key, path->slots[0]);

		if (key.objectid != device->devid) {
			mutex_unlock(&fs_info->reclaim_bgs_lock);
			btrfs_release_path(path);
			break;
		}

		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
		length = btrfs_dev_extent_length(l, dev_extent);

		/* Remaining extents all fit below new_size: done */
		if (key.offset + length <= new_size) {
			mutex_unlock(&fs_info->reclaim_bgs_lock);
			btrfs_release_path(path);
			break;
		}

		chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
		btrfs_release_path(path);

		/*
		 * We may be relocating the only data chunk we have,
		 * which could potentially end up with losing data's
		 * raid profile, so lets allocate an empty one in
		 * advance.
		 */
		ret = btrfs_may_alloc_data_chunk(fs_info, chunk_offset);
		if (ret < 0) {
			mutex_unlock(&fs_info->reclaim_bgs_lock);
			goto done;
		}

		ret = btrfs_relocate_chunk(fs_info, chunk_offset);
		mutex_unlock(&fs_info->reclaim_bgs_lock);
		if (ret == -ENOSPC) {
			/* Count the failure; these are retried once below */
			failed++;
		} else if (ret) {
			if (ret == -ETXTBSY) {
				btrfs_warn(fs_info,
		   "could not shrink block group %llu due to active swapfile",
					   chunk_offset);
			}
			goto done;
		}
	} while (key.offset-- > 0);

	if (failed && !retried) {
		/* One full retry pass for the -ENOSPC chunks */
		failed = 0;
		retried = true;
		goto again;
	} else if (failed && retried) {
		ret = -ENOSPC;
		goto done;
	}

	/* Shrinking succeeded, else we would be at "done". */
	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto done;
	}

	mutex_lock(&fs_info->chunk_mutex);
	/* Clear chunk allocation state bits beyond the new device size */
	clear_extent_bits(&device->alloc_state, new_size, (u64)-1,
			  CHUNK_STATE_MASK);

	btrfs_device_set_disk_total_bytes(device, new_size);
	if (list_empty(&device->post_commit_list))
		list_add_tail(&device->post_commit_list,
			      &trans->transaction->dev_update_list);

	WARN_ON(diff > old_total);
	btrfs_set_super_total_bytes(super_copy,
			round_down(old_total - diff, fs_info->sectorsize));
	mutex_unlock(&fs_info->chunk_mutex);

	/* Now btrfs_update_device() will change the on-disk size. */
	ret = btrfs_update_device(trans, device);
	if (ret < 0) {
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
	} else {
		ret = btrfs_commit_transaction(trans);
	}
done:
	btrfs_free_path(path);
	if (ret) {
		/* Roll back the in-memory size accounting */
		mutex_lock(&fs_info->chunk_mutex);
		btrfs_device_set_total_bytes(device, old_size);
		if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
			device->fs_devices->total_rw_bytes += diff;
		/*
		 * NOTE(review): free_chunk_space is restored unconditionally
		 * here, while the shrink path above subtracted it only for
		 * writeable devices -- confirm the asymmetry is intended.
		 */
		atomic64_add(diff, &fs_info->free_chunk_space);
		mutex_unlock(&fs_info->chunk_mutex);
	}
	return ret;
}
4912
4913static int btrfs_add_system_chunk(struct btrfs_fs_info *fs_info,
4914 struct btrfs_key *key,
4915 struct btrfs_chunk *chunk, int item_size)
4916{
4917 struct btrfs_super_block *super_copy = fs_info->super_copy;
4918 struct btrfs_disk_key disk_key;
4919 u32 array_size;
4920 u8 *ptr;
4921
4922 lockdep_assert_held(&fs_info->chunk_mutex);
4923
4924 array_size = btrfs_super_sys_array_size(super_copy);
4925 if (array_size + item_size + sizeof(disk_key)
4926 > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)
4927 return -EFBIG;
4928
4929 ptr = super_copy->sys_chunk_array + array_size;
4930 btrfs_cpu_key_to_disk(&disk_key, key);
4931 memcpy(ptr, &disk_key, sizeof(disk_key));
4932 ptr += sizeof(disk_key);
4933 memcpy(ptr, chunk, item_size);
4934 item_size += sizeof(disk_key);
4935 btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
4936
4937 return 0;
4938}
4939
4940
4941
4942
4943static int btrfs_cmp_device_info(const void *a, const void *b)
4944{
4945 const struct btrfs_device_info *di_a = a;
4946 const struct btrfs_device_info *di_b = b;
4947
4948 if (di_a->max_avail > di_b->max_avail)
4949 return -1;
4950 if (di_a->max_avail < di_b->max_avail)
4951 return 1;
4952 if (di_a->total_avail > di_b->total_avail)
4953 return -1;
4954 if (di_a->total_avail < di_b->total_avail)
4955 return 1;
4956 return 0;
4957}
4958
4959static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
4960{
4961 if (!(type & BTRFS_BLOCK_GROUP_RAID56_MASK))
4962 return;
4963
4964 btrfs_set_fs_incompat(info, RAID56);
4965}
4966
4967static void check_raid1c34_incompat_flag(struct btrfs_fs_info *info, u64 type)
4968{
4969 if (!(type & (BTRFS_BLOCK_GROUP_RAID1C3 | BTRFS_BLOCK_GROUP_RAID1C4)))
4970 return;
4971
4972 btrfs_set_fs_incompat(info, RAID1C34);
4973}
4974
4975
4976
4977
4978
/*
 * Parameters and intermediate results of a chunk allocation, shared by the
 * init_alloc_chunk_ctl*/decide_stripe_size*/create_chunk helpers.
 */
struct alloc_chunk_ctl {
	u64 start;
	u64 type;
	/* Total number of stripes to allocate */
	int num_stripes;
	/* sub_stripes info for the chunk map */
	int sub_stripes;
	/* Stripes per device */
	int dev_stripes;
	/* Maximum number of devices to use */
	int devs_max;
	/* Minimum number of devices to use */
	int devs_min;
	/* ndevs has to be a multiple of this */
	int devs_increment;
	/* Number of copies */
	int ncopies;
	/* Number of stripes worth of bytes to store parity information */
	int nparity;
	u64 max_stripe_size;
	u64 max_chunk_size;
	u64 dev_extent_min;
	u64 stripe_size;
	u64 chunk_size;
	int ndevs;
};
5005
/*
 * Set the stripe/chunk size limits for the regular (non-zoned) chunk
 * allocation policy, based on the block group type in @ctl->type.
 */
static void init_alloc_chunk_ctl_policy_regular(
				struct btrfs_fs_devices *fs_devices,
				struct alloc_chunk_ctl *ctl)
{
	u64 type = ctl->type;

	if (type & BTRFS_BLOCK_GROUP_DATA) {
		ctl->max_stripe_size = SZ_1G;
		ctl->max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE;
	} else if (type & BTRFS_BLOCK_GROUP_METADATA) {
		/* For larger filesystems, use larger metadata chunks */
		if (fs_devices->total_rw_bytes > 50ULL * SZ_1G)
			ctl->max_stripe_size = SZ_1G;
		else
			ctl->max_stripe_size = SZ_256M;
		ctl->max_chunk_size = ctl->max_stripe_size;
	} else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
		ctl->max_stripe_size = SZ_32M;
		ctl->max_chunk_size = 2 * ctl->max_stripe_size;
		ctl->devs_max = min_t(int, ctl->devs_max,
				      BTRFS_MAX_DEVS_SYS_CHUNK);
	} else {
		BUG();
	}

	/* We don't want a chunk larger than 10% of writable space */
	ctl->max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
				  ctl->max_chunk_size);
	ctl->dev_extent_min = BTRFS_STRIPE_LEN * ctl->dev_stripes;
}
5036
/*
 * Set the stripe/chunk size limits for the zoned chunk allocation policy.
 * Stripe size is fixed to the zone size; chunk size limits are aligned to
 * zone boundaries.
 */
static void init_alloc_chunk_ctl_policy_zoned(
				struct btrfs_fs_devices *fs_devices,
				struct alloc_chunk_ctl *ctl)
{
	u64 zone_size = fs_devices->fs_info->zone_size;
	u64 limit;
	int min_num_stripes = ctl->devs_min * ctl->dev_stripes;
	int min_data_stripes = (min_num_stripes - ctl->nparity) / ctl->ncopies;
	/* The smallest chunk the minimal stripe count can produce */
	u64 min_chunk_size = min_data_stripes * zone_size;
	u64 type = ctl->type;

	/* On zoned filesystems a stripe is exactly one zone */
	ctl->max_stripe_size = zone_size;
	if (type & BTRFS_BLOCK_GROUP_DATA) {
		ctl->max_chunk_size = round_down(BTRFS_MAX_DATA_CHUNK_SIZE,
						 zone_size);
	} else if (type & BTRFS_BLOCK_GROUP_METADATA) {
		ctl->max_chunk_size = ctl->max_stripe_size;
	} else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
		ctl->max_chunk_size = 2 * ctl->max_stripe_size;
		ctl->devs_max = min_t(int, ctl->devs_max,
				      BTRFS_MAX_DEVS_SYS_CHUNK);
	} else {
		BUG();
	}

	/* We don't want a chunk larger than 10% of writable space */
	limit = max(round_down(div_factor(fs_devices->total_rw_bytes, 1),
			       zone_size),
		    min_chunk_size);
	ctl->max_chunk_size = min(limit, ctl->max_chunk_size);
	ctl->dev_extent_min = zone_size * ctl->dev_stripes;
}
5069
/*
 * Fill @ctl with the per-RAID-profile allocation parameters for @ctl->type
 * from btrfs_raid_array, then apply the policy specific (regular or zoned)
 * size limits.
 */
static void init_alloc_chunk_ctl(struct btrfs_fs_devices *fs_devices,
				 struct alloc_chunk_ctl *ctl)
{
	int index = btrfs_bg_flags_to_raid_index(ctl->type);

	ctl->sub_stripes = btrfs_raid_array[index].sub_stripes;
	ctl->dev_stripes = btrfs_raid_array[index].dev_stripes;
	ctl->devs_max = btrfs_raid_array[index].devs_max;
	/* devs_max == 0 in the raid table means "no fixed limit" */
	if (!ctl->devs_max)
		ctl->devs_max = BTRFS_MAX_DEVS(fs_devices->fs_info);
	ctl->devs_min = btrfs_raid_array[index].devs_min;
	ctl->devs_increment = btrfs_raid_array[index].devs_increment;
	ctl->ncopies = btrfs_raid_array[index].ncopies;
	ctl->nparity = btrfs_raid_array[index].nparity;
	ctl->ndevs = 0;

	switch (fs_devices->chunk_alloc_policy) {
	case BTRFS_CHUNK_ALLOC_REGULAR:
		init_alloc_chunk_ctl_policy_regular(fs_devices, ctl);
		break;
	case BTRFS_CHUNK_ALLOC_ZONED:
		init_alloc_chunk_ctl_policy_zoned(fs_devices, ctl);
		break;
	default:
		BUG();
	}
}
5097
/*
 * Collect free-space information about every allocatable device into
 * @devices_info, sorted by hole size / available space (largest first),
 * and record the number of candidates in @ctl->ndevs.
 *
 * Returns 0 on success or a negative errno from the dev extent search.
 */
static int gather_device_info(struct btrfs_fs_devices *fs_devices,
			      struct alloc_chunk_ctl *ctl,
			      struct btrfs_device_info *devices_info)
{
	struct btrfs_fs_info *info = fs_devices->fs_info;
	struct btrfs_device *device;
	u64 total_avail;
	u64 dev_extent_want = ctl->max_stripe_size * ctl->dev_stripes;
	int ret;
	int ndevs = 0;
	u64 max_avail;
	u64 dev_offset;

	/*
	 * Loop through our devices looking for free space holes that could
	 * host a new dev extent.
	 */
	list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
		if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
			WARN(1, KERN_ERR
			       "BTRFS: read-only device in alloc_list\n");
			continue;
		}

		/* Skip devices not in the metadata or being replaced */
		if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
					&device->dev_state) ||
		    test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
			continue;

		if (device->total_bytes > device->bytes_used)
			total_avail = device->total_bytes - device->bytes_used;
		else
			total_avail = 0;

		/* If there is no space on this device, skip it */
		if (total_avail < ctl->dev_extent_min)
			continue;

		ret = find_free_dev_extent(device, dev_extent_want, &dev_offset,
					   &max_avail);
		if (ret && ret != -ENOSPC)
			return ret;

		/* ret == 0 means a hole of at least the wanted size exists */
		if (ret == 0)
			max_avail = dev_extent_want;

		if (max_avail < ctl->dev_extent_min) {
			if (btrfs_test_opt(info, ENOSPC_DEBUG))
				btrfs_debug(info,
			"%s: devid %llu has no free space, have=%llu want=%llu",
					    __func__, device->devid, max_avail,
					    ctl->dev_extent_min);
			continue;
		}

		if (ndevs == fs_devices->rw_devices) {
			WARN(1, "%s: found more than %llu devices\n",
			     __func__, fs_devices->rw_devices);
			break;
		}
		devices_info[ndevs].dev_offset = dev_offset;
		devices_info[ndevs].max_avail = max_avail;
		devices_info[ndevs].total_avail = total_avail;
		devices_info[ndevs].dev = device;
		++ndevs;
	}
	ctl->ndevs = ndevs;

	/*
	 * Now sort the devices by hole size / available space.
	 */
	sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
	     btrfs_cmp_device_info, NULL);

	return 0;
}
5174
/*
 * Compute stripe_size/num_stripes/chunk_size for the regular allocation
 * policy from the sorted candidate devices.  Always returns 0.
 */
static int decide_stripe_size_regular(struct alloc_chunk_ctl *ctl,
				      struct btrfs_device_info *devices_info)
{
	/* Number of stripes that count for block group size */
	int data_stripes;

	/*
	 * The primary goal is to maximize the number of stripes, so use as
	 * many devices as possible, even if the stripes are not maximum
	 * sized.  devices_info is sorted descending, so the last (smallest)
	 * candidate limits the common stripe size.
	 *
	 * The DUP profile stores more than one stripe per device, the
	 * max_avail is the total size so we have to adjust.
	 */
	ctl->stripe_size = div_u64(devices_info[ctl->ndevs - 1].max_avail,
				   ctl->dev_stripes);
	ctl->num_stripes = ctl->ndevs * ctl->dev_stripes;

	/* Copies and parity stripes do not add logical address space */
	data_stripes = (ctl->num_stripes - ctl->nparity) / ctl->ncopies;

	/*
	 * Use the number of data stripes to figure out how big this chunk is
	 * really going to be in terms of logical address space, and compare
	 * that answer with the max chunk size. If it's higher, we try to
	 * reduce stripe_size.
	 */
	if (ctl->stripe_size * data_stripes > ctl->max_chunk_size) {
		/*
		 * Reduce stripe_size, round it up to a 16MB boundary again
		 * and then use it, unless it ends up being even bigger than
		 * the previous value we had already.
		 */
		ctl->stripe_size = min(round_up(div_u64(ctl->max_chunk_size,
							data_stripes), SZ_16M),
				       ctl->stripe_size);
	}

	/* Stripes must be a multiple of BTRFS_STRIPE_LEN */
	ctl->stripe_size = round_down(ctl->stripe_size, BTRFS_STRIPE_LEN);
	ctl->chunk_size = ctl->stripe_size * data_stripes;

	return 0;
}
5218
/*
 * Compute stripe_size/num_stripes/chunk_size for the zoned allocation
 * policy.  The stripe size is fixed to the zone size, so the chunk size is
 * capped by reducing the number of devices instead.  Always returns 0.
 */
static int decide_stripe_size_zoned(struct alloc_chunk_ctl *ctl,
				    struct btrfs_device_info *devices_info)
{
	u64 zone_size = devices_info[0].dev->zone_info->zone_size;
	/* Number of stripes that count for block group size */
	int data_stripes;

	/*
	 * It should hold because:
	 *    dev_extent_min == dev_extent_want == zone_size * dev_stripes
	 */
	ASSERT(devices_info[ctl->ndevs - 1].max_avail == ctl->dev_extent_min);

	ctl->stripe_size = zone_size;
	ctl->num_stripes = ctl->ndevs * ctl->dev_stripes;
	data_stripes = (ctl->num_stripes - ctl->nparity) / ctl->ncopies;

	/* stripe_size is fixed in zoned filesystem. Reduce ndevs instead. */
	if (ctl->stripe_size * data_stripes > ctl->max_chunk_size) {
		ctl->ndevs = div_u64(div_u64(ctl->max_chunk_size * ctl->ncopies,
					     ctl->stripe_size) + ctl->nparity,
				     ctl->dev_stripes);
		ctl->num_stripes = ctl->ndevs * ctl->dev_stripes;
		data_stripes = (ctl->num_stripes - ctl->nparity) / ctl->ncopies;
		ASSERT(ctl->stripe_size * data_stripes <= ctl->max_chunk_size);
	}

	ctl->chunk_size = ctl->stripe_size * data_stripes;

	return 0;
}
5250
5251static int decide_stripe_size(struct btrfs_fs_devices *fs_devices,
5252 struct alloc_chunk_ctl *ctl,
5253 struct btrfs_device_info *devices_info)
5254{
5255 struct btrfs_fs_info *info = fs_devices->fs_info;
5256
5257
5258
5259
5260
5261
5262 ctl->ndevs = rounddown(ctl->ndevs, ctl->devs_increment);
5263
5264 if (ctl->ndevs < ctl->devs_min) {
5265 if (btrfs_test_opt(info, ENOSPC_DEBUG)) {
5266 btrfs_debug(info,
5267 "%s: not enough devices with free space: have=%d minimum required=%d",
5268 __func__, ctl->ndevs, ctl->devs_min);
5269 }
5270 return -ENOSPC;
5271 }
5272
5273 ctl->ndevs = min(ctl->ndevs, ctl->devs_max);
5274
5275 switch (fs_devices->chunk_alloc_policy) {
5276 case BTRFS_CHUNK_ALLOC_REGULAR:
5277 return decide_stripe_size_regular(ctl, devices_info);
5278 case BTRFS_CHUNK_ALLOC_ZONED:
5279 return decide_stripe_size_zoned(ctl, devices_info);
5280 default:
5281 BUG();
5282 }
5283}
5284
/*
 * Create the chunk described by @ctl on the devices in @devices_info:
 * build the stripe map, insert the extent map into the fs mapping tree and
 * create the in-memory block group.
 *
 * Returns the new block group or an ERR_PTR() on failure.  Note that when
 * btrfs_make_block_group() fails, the extent map is removed again and the
 * (error-valued) block_group pointer is returned as-is.
 */
static struct btrfs_block_group *create_chunk(struct btrfs_trans_handle *trans,
			     struct alloc_chunk_ctl *ctl,
			     struct btrfs_device_info *devices_info)
{
	struct btrfs_fs_info *info = trans->fs_info;
	struct map_lookup *map = NULL;
	struct extent_map_tree *em_tree;
	struct btrfs_block_group *block_group;
	struct extent_map *em;
	u64 start = ctl->start;
	u64 type = ctl->type;
	int ret;
	int i;
	int j;

	map = kmalloc(map_lookup_size(ctl->num_stripes), GFP_NOFS);
	if (!map)
		return ERR_PTR(-ENOMEM);
	map->num_stripes = ctl->num_stripes;

	/*
	 * Lay out the physical stripes: each device contributes dev_stripes
	 * consecutive extents of stripe_size bytes starting at its
	 * dev_offset.
	 */
	for (i = 0; i < ctl->ndevs; ++i) {
		for (j = 0; j < ctl->dev_stripes; ++j) {
			int s = i * ctl->dev_stripes + j;
			map->stripes[s].dev = devices_info[i].dev;
			map->stripes[s].physical = devices_info[i].dev_offset +
						   j * ctl->stripe_size;
		}
	}
	map->stripe_len = BTRFS_STRIPE_LEN;
	map->io_align = BTRFS_STRIPE_LEN;
	map->io_width = BTRFS_STRIPE_LEN;
	map->type = type;
	map->sub_stripes = ctl->sub_stripes;

	trace_btrfs_chunk_alloc(info, map, start, ctl->chunk_size);

	em = alloc_extent_map();
	if (!em) {
		kfree(map);
		return ERR_PTR(-ENOMEM);
	}
	set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
	em->map_lookup = map;	/* the em now owns @map */
	em->start = start;
	em->len = ctl->chunk_size;
	em->block_start = 0;
	em->block_len = em->len;
	em->orig_block_len = ctl->stripe_size;

	em_tree = &info->mapping_tree;
	write_lock(&em_tree->lock);
	ret = add_extent_mapping(em_tree, em, 0);
	if (ret) {
		write_unlock(&em_tree->lock);
		free_extent_map(em);	/* also frees @map via em->map_lookup */
		return ERR_PTR(ret);
	}
	write_unlock(&em_tree->lock);

	block_group = btrfs_make_block_group(trans, 0, type, start, ctl->chunk_size);
	if (IS_ERR(block_group))
		goto error_del_extent;

	/* Account the newly used space on every participating device. */
	for (i = 0; i < map->num_stripes; i++) {
		struct btrfs_device *dev = map->stripes[i].dev;

		btrfs_device_set_bytes_used(dev,
					    dev->bytes_used + ctl->stripe_size);
		if (list_empty(&dev->post_commit_list))
			list_add_tail(&dev->post_commit_list,
				      &trans->transaction->dev_update_list);
	}

	atomic64_sub(ctl->stripe_size * map->num_stripes,
		     &info->free_chunk_space);

	/* Drop our lookup reference; the mapping tree keeps its own. */
	free_extent_map(em);
	check_raid56_incompat_flag(info, type);
	check_raid1c34_incompat_flag(info, type);

	return block_group;

error_del_extent:
	write_lock(&em_tree->lock);
	remove_extent_mapping(em_tree, em);
	write_unlock(&em_tree->lock);

	/* One reference for our allocation */
	free_extent_map(em);
	/* One reference for the mapping tree entry removed above */
	free_extent_map(em);

	return block_group;
}
5379
/*
 * Allocate a new chunk of type @type (data, metadata or system profile
 * bits) and create its block group.  Caller must hold fs_info->chunk_mutex.
 *
 * Returns the new block group or an ERR_PTR() on failure.
 */
struct btrfs_block_group *btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
					    u64 type)
{
	struct btrfs_fs_info *info = trans->fs_info;
	struct btrfs_fs_devices *fs_devices = info->fs_devices;
	struct btrfs_device_info *devices_info = NULL;
	struct alloc_chunk_ctl ctl;
	struct btrfs_block_group *block_group;
	int ret;

	lockdep_assert_held(&info->chunk_mutex);

	if (!alloc_profile_is_valid(type, 0)) {
		ASSERT(0);
		return ERR_PTR(-EINVAL);
	}

	/* No writable devices means nothing to allocate from. */
	if (list_empty(&fs_devices->alloc_list)) {
		if (btrfs_test_opt(info, ENOSPC_DEBUG))
			btrfs_debug(info, "%s: no writable device", __func__);
		return ERR_PTR(-ENOSPC);
	}

	if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
		btrfs_err(info, "invalid chunk type 0x%llx requested", type);
		ASSERT(0);
		return ERR_PTR(-EINVAL);
	}

	ctl.start = find_next_chunk(info);
	ctl.type = type;
	init_alloc_chunk_ctl(fs_devices, &ctl);

	devices_info = kcalloc(fs_devices->rw_devices, sizeof(*devices_info),
			       GFP_NOFS);
	if (!devices_info)
		return ERR_PTR(-ENOMEM);

	/* Collect per-device free space for every writable device. */
	ret = gather_device_info(fs_devices, &ctl, devices_info);
	if (ret < 0) {
		block_group = ERR_PTR(ret);
		goto out;
	}

	/* Compute stripe size/count from the gathered device info. */
	ret = decide_stripe_size(fs_devices, &ctl, devices_info);
	if (ret < 0) {
		block_group = ERR_PTR(ret);
		goto out;
	}

	block_group = create_chunk(trans, &ctl, devices_info);

out:
	kfree(devices_info);
	return block_group;
}
5436
5437
5438
5439
5440
5441
5442
5443
5444
/*
 * Insert the chunk item for block group @bg into the chunk btree, update
 * the device items of all its stripes, and for SYSTEM chunks also add the
 * entry to the superblock's system chunk array.  This is the on-disk part
 * of chunk allocation; the in-memory structures were created earlier by
 * btrfs_alloc_chunk().
 *
 * Returns 0 on success or a negative errno; the transaction is aborted on
 * lookup/allocation failures here.
 */
int btrfs_chunk_alloc_add_chunk_item(struct btrfs_trans_handle *trans,
				     struct btrfs_block_group *bg)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_root *extent_root = fs_info->extent_root;
	struct btrfs_root *chunk_root = fs_info->chunk_root;
	struct btrfs_key key;
	struct btrfs_chunk *chunk;
	struct btrfs_stripe *stripe;
	struct extent_map *em;
	struct map_lookup *map;
	size_t item_size;
	int i;
	int ret;

	/*
	 * The chunk mutex serializes updates to the chunk btree and the
	 * superblock's system chunk array.  NOTE(review): presumably it also
	 * guards against device-replace swapping the device objects that
	 * map->stripes reference — confirm against dev-replace.c.
	 */
	lockdep_assert_held(&fs_info->chunk_mutex);

	em = btrfs_get_chunk_map(fs_info, bg->start, bg->length);
	if (IS_ERR(em)) {
		ret = PTR_ERR(em);
		btrfs_abort_transaction(trans, ret);
		return ret;
	}

	map = em->map_lookup;
	item_size = btrfs_chunk_item_size(map->num_stripes);

	chunk = kzalloc(item_size, GFP_NOFS);
	if (!chunk) {
		ret = -ENOMEM;
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	/* Persist the updated bytes_used of every device in the chunk. */
	for (i = 0; i < map->num_stripes; i++) {
		struct btrfs_device *device = map->stripes[i].dev;

		ret = btrfs_update_device(trans, device);
		if (ret)
			goto out;
	}

	/* Fill the on-disk stripe array from the in-memory map. */
	stripe = &chunk->stripe;
	for (i = 0; i < map->num_stripes; i++) {
		struct btrfs_device *device = map->stripes[i].dev;
		const u64 dev_offset = map->stripes[i].physical;

		btrfs_set_stack_stripe_devid(stripe, device->devid);
		btrfs_set_stack_stripe_offset(stripe, dev_offset);
		memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
		stripe++;
	}

	btrfs_set_stack_chunk_length(chunk, bg->length);
	btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
	btrfs_set_stack_chunk_stripe_len(chunk, map->stripe_len);
	btrfs_set_stack_chunk_type(chunk, map->type);
	btrfs_set_stack_chunk_num_stripes(chunk, map->num_stripes);
	btrfs_set_stack_chunk_io_align(chunk, map->stripe_len);
	btrfs_set_stack_chunk_io_width(chunk, map->stripe_len);
	btrfs_set_stack_chunk_sector_size(chunk, fs_info->sectorsize);
	btrfs_set_stack_chunk_sub_stripes(chunk, map->sub_stripes);

	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
	key.type = BTRFS_CHUNK_ITEM_KEY;
	key.offset = bg->start;

	ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size);
	if (ret)
		goto out;

	bg->chunk_item_inserted = 1;

	/* SYSTEM chunks are additionally mirrored into the superblock. */
	if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
		ret = btrfs_add_system_chunk(fs_info, &key, chunk, item_size);
		if (ret)
			goto out;
	}

out:
	kfree(chunk);
	free_extent_map(em);
	return ret;
}
5551
/*
 * Allocate the initial metadata and system chunks for a filesystem that
 * gains its first writable device (e.g. when sprouting from a read-only
 * seed device).
 *
 * Only the in-memory chunks/block groups are created here; their items are
 * inserted into the btrees later (see btrfs_chunk_alloc_add_chunk_item()).
 * The returned block group pointers are intentionally not kept — the block
 * groups are tracked by the filesystem's block group lists.
 */
static noinline int init_first_rw_device(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	u64 alloc_profile;
	struct btrfs_block_group *meta_bg;
	struct btrfs_block_group *sys_bg;

	alloc_profile = btrfs_metadata_alloc_profile(fs_info);
	meta_bg = btrfs_alloc_chunk(trans, alloc_profile);
	if (IS_ERR(meta_bg))
		return PTR_ERR(meta_bg);

	alloc_profile = btrfs_system_alloc_profile(fs_info);
	sys_bg = btrfs_alloc_chunk(trans, alloc_profile);
	if (IS_ERR(sys_bg))
		return PTR_ERR(sys_bg);

	return 0;
}
5592
5593static inline int btrfs_chunk_max_errors(struct map_lookup *map)
5594{
5595 const int index = btrfs_bg_flags_to_raid_index(map->type);
5596
5597 return btrfs_raid_array[index].tolerated_failures;
5598}
5599
5600int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset)
5601{
5602 struct extent_map *em;
5603 struct map_lookup *map;
5604 int readonly = 0;
5605 int miss_ndevs = 0;
5606 int i;
5607
5608 em = btrfs_get_chunk_map(fs_info, chunk_offset, 1);
5609 if (IS_ERR(em))
5610 return 1;
5611
5612 map = em->map_lookup;
5613 for (i = 0; i < map->num_stripes; i++) {
5614 if (test_bit(BTRFS_DEV_STATE_MISSING,
5615 &map->stripes[i].dev->dev_state)) {
5616 miss_ndevs++;
5617 continue;
5618 }
5619 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE,
5620 &map->stripes[i].dev->dev_state)) {
5621 readonly = 1;
5622 goto end;
5623 }
5624 }
5625
5626
5627
5628
5629
5630
5631 if (miss_ndevs > btrfs_chunk_max_errors(map))
5632 readonly = 1;
5633end:
5634 free_extent_map(em);
5635 return readonly;
5636}
5637
/* Drop every cached chunk mapping from @tree, e.g. at unmount time. */
void btrfs_mapping_tree_free(struct extent_map_tree *tree)
{
	struct extent_map *em;

	while (1) {
		write_lock(&tree->lock);
		em = lookup_extent_mapping(tree, 0, (u64)-1);
		if (em)
			remove_extent_mapping(tree, em);
		write_unlock(&tree->lock);
		if (!em)
			break;
		/* Once for the reference taken by the lookup above. */
		free_extent_map(em);
		/* Once for the tree reference dropped by the removal. */
		free_extent_map(em);
	}
}
5656
/*
 * Return the number of copies of the data at @logical / @len, i.e. how
 * many mirrors a read can be retried from.
 */
int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
{
	struct extent_map *em;
	struct map_lookup *map;
	int ret;

	em = btrfs_get_chunk_map(fs_info, logical, len);
	if (IS_ERR(em))
		/*
		 * We could return errors for these cases, but that could get
		 * ugly and we'd probably do the same thing which is just not
		 * do anything else and exit, so return 1 so the callers don't
		 * try to use other copies.
		 */
		return 1;

	map = em->map_lookup;
	if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1_MASK))
		ret = map->num_stripes;
	else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
		ret = map->sub_stripes;
	else if (map->type & BTRFS_BLOCK_GROUP_RAID5)
		/* Data itself plus one reconstruction from parity. */
		ret = 2;
	else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
		/*
		 * There could be two corrupted data stripes, we need
		 * to loop retry in order to rebuild the correct data.
		 *
		 * Fail a stripe at a time on every retry except the
		 * stripe under reconstruction.
		 */
		ret = map->num_stripes;
	else
		ret = 1;
	free_extent_map(em);

	/* An ongoing replace target device provides one extra copy. */
	down_read(&fs_info->dev_replace.rwsem);
	if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace) &&
	    fs_info->dev_replace.tgtdev)
		ret++;
	up_read(&fs_info->dev_replace.rwsem);

	return ret;
}
5701
5702unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
5703 u64 logical)
5704{
5705 struct extent_map *em;
5706 struct map_lookup *map;
5707 unsigned long len = fs_info->sectorsize;
5708
5709 em = btrfs_get_chunk_map(fs_info, logical, len);
5710
5711 if (!WARN_ON(IS_ERR(em))) {
5712 map = em->map_lookup;
5713 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
5714 len = map->stripe_len * nr_data_stripes(map);
5715 free_extent_map(em);
5716 }
5717 return len;
5718}
5719
5720int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
5721{
5722 struct extent_map *em;
5723 struct map_lookup *map;
5724 int ret = 0;
5725
5726 em = btrfs_get_chunk_map(fs_info, logical, len);
5727
5728 if(!WARN_ON(IS_ERR(em))) {
5729 map = em->map_lookup;
5730 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
5731 ret = 1;
5732 free_extent_map(em);
5733 }
5734 return ret;
5735}
5736
/*
 * Pick a live (bdev present) stripe to read from, for RAID1-like and
 * RAID10 chunks.  @first is the index of the first candidate mirror in
 * map->stripes; the candidates are the mirrors that follow it.
 */
static int find_live_mirror(struct btrfs_fs_info *fs_info,
			    struct map_lookup *map, int first,
			    int dev_replace_is_ongoing)
{
	int i;
	int num_stripes;
	int preferred_mirror;
	int tolerance;
	struct btrfs_device *srcdev;

	ASSERT((map->type &
		 (BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10)));

	/* For RAID10 only the sub-stripes of one stripe set are mirrors. */
	if (map->type & BTRFS_BLOCK_GROUP_RAID10)
		num_stripes = map->sub_stripes;
	else
		num_stripes = map->num_stripes;

	switch (fs_info->fs_devices->read_policy) {
	default:
		/*
		 * Shouldn't happen; just warn and fall back to the pid
		 * policy instead of failing the read.
		 */
		btrfs_warn_rl(fs_info,
			      "unknown read_policy type %u, reset to pid",
			      fs_info->fs_devices->read_policy);
		fs_info->fs_devices->read_policy = BTRFS_READ_POLICY_PID;
		fallthrough;
	case BTRFS_READ_POLICY_PID:
		/* Spread readers over the mirrors by process id. */
		preferred_mirror = first + (current->pid % num_stripes);
		break;
	}

	if (dev_replace_is_ongoing &&
	    fs_info->dev_replace.cont_reading_from_srcdev_mode ==
	     BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID)
		srcdev = fs_info->dev_replace.srcdev;
	else
		srcdev = NULL;

	/*
	 * Try to avoid the drive that is the source drive for a dev-replace
	 * procedure; only choose it (tolerance == 1) if no other live
	 * mirror is available.
	 */
	for (tolerance = 0; tolerance < 2; tolerance++) {
		if (map->stripes[preferred_mirror].dev->bdev &&
		    (tolerance || map->stripes[preferred_mirror].dev != srcdev))
			return preferred_mirror;
		for (i = first; i < first + num_stripes; i++) {
			if (map->stripes[i].dev->bdev &&
			    (tolerance || map->stripes[i].dev != srcdev))
				return i;
		}
	}

	/*
	 * We couldn't find one that doesn't fail.  Just return something
	 * and the io error handling code will clean up eventually.
	 */
	return preferred_mirror;
}
5796
5797
5798static void sort_parity_stripes(struct btrfs_bio *bbio, int num_stripes)
5799{
5800 int i;
5801 int again = 1;
5802
5803 while (again) {
5804 again = 0;
5805 for (i = 0; i < num_stripes - 1; i++) {
5806
5807 if (bbio->raid_map[i] > bbio->raid_map[i + 1]) {
5808 swap(bbio->stripes[i], bbio->stripes[i + 1]);
5809 swap(bbio->raid_map[i], bbio->raid_map[i + 1]);
5810 again = 1;
5811 }
5812 }
5813 }
5814}
5815
/*
 * Allocate a btrfs_bio with trailing storage for @total_stripes stripes,
 * the dev-replace target map for @real_stripes entries and the raid_map.
 * Uses __GFP_NOFAIL, so the allocation never returns NULL.
 */
static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes)
{
	struct btrfs_bio *bbio = kzalloc(
		 /* The size of btrfs_bio itself. */
		sizeof(struct btrfs_bio) +
		/* Plus the variable array for the stripes. */
		sizeof(struct btrfs_bio_stripe) * (total_stripes) +
		/* Plus the variable array for the tgtdev indexes. */
		sizeof(int) * (real_stripes) +
		/*
		 * Plus the raid_map, which includes both the tgtdev
		 * num_stripes / missing_devs.
		 */
		sizeof(u64) * (total_stripes),
		GFP_NOFS|__GFP_NOFAIL);

	atomic_set(&bbio->error, 0);
	refcount_set(&bbio->refs, 1);

	/* The variable arrays live directly behind the fixed-size struct. */
	bbio->tgtdev_map = (int *)(bbio->stripes + total_stripes);
	bbio->raid_map = (u64 *)(bbio->tgtdev_map + real_stripes);

	return bbio;
}
5840
/* Take an extra reference on @bbio; paired with btrfs_put_bbio(). */
void btrfs_get_bbio(struct btrfs_bio *bbio)
{
	/* Catch use-after-free: the refcount must never already be zero. */
	WARN_ON(!refcount_read(&bbio->refs));
	refcount_inc(&bbio->refs);
}
5846
5847void btrfs_put_bbio(struct btrfs_bio *bbio)
5848{
5849 if (!bbio)
5850 return;
5851 if (refcount_dec_and_test(&bbio->refs))
5852 kfree(bbio);
5853}
5854
5855
5856
5857
5858
5859
/*
 * Map a discard at @logical spanning *@length_ret bytes to the physical
 * stripes it covers.  Unlike reads and writes a discard may cross stripe
 * boundaries, so a per-stripe length is computed for every entry.  The
 * length actually mapped (clamped to the chunk) is returned via
 * @length_ret.
 */
static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info,
					 u64 logical, u64 *length_ret,
					 struct btrfs_bio **bbio_ret)
{
	struct extent_map *em;
	struct map_lookup *map;
	struct btrfs_bio *bbio;
	u64 length = *length_ret;
	u64 offset;
	u64 stripe_nr;
	u64 stripe_nr_end;
	u64 stripe_end_offset;
	u64 stripe_cnt;
	u64 stripe_len;
	u64 stripe_offset;
	u64 num_stripes;
	u32 stripe_index;
	u32 factor = 0;
	u32 sub_stripes = 0;
	u64 stripes_per_dev = 0;
	u32 remaining_stripes = 0;
	u32 last_stripe = 0;
	int ret = 0;
	int i;

	/* Discard always returns a bbio. */
	ASSERT(bbio_ret);

	em = btrfs_get_chunk_map(fs_info, logical, length);
	if (IS_ERR(em))
		return PTR_ERR(em);

	map = em->map_lookup;

	/* We don't discard raid56 chunks yet. */
	if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	offset = logical - em->start;
	/* Truncate the discard at the end of the chunk. */
	length = min_t(u64, em->start + em->len - logical, length);
	*length_ret = length;

	stripe_len = map->stripe_len;
	/*
	 * stripe_nr counts the total number of stripes we have to stride
	 * to get to this block.
	 */
	stripe_nr = div64_u64(offset, stripe_len);

	/* stripe_offset is the offset of this block in its stripe. */
	stripe_offset = offset - stripe_nr * stripe_len;

	/* First stripe index past the end of the discard range. */
	stripe_nr_end = round_up(offset + length, map->stripe_len);
	stripe_nr_end = div64_u64(stripe_nr_end, map->stripe_len);
	stripe_cnt = stripe_nr_end - stripe_nr;
	/* Bytes of the last stripe that lie beyond the end of the range. */
	stripe_end_offset = stripe_nr_end * map->stripe_len -
			    (offset + length);

	/*
	 * After this, stripe_nr is the number of stripes on this device we
	 * have to walk to find the data, and stripe_index is the number of
	 * our device in the stripe array.
	 */
	num_stripes = 1;
	stripe_index = 0;
	if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
			 BTRFS_BLOCK_GROUP_RAID10)) {
		if (map->type & BTRFS_BLOCK_GROUP_RAID0)
			sub_stripes = 1;
		else
			sub_stripes = map->sub_stripes;

		factor = map->num_stripes / sub_stripes;
		num_stripes = min_t(u64, map->num_stripes,
				    sub_stripes * stripe_cnt);
		stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
		stripe_index *= sub_stripes;
		stripes_per_dev = div_u64_rem(stripe_cnt, factor,
					      &remaining_stripes);
		div_u64_rem(stripe_nr_end - 1, factor, &last_stripe);
		last_stripe *= sub_stripes;
	} else if (map->type & (BTRFS_BLOCK_GROUP_RAID1_MASK |
				BTRFS_BLOCK_GROUP_DUP)) {
		/* Mirrored profiles: discard the full range on each copy. */
		num_stripes = map->num_stripes;
	} else {
		stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
					&stripe_index);
	}

	bbio = alloc_btrfs_bio(num_stripes, 0);
	if (!bbio) {
		ret = -ENOMEM;
		goto out;
	}

	for (i = 0; i < num_stripes; i++) {
		bbio->stripes[i].physical =
			map->stripes[stripe_index].physical +
			stripe_offset + stripe_nr * map->stripe_len;
		bbio->stripes[i].dev = map->stripes[stripe_index].dev;

		if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
				 BTRFS_BLOCK_GROUP_RAID10)) {
			bbio->stripes[i].length = stripes_per_dev *
						  map->stripe_len;

			if (i / sub_stripes < remaining_stripes)
				bbio->stripes[i].length +=
					map->stripe_len;

			/*
			 * Special for the first stripe and
			 * the last stripe:
			 *
			 * |-------|...|-------|
			 *     |----------|
			 *    off     end_off
			 */
			if (i < sub_stripes)
				bbio->stripes[i].length -=
					stripe_offset;

			if (stripe_index >= last_stripe &&
			    stripe_index <= (last_stripe +
					     sub_stripes - 1))
				bbio->stripes[i].length -=
					stripe_end_offset;

			if (i == sub_stripes - 1)
				stripe_offset = 0;
		} else {
			bbio->stripes[i].length = length;
		}

		stripe_index++;
		if (stripe_index == map->num_stripes) {
			/* Wrapped to the first device; advance the row. */
			stripe_index = 0;
			stripe_nr++;
		}
	}

	*bbio_ret = bbio;
	bbio->map_type = map->type;
	bbio->num_stripes = num_stripes;
out:
	free_extent_map(em);
	return ret;
}
6008
6009
6010
6011
6012
6013
6014
6015
6016
6017
6018
6019
6020
6021
/*
 * In dev-replace case, for repair case (that's the only case where the
 * mirror is selected explicitly when calling btrfs_map_block), blocks left
 * of the left cursor can also be read from the target drive.
 *
 * Map the extra mirror number (num_stripes + 1) to the source drive's
 * stripe and return its physical address so the caller can redirect the
 * I/O to the replace target.
 */
static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info,
					 u64 logical, u64 length,
					 u64 srcdev_devid, int *mirror_num,
					 u64 *physical)
{
	struct btrfs_bio *bbio = NULL;
	int num_stripes;
	int index_srcdev = 0;
	int found = 0;
	u64 physical_of_found = 0;
	int i;
	int ret = 0;

	ret = __btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
				logical, &length, &bbio, 0, 0);
	if (ret) {
		ASSERT(bbio == NULL);
		return ret;
	}

	num_stripes = bbio->num_stripes;
	if (*mirror_num > num_stripes) {
		/*
		 * BTRFS_MAP_GET_READ_MIRRORS does not contain this mirror,
		 * that means that the requested area is not left of the
		 * left cursor.
		 */
		btrfs_put_bbio(bbio);
		return -EIO;
	}

	/*
	 * Process the rest of the function using the mirror_num of the
	 * source drive. Therefore look it up first.  At the end, patch the
	 * device pointer to the one of the target drive.
	 */
	for (i = 0; i < num_stripes; i++) {
		if (bbio->stripes[i].dev->devid != srcdev_devid)
			continue;

		/*
		 * In case of DUP, in order to keep it simple, only add the
		 * mirror with the lowest physical address.
		 */
		if (found &&
		    physical_of_found <= bbio->stripes[i].physical)
			continue;

		index_srcdev = i;
		found = 1;
		physical_of_found = bbio->stripes[i].physical;
	}

	btrfs_put_bbio(bbio);

	ASSERT(found);
	if (!found)
		return -EIO;

	*mirror_num = index_srcdev + 1;
	*physical = physical_of_found;
	return ret;
}
6085
6086static bool is_block_group_to_copy(struct btrfs_fs_info *fs_info, u64 logical)
6087{
6088 struct btrfs_block_group *cache;
6089 bool ret;
6090
6091
6092 if (!btrfs_is_zoned(fs_info))
6093 return false;
6094
6095 cache = btrfs_lookup_block_group(fs_info, logical);
6096
6097 spin_lock(&cache->lock);
6098 ret = cache->to_copy;
6099 spin_unlock(&cache->lock);
6100
6101 btrfs_put_block_group(cache);
6102 return ret;
6103}
6104
/*
 * Adjust an already-mapped bbio for an ongoing device replace: duplicate
 * write stripes to the target device, or expose the target as an extra
 * read mirror.  Updates the stripe count and tolerated error count via
 * @num_stripes_ret / @max_errors_ret.
 */
static void handle_ops_on_dev_replace(enum btrfs_map_op op,
				      struct btrfs_bio **bbio_ret,
				      struct btrfs_dev_replace *dev_replace,
				      u64 logical,
				      int *num_stripes_ret, int *max_errors_ret)
{
	struct btrfs_bio *bbio = *bbio_ret;
	u64 srcdev_devid = dev_replace->srcdev->devid;
	int tgtdev_indexes = 0;
	int num_stripes = *num_stripes_ret;
	int max_errors = *max_errors_ret;
	int i;

	if (op == BTRFS_MAP_WRITE) {
		int index_where_to_add;

		/*
		 * A block group which has "to_copy" set will eventually be
		 * copied by the dev-replace process, so cloning the write
		 * here can be skipped.
		 */
		if (is_block_group_to_copy(dev_replace->srcdev->fs_info, logical))
			return;

		/*
		 * Duplicate the write operations while the dev-replace
		 * procedure is running. Since the copying of the old disk to
		 * the new disk takes place at run time while the filesystem
		 * is mounted writable, the regular write operations to the
		 * old disk have to be duplicated to go to the new disk as
		 * well.
		 *
		 * Note that device->missing is handled by the caller, and
		 * that the write to the old disk is already set up in the
		 * stripes array.
		 */
		index_where_to_add = num_stripes;
		for (i = 0; i < num_stripes; i++) {
			if (bbio->stripes[i].dev->devid == srcdev_devid) {
				/* write to new disk, too */
				struct btrfs_bio_stripe *new =
					bbio->stripes + index_where_to_add;
				struct btrfs_bio_stripe *old =
					bbio->stripes + i;

				new->physical = old->physical;
				new->length = old->length;
				new->dev = dev_replace->tgtdev;
				bbio->tgtdev_map[i] = index_where_to_add;
				index_where_to_add++;
				max_errors++;
				tgtdev_indexes++;
			}
		}
		num_stripes = index_where_to_add;
	} else if (op == BTRFS_MAP_GET_READ_MIRRORS) {
		int index_srcdev = 0;
		int found = 0;
		u64 physical_of_found = 0;

		/*
		 * During the dev-replace procedure, the target drive can
		 * also be used to read data in case it is needed to repair
		 * a corrupt block elsewhere. This is possible if the
		 * requested area is left of the left cursor. In this area,
		 * the target drive is a full copy of the source drive.
		 */
		for (i = 0; i < num_stripes; i++) {
			if (bbio->stripes[i].dev->devid == srcdev_devid) {
				/*
				 * In case of DUP, in order to keep it simple,
				 * only add the mirror with the lowest physical
				 * address.
				 */
				if (found &&
				    physical_of_found <=
				     bbio->stripes[i].physical)
					continue;
				index_srcdev = i;
				found = 1;
				physical_of_found = bbio->stripes[i].physical;
			}
		}
		if (found) {
			/* Append the target device as one extra mirror. */
			struct btrfs_bio_stripe *tgtdev_stripe =
				bbio->stripes + num_stripes;

			tgtdev_stripe->physical = physical_of_found;
			tgtdev_stripe->length =
				bbio->stripes[index_srcdev].length;
			tgtdev_stripe->dev = dev_replace->tgtdev;
			bbio->tgtdev_map[index_srcdev] = num_stripes;

			tgtdev_indexes++;
			num_stripes++;
		}
	}

	*num_stripes_ret = num_stripes;
	*max_errors_ret = max_errors;
	bbio->num_tgtdevs = tgtdev_indexes;
	*bbio_ret = bbio;
}
6206
6207static bool need_full_stripe(enum btrfs_map_op op)
6208{
6209 return (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS);
6210}
6211
6212
6213
6214
6215
6216
6217
6218
6219
6220
6221
6222
6223
6224
6225
/*
 * Calculate the geometry of a particular (address, len) tuple. This
 * information is used to calculate how big a particular bio can get before
 * it straddles a stripe.
 *
 * @fs_info: the filesystem
 * @em:      mapping containing the logical extent
 * @op:      type of operation - write or read
 * @logical: address that we want to figure out the geometry of
 * @io_geom: pointer used to return values
 *
 * Returns < 0 in case the chunk math is inconsistent (shouldn't happen
 * unless @logical is corrupted), 0 otherwise.
 */
int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *em,
			  enum btrfs_map_op op, u64 logical,
			  struct btrfs_io_geometry *io_geom)
{
	struct map_lookup *map;
	u64 len;
	u64 offset;
	u64 stripe_offset;
	u64 stripe_nr;
	u64 stripe_len;
	u64 raid56_full_stripe_start = (u64)-1;
	int data_stripes;

	/* Discards are mapped by __btrfs_map_block_for_discard() instead. */
	ASSERT(op != BTRFS_MAP_DISCARD);

	map = em->map_lookup;
	/* Offset of this logical address in the chunk. */
	offset = logical - em->start;
	/* Len of a stripe in a chunk. */
	stripe_len = map->stripe_len;
	/* Stripe where this block falls in. */
	stripe_nr = div64_u64(offset, stripe_len);
	/* Offset of stripe in the chunk. */
	stripe_offset = stripe_nr * stripe_len;
	if (offset < stripe_offset) {
		btrfs_crit(fs_info,
"stripe math has gone wrong, stripe_offset=%llu offset=%llu start=%llu logical=%llu stripe_len=%llu",
			stripe_offset, offset, em->start, logical, stripe_len);
		return -EINVAL;
	}

	/* Stripe_offset is the offset of this block in its stripe. */
	stripe_offset = offset - stripe_offset;
	data_stripes = nr_data_stripes(map);

	if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
		u64 max_len = stripe_len - stripe_offset;

		/*
		 * In case of RAID56, we need to know the stripe aligned start
		 */
		if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
			unsigned long full_stripe_len = stripe_len * data_stripes;
			raid56_full_stripe_start = offset;

			/*
			 * Allow a write of a full stripe, but make sure we
			 * don't allow straddling of stripes.
			 */
			raid56_full_stripe_start = div64_u64(raid56_full_stripe_start,
					full_stripe_len);
			raid56_full_stripe_start *= full_stripe_len;

			/*
			 * For writes to RAID[56], allow a full stripeset
			 * across all disks. For other RAID types and for
			 * RAID[56] reads, just allow a single stripe (on a
			 * single disk).
			 */
			if (op == BTRFS_MAP_WRITE) {
				max_len = stripe_len * data_stripes -
					  (offset - raid56_full_stripe_start);
			}
		}
		len = min_t(u64, em->len - offset, max_len);
	} else {
		len = em->len - offset;
	}

	io_geom->len = len;
	io_geom->offset = offset;
	io_geom->stripe_len = stripe_len;
	io_geom->stripe_nr = stripe_nr;
	io_geom->stripe_offset = stripe_offset;
	io_geom->raid56_stripe_offset = raid56_full_stripe_start;

	return 0;
}
6303
6304static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
6305 enum btrfs_map_op op,
6306 u64 logical, u64 *length,
6307 struct btrfs_bio **bbio_ret,
6308 int mirror_num, int need_raid_map)
6309{
6310 struct extent_map *em;
6311 struct map_lookup *map;
6312 u64 stripe_offset;
6313 u64 stripe_nr;
6314 u64 stripe_len;
6315 u32 stripe_index;
6316 int data_stripes;
6317 int i;
6318 int ret = 0;
6319 int num_stripes;
6320 int max_errors = 0;
6321 int tgtdev_indexes = 0;
6322 struct btrfs_bio *bbio = NULL;
6323 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
6324 int dev_replace_is_ongoing = 0;
6325 int num_alloc_stripes;
6326 int patch_the_first_stripe_for_dev_replace = 0;
6327 u64 physical_to_patch_in_first_stripe = 0;
6328 u64 raid56_full_stripe_start = (u64)-1;
6329 struct btrfs_io_geometry geom;
6330
6331 ASSERT(bbio_ret);
6332 ASSERT(op != BTRFS_MAP_DISCARD);
6333
6334 em = btrfs_get_chunk_map(fs_info, logical, *length);
6335 ASSERT(!IS_ERR(em));
6336
6337 ret = btrfs_get_io_geometry(fs_info, em, op, logical, &geom);
6338 if (ret < 0)
6339 return ret;
6340
6341 map = em->map_lookup;
6342
6343 *length = geom.len;
6344 stripe_len = geom.stripe_len;
6345 stripe_nr = geom.stripe_nr;
6346 stripe_offset = geom.stripe_offset;
6347 raid56_full_stripe_start = geom.raid56_stripe_offset;
6348 data_stripes = nr_data_stripes(map);
6349
6350 down_read(&dev_replace->rwsem);
6351 dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
6352
6353
6354
6355
6356 if (!dev_replace_is_ongoing)
6357 up_read(&dev_replace->rwsem);
6358
6359 if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
6360 !need_full_stripe(op) && dev_replace->tgtdev != NULL) {
6361 ret = get_extra_mirror_from_replace(fs_info, logical, *length,
6362 dev_replace->srcdev->devid,
6363 &mirror_num,
6364 &physical_to_patch_in_first_stripe);
6365 if (ret)
6366 goto out;
6367 else
6368 patch_the_first_stripe_for_dev_replace = 1;
6369 } else if (mirror_num > map->num_stripes) {
6370 mirror_num = 0;
6371 }
6372
6373 num_stripes = 1;
6374 stripe_index = 0;
6375 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
6376 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
6377 &stripe_index);
6378 if (!need_full_stripe(op))
6379 mirror_num = 1;
6380 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
6381 if (need_full_stripe(op))
6382 num_stripes = map->num_stripes;
6383 else if (mirror_num)
6384 stripe_index = mirror_num - 1;
6385 else {
6386 stripe_index = find_live_mirror(fs_info, map, 0,
6387 dev_replace_is_ongoing);
6388 mirror_num = stripe_index + 1;
6389 }
6390
6391 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
6392 if (need_full_stripe(op)) {
6393 num_stripes = map->num_stripes;
6394 } else if (mirror_num) {
6395 stripe_index = mirror_num - 1;
6396 } else {
6397 mirror_num = 1;
6398 }
6399
6400 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
6401 u32 factor = map->num_stripes / map->sub_stripes;
6402
6403 stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
6404 stripe_index *= map->sub_stripes;
6405
6406 if (need_full_stripe(op))
6407 num_stripes = map->sub_stripes;
6408 else if (mirror_num)
6409 stripe_index += mirror_num - 1;
6410 else {
6411 int old_stripe_index = stripe_index;
6412 stripe_index = find_live_mirror(fs_info, map,
6413 stripe_index,
6414 dev_replace_is_ongoing);
6415 mirror_num = stripe_index - old_stripe_index + 1;
6416 }
6417
6418 } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
6419 if (need_raid_map && (need_full_stripe(op) || mirror_num > 1)) {
6420
6421 stripe_nr = div64_u64(raid56_full_stripe_start,
6422 stripe_len * data_stripes);
6423
6424
6425 num_stripes = map->num_stripes;
6426 max_errors = nr_parity_stripes(map);
6427
6428 *length = map->stripe_len;
6429 stripe_index = 0;
6430 stripe_offset = 0;
6431 } else {
6432
6433
6434
6435
6436
6437 stripe_nr = div_u64_rem(stripe_nr,
6438 data_stripes, &stripe_index);
6439 if (mirror_num > 1)
6440 stripe_index = data_stripes + mirror_num - 2;
6441
6442
6443 div_u64_rem(stripe_nr + stripe_index, map->num_stripes,
6444 &stripe_index);
6445 if (!need_full_stripe(op) && mirror_num <= 1)
6446 mirror_num = 1;
6447 }
6448 } else {
6449
6450
6451
6452
6453
6454 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
6455 &stripe_index);
6456 mirror_num = stripe_index + 1;
6457 }
6458 if (stripe_index >= map->num_stripes) {
6459 btrfs_crit(fs_info,
6460 "stripe index math went horribly wrong, got stripe_index=%u, num_stripes=%u",
6461 stripe_index, map->num_stripes);
6462 ret = -EINVAL;
6463 goto out;
6464 }
6465
6466 num_alloc_stripes = num_stripes;
6467 if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) {
6468 if (op == BTRFS_MAP_WRITE)
6469 num_alloc_stripes <<= 1;
6470 if (op == BTRFS_MAP_GET_READ_MIRRORS)
6471 num_alloc_stripes++;
6472 tgtdev_indexes = num_stripes;
6473 }
6474
6475 bbio = alloc_btrfs_bio(num_alloc_stripes, tgtdev_indexes);
6476 if (!bbio) {
6477 ret = -ENOMEM;
6478 goto out;
6479 }
6480
6481 for (i = 0; i < num_stripes; i++) {
6482 bbio->stripes[i].physical = map->stripes[stripe_index].physical +
6483 stripe_offset + stripe_nr * map->stripe_len;
6484 bbio->stripes[i].dev = map->stripes[stripe_index].dev;
6485 stripe_index++;
6486 }
6487
6488
6489 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map &&
6490 (need_full_stripe(op) || mirror_num > 1)) {
6491 u64 tmp;
6492 unsigned rot;
6493
6494
6495 div_u64_rem(stripe_nr, num_stripes, &rot);
6496
6497
6498 tmp = stripe_nr * data_stripes;
6499 for (i = 0; i < data_stripes; i++)
6500 bbio->raid_map[(i+rot) % num_stripes] =
6501 em->start + (tmp + i) * map->stripe_len;
6502
6503 bbio->raid_map[(i+rot) % map->num_stripes] = RAID5_P_STRIPE;
6504 if (map->type & BTRFS_BLOCK_GROUP_RAID6)
6505 bbio->raid_map[(i+rot+1) % num_stripes] =
6506 RAID6_Q_STRIPE;
6507
6508 sort_parity_stripes(bbio, num_stripes);
6509 }
6510
6511 if (need_full_stripe(op))
6512 max_errors = btrfs_chunk_max_errors(map);
6513
6514 if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL &&
6515 need_full_stripe(op)) {
6516 handle_ops_on_dev_replace(op, &bbio, dev_replace, logical,
6517 &num_stripes, &max_errors);
6518 }
6519
6520 *bbio_ret = bbio;
6521 bbio->map_type = map->type;
6522 bbio->num_stripes = num_stripes;
6523 bbio->max_errors = max_errors;
6524 bbio->mirror_num = mirror_num;
6525
6526
6527
6528
6529
6530
6531 if (patch_the_first_stripe_for_dev_replace && num_stripes > 0) {
6532 WARN_ON(num_stripes > 1);
6533 bbio->stripes[0].dev = dev_replace->tgtdev;
6534 bbio->stripes[0].physical = physical_to_patch_in_first_stripe;
6535 bbio->mirror_num = map->num_stripes + 1;
6536 }
6537out:
6538 if (dev_replace_is_ongoing) {
6539 lockdep_assert_held(&dev_replace->rwsem);
6540
6541 up_read(&dev_replace->rwsem);
6542 }
6543 free_extent_map(em);
6544 return ret;
6545}
6546
6547int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
6548 u64 logical, u64 *length,
6549 struct btrfs_bio **bbio_ret, int mirror_num)
6550{
6551 if (op == BTRFS_MAP_DISCARD)
6552 return __btrfs_map_block_for_discard(fs_info, logical,
6553 length, bbio_ret);
6554
6555 return __btrfs_map_block(fs_info, op, logical, length, bbio_ret,
6556 mirror_num, 0);
6557}
6558
6559
6560int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
6561 u64 logical, u64 *length,
6562 struct btrfs_bio **bbio_ret)
6563{
6564 return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, 0, 1);
6565}
6566
6567static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio)
6568{
6569 bio->bi_private = bbio->private;
6570 bio->bi_end_io = bbio->end_io;
6571 bio_endio(bio);
6572
6573 btrfs_put_bbio(bbio);
6574}
6575
/*
 * Per-stripe bio completion handler.  Every stripe bio of a btrfs_bio ends
 * up here; errors are counted per device, and the last stripe to complete
 * propagates the overall status to the original bio via btrfs_end_bbio().
 */
static void btrfs_end_bio(struct bio *bio)
{
	struct btrfs_bio *bbio = bio->bi_private;
	int is_orig_bio = 0;

	if (bio->bi_status) {
		atomic_inc(&bbio->error);
		if (bio->bi_status == BLK_STS_IOERR ||
		    bio->bi_status == BLK_STS_TARGET) {
			struct btrfs_device *dev = btrfs_io_bio(bio)->device;

			ASSERT(dev->bdev);
			/* Attribute the failure to the right per-device counter. */
			if (btrfs_op(bio) == BTRFS_MAP_WRITE)
				btrfs_dev_stat_inc_and_print(dev,
						BTRFS_DEV_STAT_WRITE_ERRS);
			else if (!(bio->bi_opf & REQ_RAHEAD))
				/* Readahead failures are best-effort; not counted. */
				btrfs_dev_stat_inc_and_print(dev,
						BTRFS_DEV_STAT_READ_ERRS);
			if (bio->bi_opf & REQ_PREFLUSH)
				btrfs_dev_stat_inc_and_print(dev,
						BTRFS_DEV_STAT_FLUSH_ERRS);
		}
	}

	if (bio == bbio->orig_bio)
		is_orig_bio = 1;

	btrfs_bio_counter_dec(bbio->fs_info);

	if (atomic_dec_and_test(&bbio->stripes_pending)) {
		/* Last stripe done: complete via the original bio. */
		if (!is_orig_bio) {
			bio_put(bio);
			bio = bbio->orig_bio;
		}

		btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;

		/*
		 * Only fail the I/O if more stripes failed than the chunk's
		 * redundancy (max_errors) can tolerate.
		 */
		if (atomic_read(&bbio->error) > bbio->max_errors) {
			bio->bi_status = BLK_STS_IOERR;
		} else {
			/*
			 * Enough copies succeeded; the data is up to date
			 * despite any individual stripe errors.
			 */
			bio->bi_status = BLK_STS_OK;
		}

		btrfs_end_bbio(bbio, bio);
	} else if (!is_orig_bio) {
		bio_put(bio);
	}
}
6630
/*
 * Point one stripe bio at its target device and submit it.  Zone append
 * bios are redirected to the start of the target zone on sequential zones,
 * or downgraded to a plain write on conventional zones.
 */
static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio,
			      u64 physical, struct btrfs_device *dev)
{
	struct btrfs_fs_info *fs_info = bbio->fs_info;

	bio->bi_private = bbio;
	btrfs_io_bio(bio)->device = dev;
	bio->bi_end_io = btrfs_end_bio;
	bio->bi_iter.bi_sector = physical >> 9;

	/*
	 * For zone append writes the device picks the final location, so the
	 * bio must be positioned at the beginning of the zone.
	 */
	if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
		if (btrfs_dev_is_sequential(dev, physical)) {
			u64 zone_start = round_down(physical, fs_info->zone_size);

			bio->bi_iter.bi_sector = zone_start >> SECTOR_SHIFT;
		} else {
			/* Conventional zones take regular writes. */
			bio->bi_opf &= ~REQ_OP_ZONE_APPEND;
			bio->bi_opf |= REQ_OP_WRITE;
		}
	}
	btrfs_debug_in_rcu(fs_info,
	"btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
		bio_op(bio), bio->bi_opf, bio->bi_iter.bi_sector,
		(unsigned long)dev->bdev->bd_dev, rcu_str_deref(dev->name),
		dev->devid, bio->bi_iter.bi_size);
	bio_set_dev(bio, dev->bdev);

	/* Paired with btrfs_bio_counter_dec() in btrfs_end_bio(). */
	btrfs_bio_counter_inc_noblocked(fs_info);

	btrfsic_submit_bio(bio);
}
6665
/*
 * Account a stripe that could not even be submitted (missing or unwritable
 * device).  If this was the last outstanding stripe, complete the original
 * bio with a status derived from the accumulated error count.
 */
static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
{
	atomic_inc(&bbio->error);
	if (atomic_dec_and_test(&bbio->stripes_pending)) {
		/* Only the original bio is ever passed in here. */
		WARN_ON(bio != bbio->orig_bio);

		btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
		bio->bi_iter.bi_sector = logical >> 9;
		/* Still OK if redundancy covers the failed stripes. */
		if (atomic_read(&bbio->error) > bbio->max_errors)
			bio->bi_status = BLK_STS_IOERR;
		else
			bio->bi_status = BLK_STS_OK;
		btrfs_end_bbio(bbio, bio);
	}
}
6682
/*
 * Map @bio to its physical stripes and submit one (cloned) bio per stripe.
 * RAID5/6 writes and multi-mirror reads are handed off to the raid56 code,
 * which then owns the bbio.  On the normal path the completion handlers
 * free the bbio once all stripes have finished.
 */
blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
			   int mirror_num)
{
	struct btrfs_device *dev;
	struct bio *first_bio = bio;
	u64 logical = bio->bi_iter.bi_sector << 9;
	u64 length = 0;
	u64 map_length;
	int ret;
	int dev_nr;
	int total_devs;
	struct btrfs_bio *bbio = NULL;

	length = bio->bi_iter.bi_size;
	map_length = length;

	btrfs_bio_counter_inc_blocked(fs_info);
	ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical,
				&map_length, &bbio, mirror_num, 1);
	if (ret) {
		btrfs_bio_counter_dec(fs_info);
		return errno_to_blk_status(ret);
	}

	/* Stash the caller's completion context; restored in btrfs_end_bbio(). */
	total_devs = bbio->num_stripes;
	bbio->orig_bio = first_bio;
	bbio->private = first_bio->bi_private;
	bbio->end_io = first_bio->bi_end_io;
	bbio->fs_info = fs_info;
	atomic_set(&bbio->stripes_pending, bbio->num_stripes);

	if ((bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
	    ((btrfs_op(bio) == BTRFS_MAP_WRITE) || (mirror_num > 1))) {
		/*
		 * RAID56 parity I/O is driven by the raid56 layer; it takes
		 * over the bbio from here.
		 */
		if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
			ret = raid56_parity_write(fs_info, bio, bbio,
						  map_length);
		} else {
			ret = raid56_parity_recover(fs_info, bio, bbio,
						    map_length, mirror_num, 1);
		}

		btrfs_bio_counter_dec(fs_info);
		return errno_to_blk_status(ret);
	}

	if (map_length < length) {
		btrfs_crit(fs_info,
			   "mapping failed logical %llu bio len %llu len %llu",
			   logical, length, map_length);
		BUG();
	}

	for (dev_nr = 0; dev_nr < total_devs; dev_nr++) {
		dev = bbio->stripes[dev_nr].dev;
		/* Skip stripes we cannot submit to; record them as errors. */
		if (!dev || !dev->bdev || test_bit(BTRFS_DEV_STATE_MISSING,
						   &dev->dev_state) ||
		    (btrfs_op(first_bio) == BTRFS_MAP_WRITE &&
		    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
			bbio_error(bbio, first_bio, logical);
			continue;
		}

		/* The last stripe reuses the original bio, others get clones. */
		if (dev_nr < total_devs - 1)
			bio = btrfs_bio_clone(first_bio);
		else
			bio = first_bio;

		submit_stripe_bio(bbio, bio, bbio->stripes[dev_nr].physical, dev);
	}
	btrfs_bio_counter_dec(fs_info);
	return BLK_STS_OK;
}
6757
6758
6759
6760
6761
6762
6763
6764
/*
 * Find a device by @devid, optionally matching the device @uuid and the
 * filesystem @fsid.  A NULL uuid or fsid acts as a wildcard.  Searches the
 * given fs_devices first and then every attached seed device list.
 *
 * Returns the matching device or NULL.
 */
struct btrfs_device *btrfs_find_device(struct btrfs_fs_devices *fs_devices,
				       u64 devid, u8 *uuid, u8 *fsid)
{
	struct btrfs_device *device;
	struct btrfs_fs_devices *seed_devs;

	if (!fsid || !memcmp(fs_devices->metadata_uuid, fsid, BTRFS_FSID_SIZE)) {
		list_for_each_entry(device, &fs_devices->devices, dev_list) {
			if (device->devid == devid &&
			    (!uuid || memcmp(device->uuid, uuid,
					     BTRFS_UUID_SIZE) == 0))
				return device;
		}
	}

	/* Not in the sprout: look through the seed filesystems. */
	list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list) {
		if (!fsid ||
		    !memcmp(seed_devs->metadata_uuid, fsid, BTRFS_FSID_SIZE)) {
			list_for_each_entry(device, &seed_devs->devices,
					    dev_list) {
				if (device->devid == devid &&
				    (!uuid || memcmp(device->uuid, uuid,
						     BTRFS_UUID_SIZE) == 0))
					return device;
			}
		}
	}

	return NULL;
}
6795
/*
 * Allocate a placeholder btrfs_device for a device that is referenced by
 * the metadata but not actually present, link it into @fs_devices and mark
 * it missing.  Returns the device or an ERR_PTR from btrfs_alloc_device().
 */
static struct btrfs_device *add_missing_dev(struct btrfs_fs_devices *fs_devices,
					    u64 devid, u8 *dev_uuid)
{
	struct btrfs_device *device;
	unsigned int nofs_flag;

	/*
	 * Force NOFS for the allocations inside btrfs_alloc_device() via the
	 * scoped memalloc_nofs API rather than plumbing a gfp mask through.
	 * NOTE(review): presumably required because callers hold fs locks
	 * (e.g. during chunk reading) -- confirm against the call sites.
	 */
	nofs_flag = memalloc_nofs_save();
	device = btrfs_alloc_device(NULL, &devid, dev_uuid);
	memalloc_nofs_restore(nofs_flag);
	if (IS_ERR(device))
		return device;

	list_add(&device->dev_list, &fs_devices->devices);
	device->fs_devices = fs_devices;
	fs_devices->num_devices++;

	set_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
	fs_devices->missing_devices++;

	return device;
}
6823
6824
6825
6826
6827
6828
6829
6830
6831
6832
6833
6834
6835
6836
/*
 * Allocate and minimally initialize a btrfs_device.
 *
 * @fs_info:	may be NULL when @devid is given; otherwise used to find the
 *		next free devid
 * @devid:	if non-NULL, use this device id; otherwise allocate a new one
 * @uuid:	if non-NULL, copy this device uuid; otherwise generate one
 *
 * Returns the new device or ERR_PTR(-EINVAL/-ENOMEM/devid lookup error).
 */
struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
					const u64 *devid,
					const u8 *uuid)
{
	struct btrfs_device *dev;
	u64 tmp;

	/* Without a devid we need fs_info to pick the next one. */
	if (WARN_ON(!devid && !fs_info))
		return ERR_PTR(-EINVAL);

	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
	if (!dev)
		return ERR_PTR(-ENOMEM);

	/*
	 * Preallocate the bio used for flushes so it cannot fail later.
	 * NOTE(review): usage inferred from the field name flush_bio --
	 * confirm against the barrier/flush submission path.
	 */
	dev->flush_bio = bio_kmalloc(GFP_KERNEL, 0);
	if (!dev->flush_bio) {
		kfree(dev);
		return ERR_PTR(-ENOMEM);
	}

	INIT_LIST_HEAD(&dev->dev_list);
	INIT_LIST_HEAD(&dev->dev_alloc_list);
	INIT_LIST_HEAD(&dev->post_commit_list);

	atomic_set(&dev->reada_in_flight, 0);
	atomic_set(&dev->dev_stats_ccnt, 0);
	btrfs_device_data_ordered_init(dev);
	INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
	INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
	extent_io_tree_init(fs_info, &dev->alloc_state,
			    IO_TREE_DEVICE_ALLOC_STATE, NULL);

	if (devid)
		tmp = *devid;
	else {
		int ret;

		/* No devid supplied: take the next unused one. */
		ret = find_next_devid(fs_info, &tmp);
		if (ret) {
			btrfs_free_device(dev);
			return ERR_PTR(ret);
		}
	}
	dev->devid = tmp;

	if (uuid)
		memcpy(dev->uuid, uuid, BTRFS_UUID_SIZE);
	else
		generate_random_uuid(dev->uuid);

	return dev;
}
6893
6894static void btrfs_report_missing_device(struct btrfs_fs_info *fs_info,
6895 u64 devid, u8 *uuid, bool error)
6896{
6897 if (error)
6898 btrfs_err_rl(fs_info, "devid %llu uuid %pU is missing",
6899 devid, uuid);
6900 else
6901 btrfs_warn_rl(fs_info, "devid %llu uuid %pU is missing",
6902 devid, uuid);
6903}
6904
6905static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes)
6906{
6907 const int data_stripes = calc_data_stripes(type, num_stripes);
6908
6909 return div_u64(chunk_len, data_stripes);
6910}
6911
6912#if BITS_PER_LONG == 32
6913
6914
6915
6916
6917
6918
6919
6920static int check_32bit_meta_chunk(struct btrfs_fs_info *fs_info,
6921 u64 logical, u64 length, u64 type)
6922{
6923 if (!(type & BTRFS_BLOCK_GROUP_METADATA))
6924 return 0;
6925
6926 if (logical + length < MAX_LFS_FILESIZE)
6927 return 0;
6928
6929 btrfs_err_32bit_limit(fs_info);
6930 return -EOVERFLOW;
6931}
6932
6933
6934
6935
6936
6937
6938
6939static void warn_32bit_meta_chunk(struct btrfs_fs_info *fs_info,
6940 u64 logical, u64 length, u64 type)
6941{
6942 if (!(type & BTRFS_BLOCK_GROUP_METADATA))
6943 return;
6944
6945 if (logical + length < BTRFS_32BIT_EARLY_WARN_THRESHOLD)
6946 return;
6947
6948 btrfs_warn_32bit_limit(fs_info);
6949}
6950#endif
6951
/*
 * Read one chunk item from @leaf and insert the corresponding mapping into
 * fs_info->mapping_tree.  Used both for sys_chunk_array entries (where
 * @leaf is the staged super block buffer) and for chunk tree items.
 * Missing devices are faked up on DEGRADED mounts, otherwise -ENOENT.
 */
static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
			  struct btrfs_chunk *chunk)
{
	struct btrfs_fs_info *fs_info = leaf->fs_info;
	struct extent_map_tree *map_tree = &fs_info->mapping_tree;
	struct map_lookup *map;
	struct extent_map *em;
	u64 logical;
	u64 length;
	u64 devid;
	u64 type;
	u8 uuid[BTRFS_UUID_SIZE];
	int num_stripes;
	int ret;
	int i;

	logical = key->offset;
	length = btrfs_chunk_length(leaf, chunk);
	type = btrfs_chunk_type(leaf, chunk);
	num_stripes = btrfs_chunk_num_stripes(leaf, chunk);

#if BITS_PER_LONG == 32
	ret = check_32bit_meta_chunk(fs_info, logical, length, type);
	if (ret < 0)
		return ret;
	warn_32bit_meta_chunk(fs_info, logical, length, type);
#endif

	/*
	 * Chunks coming from the sys_chunk_array (leaf->start equals the
	 * super block offset) did not pass through the tree checker, so
	 * validate them explicitly here.
	 */
	if (leaf->start == BTRFS_SUPER_INFO_OFFSET) {
		ret = btrfs_check_chunk_valid(leaf, chunk, logical);
		if (ret)
			return ret;
	}

	read_lock(&map_tree->lock);
	em = lookup_extent_mapping(map_tree, logical, 1);
	read_unlock(&map_tree->lock);

	/* Already have a mapping covering this logical address: done. */
	if (em && em->start <= logical && em->start + em->len > logical) {
		free_extent_map(em);
		return 0;
	} else if (em) {
		free_extent_map(em);
	}

	em = alloc_extent_map();
	if (!em)
		return -ENOMEM;
	map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
	if (!map) {
		free_extent_map(em);
		return -ENOMEM;
	}

	set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
	em->map_lookup = map;
	em->start = logical;
	em->len = length;
	em->orig_start = 0;
	em->block_start = 0;
	em->block_len = em->len;

	/* Copy the stripe geometry out of the on-disk item. */
	map->num_stripes = num_stripes;
	map->io_width = btrfs_chunk_io_width(leaf, chunk);
	map->io_align = btrfs_chunk_io_align(leaf, chunk);
	map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
	map->type = type;
	map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
	map->verified_stripes = 0;
	em->orig_block_len = calc_stripe_length(type, em->len,
						map->num_stripes);
	for (i = 0; i < num_stripes; i++) {
		map->stripes[i].physical =
			btrfs_stripe_offset_nr(leaf, chunk, i);
		devid = btrfs_stripe_devid_nr(leaf, chunk, i);
		read_extent_buffer(leaf, uuid, (unsigned long)
				   btrfs_stripe_dev_uuid_nr(chunk, i),
				   BTRFS_UUID_SIZE);
		map->stripes[i].dev = btrfs_find_device(fs_info->fs_devices,
							devid, uuid, NULL);
		if (!map->stripes[i].dev &&
		    !btrfs_test_opt(fs_info, DEGRADED)) {
			free_extent_map(em);
			btrfs_report_missing_device(fs_info, devid, uuid, true);
			return -ENOENT;
		}
		if (!map->stripes[i].dev) {
			/* DEGRADED mount: stand in a placeholder device. */
			map->stripes[i].dev =
				add_missing_dev(fs_info->fs_devices, devid,
						uuid);
			if (IS_ERR(map->stripes[i].dev)) {
				free_extent_map(em);
				btrfs_err(fs_info,
					"failed to init missing dev %llu: %ld",
					devid, PTR_ERR(map->stripes[i].dev));
				return PTR_ERR(map->stripes[i].dev);
			}
			btrfs_report_missing_device(fs_info, devid, uuid, false);
		}
		set_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
				&(map->stripes[i].dev->dev_state));

	}

	write_lock(&map_tree->lock);
	ret = add_extent_mapping(map_tree, em, 0);
	write_unlock(&map_tree->lock);
	if (ret < 0) {
		btrfs_err(fs_info,
			  "failed to add chunk map, start=%llu len=%llu: %d",
			  em->start, em->len, ret);
	}
	/* Drop our reference; the mapping tree holds its own on success. */
	free_extent_map(em);

	return ret;
}
7073
/*
 * Copy the persisted fields of a dev item from @leaf into @device.  The
 * in-memory total/used counters (including the commit_* shadows) are reset
 * to the on-disk values.
 */
static void fill_device_from_item(struct extent_buffer *leaf,
				  struct btrfs_dev_item *dev_item,
				  struct btrfs_device *device)
{
	unsigned long ptr;

	device->devid = btrfs_device_id(leaf, dev_item);
	device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item);
	device->total_bytes = device->disk_total_bytes;
	device->commit_total_bytes = device->disk_total_bytes;
	device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
	device->commit_bytes_used = device->bytes_used;
	device->type = btrfs_device_type(leaf, dev_item);
	device->io_align = btrfs_device_io_align(leaf, dev_item);
	device->io_width = btrfs_device_io_width(leaf, dev_item);
	device->sector_size = btrfs_device_sector_size(leaf, dev_item);
	/* A replace target's devid must never appear as a regular dev item. */
	WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID);
	clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);

	ptr = btrfs_device_uuid(dev_item);
	read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
}
7096
/*
 * Find or set up the btrfs_fs_devices for the seed filesystem identified
 * by @fsid and attach it to fs_info's seed list.  On DEGRADED mounts a
 * seed fs that cannot be found gets an empty placeholder.  Must be called
 * with uuid_mutex held.
 */
static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info,
						  u8 *fsid)
{
	struct btrfs_fs_devices *fs_devices;
	int ret;

	lockdep_assert_held(&uuid_mutex);
	ASSERT(fsid);

	/* Already opened this seed fs for this mount? */
	list_for_each_entry(fs_devices, &fs_info->fs_devices->seed_list, seed_list)
		if (!memcmp(fs_devices->fsid, fsid, BTRFS_FSID_SIZE))
			return fs_devices;

	/* Look for the seed fs among all scanned filesystems. */
	fs_devices = find_fsid(fsid, NULL);
	if (!fs_devices) {
		if (!btrfs_test_opt(fs_info, DEGRADED))
			return ERR_PTR(-ENOENT);

		/* DEGRADED: fake up an empty seed fs so the mount proceeds. */
		fs_devices = alloc_fs_devices(fsid, NULL);
		if (IS_ERR(fs_devices))
			return fs_devices;

		fs_devices->seeding = true;
		fs_devices->opened = 1;
		return fs_devices;
	}

	/*
	 * Clone the scanned fs_devices so the global list entry is left
	 * untouched while this mount opens its own instance.
	 */
	fs_devices = clone_fs_devices(fs_devices);
	if (IS_ERR(fs_devices))
		return fs_devices;

	ret = open_fs_devices(fs_devices, FMODE_READ, fs_info->bdev_holder);
	if (ret) {
		free_fs_devices(fs_devices);
		return ERR_PTR(ret);
	}

	if (!fs_devices->seeding) {
		/* The referenced fs is not actually a seed fs: reject it. */
		close_fs_devices(fs_devices);
		free_fs_devices(fs_devices);
		return ERR_PTR(-EINVAL);
	}

	list_add(&fs_devices->seed_list, &fs_info->fs_devices->seed_list);

	return fs_devices;
}
7150
7151static int read_one_dev(struct extent_buffer *leaf,
7152 struct btrfs_dev_item *dev_item)
7153{
7154 struct btrfs_fs_info *fs_info = leaf->fs_info;
7155 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
7156 struct btrfs_device *device;
7157 u64 devid;
7158 int ret;
7159 u8 fs_uuid[BTRFS_FSID_SIZE];
7160 u8 dev_uuid[BTRFS_UUID_SIZE];
7161
7162 devid = btrfs_device_id(leaf, dev_item);
7163 read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
7164 BTRFS_UUID_SIZE);
7165 read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
7166 BTRFS_FSID_SIZE);
7167
7168 if (memcmp(fs_uuid, fs_devices->metadata_uuid, BTRFS_FSID_SIZE)) {
7169 fs_devices = open_seed_devices(fs_info, fs_uuid);
7170 if (IS_ERR(fs_devices))
7171 return PTR_ERR(fs_devices);
7172 }
7173
7174 device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
7175 fs_uuid);
7176 if (!device) {
7177 if (!btrfs_test_opt(fs_info, DEGRADED)) {
7178 btrfs_report_missing_device(fs_info, devid,
7179 dev_uuid, true);
7180 return -ENOENT;
7181 }
7182
7183 device = add_missing_dev(fs_devices, devid, dev_uuid);
7184 if (IS_ERR(device)) {
7185 btrfs_err(fs_info,
7186 "failed to add missing dev %llu: %ld",
7187 devid, PTR_ERR(device));
7188 return PTR_ERR(device);
7189 }
7190 btrfs_report_missing_device(fs_info, devid, dev_uuid, false);
7191 } else {
7192 if (!device->bdev) {
7193 if (!btrfs_test_opt(fs_info, DEGRADED)) {
7194 btrfs_report_missing_device(fs_info,
7195 devid, dev_uuid, true);
7196 return -ENOENT;
7197 }
7198 btrfs_report_missing_device(fs_info, devid,
7199 dev_uuid, false);
7200 }
7201
7202 if (!device->bdev &&
7203 !test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) {
7204
7205
7206
7207
7208
7209
7210 device->fs_devices->missing_devices++;
7211 set_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
7212 }
7213
7214
7215 if (device->fs_devices != fs_devices) {
7216 ASSERT(test_bit(BTRFS_DEV_STATE_MISSING,
7217 &device->dev_state));
7218
7219 list_move(&device->dev_list, &fs_devices->devices);
7220 device->fs_devices->num_devices--;
7221 fs_devices->num_devices++;
7222
7223 device->fs_devices->missing_devices--;
7224 fs_devices->missing_devices++;
7225
7226 device->fs_devices = fs_devices;
7227 }
7228 }
7229
7230 if (device->fs_devices != fs_info->fs_devices) {
7231 BUG_ON(test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state));
7232 if (device->generation !=
7233 btrfs_device_generation(leaf, dev_item))
7234 return -EINVAL;
7235 }
7236
7237 fill_device_from_item(leaf, dev_item, device);
7238 if (device->bdev) {
7239 u64 max_total_bytes = i_size_read(device->bdev->bd_inode);
7240
7241 if (device->total_bytes > max_total_bytes) {
7242 btrfs_err(fs_info,
7243 "device total_bytes should be at most %llu but found %llu",
7244 max_total_bytes, device->total_bytes);
7245 return -EINVAL;
7246 }
7247 }
7248 set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
7249 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
7250 !test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
7251 device->fs_devices->total_rw_bytes += device->total_bytes;
7252 atomic64_add(device->total_bytes - device->bytes_used,
7253 &fs_info->free_chunk_space);
7254 }
7255 ret = 0;
7256 return ret;
7257}
7258
/*
 * Parse the sys_chunk_array embedded in the super block and create the
 * chunk mappings for the SYSTEM chunks, which are needed before the chunk
 * tree itself can be read.  The super copy is staged in a dummy extent
 * buffer so the regular extent-buffer accessors can be used on it.
 *
 * Returns 0 on success, -EIO for a malformed array, or the error from
 * read_one_chunk().
 */
int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_super_block *super_copy = fs_info->super_copy;
	struct extent_buffer *sb;
	struct btrfs_disk_key *disk_key;
	struct btrfs_chunk *chunk;
	u8 *array_ptr;
	unsigned long sb_array_offset;
	int ret = 0;
	u32 num_stripes;
	u32 array_size;
	u32 len = 0;
	u32 cur_offset;
	u64 type;
	struct btrfs_key key;

	ASSERT(BTRFS_SUPER_INFO_SIZE <= fs_info->nodesize);

	/*
	 * Stage the super block in a dummy tree block so btrfs_chunk_*
	 * accessors work against it.
	 */
	sb = btrfs_find_create_tree_block(fs_info, BTRFS_SUPER_INFO_OFFSET,
					  root->root_key.objectid, 0);
	if (IS_ERR(sb))
		return PTR_ERR(sb);
	set_extent_buffer_uptodate(sb);

	/*
	 * NOTE(review): the first page is additionally marked uptodate when
	 * pages are larger than the super block -- presumably to keep page
	 * state consistent for this dummy buffer; confirm against the
	 * extent_buffer page handling.
	 */
	if (PAGE_SIZE > BTRFS_SUPER_INFO_SIZE)
		SetPageUptodate(sb->pages[0]);

	write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
	array_size = btrfs_super_sys_array_size(super_copy);

	array_ptr = super_copy->sys_chunk_array;
	sb_array_offset = offsetof(struct btrfs_super_block, sys_chunk_array);
	cur_offset = 0;

	/* Walk (disk_key, chunk) pairs until the declared size is consumed. */
	while (cur_offset < array_size) {
		disk_key = (struct btrfs_disk_key *)array_ptr;
		len = sizeof(*disk_key);
		if (cur_offset + len > array_size)
			goto out_short_read;

		btrfs_disk_key_to_cpu(&key, disk_key);

		array_ptr += len;
		sb_array_offset += len;
		cur_offset += len;

		if (key.type != BTRFS_CHUNK_ITEM_KEY) {
			btrfs_err(fs_info,
			    "unexpected item type %u in sys_array at offset %u",
				  (u32)key.type, cur_offset);
			ret = -EIO;
			break;
		}

		/* This "pointer" is really an offset into the staged eb. */
		chunk = (struct btrfs_chunk *)sb_array_offset;

		/*
		 * Bounds-check a minimal (one-stripe) item first so that
		 * reading num_stripes below is safe.
		 */
		len = btrfs_chunk_item_size(1);
		if (cur_offset + len > array_size)
			goto out_short_read;

		num_stripes = btrfs_chunk_num_stripes(sb, chunk);
		if (!num_stripes) {
			btrfs_err(fs_info,
			"invalid number of stripes %u in sys_array at offset %u",
				  num_stripes, cur_offset);
			ret = -EIO;
			break;
		}

		type = btrfs_chunk_type(sb, chunk);
		if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) {
			btrfs_err(fs_info,
			"invalid chunk type %llu in sys_array at offset %u",
				  type, cur_offset);
			ret = -EIO;
			break;
		}

		/* Now re-check with the real, num_stripes-sized item length. */
		len = btrfs_chunk_item_size(num_stripes);
		if (cur_offset + len > array_size)
			goto out_short_read;

		ret = read_one_chunk(&key, sb, chunk);
		if (ret)
			break;

		array_ptr += len;
		sb_array_offset += len;
		cur_offset += len;
	}
	clear_extent_buffer_uptodate(sb);
	free_extent_buffer_stale(sb);
	return ret;

out_short_read:
	btrfs_err(fs_info, "sys_array too short to read %u bytes at offset %u",
			len, cur_offset);
	clear_extent_buffer_uptodate(sb);
	free_extent_buffer_stale(sb);
	return -EIO;
}
7379
7380
7381
7382
7383
7384
7385
7386
7387
/*
 * Check whether the filesystem is still usable read-write given the
 * currently missing/failed devices.  Walks every chunk mapping and compares
 * the number of unusable stripes against what the chunk's RAID profile
 * tolerates.  @failing_dev, if non-NULL, is additionally treated as failed
 * and suppresses the per-chunk warning.  Returns true if every chunk is
 * still within its tolerance.
 */
bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
			       struct btrfs_device *failing_dev)
{
	struct extent_map_tree *map_tree = &fs_info->mapping_tree;
	struct extent_map *em;
	u64 next_start = 0;
	bool ret = true;

	read_lock(&map_tree->lock);
	em = lookup_extent_mapping(map_tree, 0, (u64)-1);
	read_unlock(&map_tree->lock);

	/* No chunk mapping at all is itself a failure. */
	if (!em) {
		ret = false;
		goto out;
	}
	while (em) {
		struct map_lookup *map;
		int missing = 0;
		int max_tolerated;
		int i;

		map = em->map_lookup;
		max_tolerated =
			btrfs_get_num_tolerated_disk_barrier_failures(
					map->type);
		/* Count stripes of this chunk that cannot be used. */
		for (i = 0; i < map->num_stripes; i++) {
			struct btrfs_device *dev = map->stripes[i].dev;

			if (!dev || !dev->bdev ||
			    test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
			    dev->last_flush_error)
				missing++;
			else if (failing_dev && failing_dev == dev)
				missing++;
		}
		if (missing > max_tolerated) {
			if (!failing_dev)
				btrfs_warn(fs_info,
	"chunk %llu missing %d devices, max tolerance is %d for writable mount",
					   em->start, missing, max_tolerated);
			free_extent_map(em);
			ret = false;
			goto out;
		}
		next_start = extent_map_end(em);
		free_extent_map(em);

		/* Advance to the next mapping after this chunk. */
		read_lock(&map_tree->lock);
		em = lookup_extent_mapping(map_tree, next_start,
					   (u64)(-1) - next_start);
		read_unlock(&map_tree->lock);
	}
out:
	return ret;
}
7444
/*
 * Kick off readahead for every child block referenced by @node, so that
 * walking its leaves does not stall on individual reads.
 */
static void readahead_tree_node_children(struct extent_buffer *node)
{
	const int nritems = btrfs_header_nritems(node);
	int slot;

	for (slot = 0; slot < nritems; slot++)
		btrfs_readahead_node_child(node, slot);
}
7453
/*
 * Read all dev items and chunk items from the chunk tree at mount time,
 * populating the in-memory device list and chunk mappings, then cross-check
 * the totals against the super block.  Holds uuid_mutex for the duration.
 */
int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *root = fs_info->chunk_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	int ret;
	int slot;
	u64 total_dev = 0;
	u64 last_ra_node = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* Serialize against device scan/add/remove and seed handling. */
	mutex_lock(&uuid_mutex);

	/*
	 * Recomputed from scratch: read_one_dev() adds each writable
	 * device's bytes back in as the dev items are processed.
	 */
	fs_info->fs_devices->total_rw_bytes = 0;

	/*
	 * DEV items sort before CHUNK items in the chunk tree, so a single
	 * ordered walk starting at the first DEV item covers both types.
	 */
	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.offset = 0;
	key.type = 0;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto error;
	while (1) {
		struct extent_buffer *node;

		leaf = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret == 0)
				continue;
			if (ret < 0)
				goto error;
			break;
		}

		/* Readahead the current node's children, once per node. */
		node = path->nodes[1];
		if (node) {
			if (last_ra_node != node->start) {
				readahead_tree_node_children(node);
				last_ra_node = node->start;
			}
		}
		btrfs_item_key_to_cpu(leaf, &found_key, slot);
		if (found_key.type == BTRFS_DEV_ITEM_KEY) {
			struct btrfs_dev_item *dev_item;
			dev_item = btrfs_item_ptr(leaf, slot,
						  struct btrfs_dev_item);
			ret = read_one_dev(leaf, dev_item);
			if (ret)
				goto error;
			total_dev++;
		} else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
			struct btrfs_chunk *chunk;

			/*
			 * The chunk tree is only read here, before the fs is
			 * fully open; assert that so nothing can race with
			 * concurrent chunk allocation.
			 */
			ASSERT(!test_bit(BTRFS_FS_OPEN, &fs_info->flags));
			chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
			ret = read_one_chunk(&found_key, leaf, chunk);
			if (ret)
				goto error;
		}
		path->slots[0]++;
	}

	/* Cross-check the on-disk counts against the super block. */
	if (total_dev != fs_info->fs_devices->total_devices) {
		btrfs_err(fs_info,
	   "super_num_devices %llu mismatch with num_devices %llu found here",
			  btrfs_super_num_devices(fs_info->super_copy),
			  total_dev);
		ret = -EINVAL;
		goto error;
	}
	if (btrfs_super_total_bytes(fs_info->super_copy) <
	    fs_info->fs_devices->total_rw_bytes) {
		btrfs_err(fs_info,
	"super_total_bytes %llu mismatch with fs_devices total_rw_bytes %llu",
			  btrfs_super_total_bytes(fs_info->super_copy),
			  fs_info->fs_devices->total_rw_bytes);
		ret = -EINVAL;
		goto error;
	}
	ret = 0;
error:
	mutex_unlock(&uuid_mutex);

	btrfs_free_path(path);
	return ret;
}
7577
/*
 * Late-bind @fs_info into every known device and fs_devices (including all
 * seed filesystems) once the fs_info exists.  The device lists are walked
 * under device_list_mutex so they cannot change underneath us.
 */
void btrfs_init_devices_late(struct btrfs_fs_info *fs_info)
{
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices, *seed_devs;
	struct btrfs_device *device;

	fs_devices->fs_info = fs_info;

	mutex_lock(&fs_devices->device_list_mutex);
	list_for_each_entry(device, &fs_devices->devices, dev_list)
		device->fs_info = fs_info;

	list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list) {
		list_for_each_entry(device, &seed_devs->devices, dev_list)
			device->fs_info = fs_info;

		seed_devs->fs_info = fs_info;
	}
	mutex_unlock(&fs_devices->device_list_mutex);
}
7597
7598static u64 btrfs_dev_stats_value(const struct extent_buffer *eb,
7599 const struct btrfs_dev_stats_item *ptr,
7600 int index)
7601{
7602 u64 val;
7603
7604 read_extent_buffer(eb, &val,
7605 offsetof(struct btrfs_dev_stats_item, values) +
7606 ((unsigned long)ptr) + (index * sizeof(u64)),
7607 sizeof(val));
7608 return val;
7609}
7610
7611static void btrfs_set_dev_stats_value(struct extent_buffer *eb,
7612 struct btrfs_dev_stats_item *ptr,
7613 int index, u64 val)
7614{
7615 write_extent_buffer(eb, &val,
7616 offsetof(struct btrfs_dev_stats_item, values) +
7617 ((unsigned long)ptr) + (index * sizeof(u64)),
7618 sizeof(val));
7619}
7620
/*
 * Load the persistent statistics item for @device from the dev root, if
 * present.  A missing item (or a missing dev root on a fresh fs) is not an
 * error: all counters start at zero.  @path is caller-provided scratch
 * space and is released before returning.
 */
static int btrfs_device_init_dev_stats(struct btrfs_device *device,
				       struct btrfs_path *path)
{
	struct btrfs_dev_stats_item *ptr;
	struct extent_buffer *eb;
	struct btrfs_key key;
	int item_size;
	int i, ret, slot;

	if (!device->fs_info->dev_root)
		return 0;

	key.objectid = BTRFS_DEV_STATS_OBJECTID;
	key.type = BTRFS_PERSISTENT_ITEM_KEY;
	key.offset = device->devid;
	ret = btrfs_search_slot(NULL, device->fs_info->dev_root, &key, path, 0, 0);
	if (ret) {
		/* No stats item yet: zero all counters and carry on. */
		for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
			btrfs_dev_stat_set(device, i, 0);
		device->dev_stats_valid = 1;
		btrfs_release_path(path);
		return ret < 0 ? ret : 0;
	}
	slot = path->slots[0];
	eb = path->nodes[0];
	item_size = btrfs_item_size_nr(eb, slot);

	ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_stats_item);

	/* Older items may be shorter; counters beyond item_size read as 0. */
	for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
		if (item_size >= (1 + i) * sizeof(__le64))
			btrfs_dev_stat_set(device, i,
					   btrfs_dev_stats_value(eb, ptr, i));
		else
			btrfs_dev_stat_set(device, i, 0);
	}

	device->dev_stats_valid = 1;
	btrfs_dev_stat_print_on_load(device);
	btrfs_release_path(path);

	return 0;
}
7664
/*
 * Load the persistent statistics for every device of the filesystem,
 * including all seed devices, under device_list_mutex.  Stops and returns
 * the error from the first device that fails.
 */
int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info)
{
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices, *seed_devs;
	struct btrfs_device *device;
	struct btrfs_path *path = NULL;
	int ret = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	mutex_lock(&fs_devices->device_list_mutex);
	list_for_each_entry(device, &fs_devices->devices, dev_list) {
		ret = btrfs_device_init_dev_stats(device, path);
		if (ret)
			goto out;
	}
	/* Seed devices keep their own stats items too. */
	list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list) {
		list_for_each_entry(device, &seed_devs->devices, dev_list) {
			ret = btrfs_device_init_dev_stats(device, path);
			if (ret)
				goto out;
		}
	}
out:
	mutex_unlock(&fs_devices->device_list_mutex);

	btrfs_free_path(path);
	return ret;
}
7695
/*
 * Persist the in-memory statistics of @device to its dev_stats item in the
 * dev root within @trans.  An existing item that is too small for the
 * current layout is deleted and re-created at full size.
 */
static int update_dev_stat_item(struct btrfs_trans_handle *trans,
				struct btrfs_device *device)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_root *dev_root = fs_info->dev_root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *eb;
	struct btrfs_dev_stats_item *ptr;
	int ret;
	int i;

	key.objectid = BTRFS_DEV_STATS_OBJECTID;
	key.type = BTRFS_PERSISTENT_ITEM_KEY;
	key.offset = device->devid;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
	if (ret < 0) {
		btrfs_warn_in_rcu(fs_info,
			"error %d while searching for dev_stats item for device %s",
			      ret, rcu_str_deref(device->name));
		goto out;
	}

	if (ret == 0 &&
	    btrfs_item_size_nr(path->nodes[0], path->slots[0]) < sizeof(*ptr)) {
		/* Existing item is too small: delete it and insert anew. */
		ret = btrfs_del_item(trans, dev_root, path);
		if (ret != 0) {
			btrfs_warn_in_rcu(fs_info,
				"delete too small dev_stats item for device %s failed %d",
				      rcu_str_deref(device->name), ret);
			goto out;
		}
		/* Fall through to the insert path below. */
		ret = 1;
	}

	if (ret == 1) {
		/* Item does not exist (or was just deleted): create it. */
		btrfs_release_path(path);
		ret = btrfs_insert_empty_item(trans, dev_root, path,
					      &key, sizeof(*ptr));
		if (ret < 0) {
			btrfs_warn_in_rcu(fs_info,
				"insert dev_stats item for device %s failed %d",
				rcu_str_deref(device->name), ret);
			goto out;
		}
	}

	/* Copy all counters into the (possibly fresh) item. */
	eb = path->nodes[0];
	ptr = btrfs_item_ptr(eb, path->slots[0], struct btrfs_dev_stats_item);
	for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
		btrfs_set_dev_stats_value(eb, ptr, i,
					  btrfs_dev_stat_read(device, i));
	btrfs_mark_buffer_dirty(eb);

out:
	btrfs_free_path(path);
	return ret;
}
7760
7761
7762
7763
/*
 * Write out the error statistics of every device that accumulated new
 * errors since they were last persisted (dev_stats_ccnt != 0).
 *
 * Called with a running transaction; returns 0 on success or the last
 * error from update_dev_stat_item().
 */
int btrfs_run_dev_stats(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	struct btrfs_device *device;
	int stats_cnt;
	int ret = 0;

	mutex_lock(&fs_devices->device_list_mutex);
	list_for_each_entry(device, &fs_devices->devices, dev_list) {
		/* Snapshot the change count; skip devices with nothing new */
		stats_cnt = atomic_read(&device->dev_stats_ccnt);
		if (!device->dev_stats_valid || stats_cnt == 0)
			continue;

		/*
		 * Order the read of dev_stats_ccnt above against the reads of
		 * the individual counters done in update_dev_stat_item()
		 * below, so we never persist counter values older than the
		 * change count we are about to subtract.
		 * NOTE(review): presumably pairs with a write barrier on the
		 * counter-update side (btrfs_dev_stat_inc/_set) — confirm.
		 */
		smp_rmb();

		ret = update_dev_stat_item(trans, device);
		if (!ret)
			/*
			 * Subtract only the snapshot: errors recorded while we
			 * were writing keep ccnt non-zero, so the next commit
			 * persists them.
			 */
			atomic_sub(stats_cnt, &device->dev_stats_ccnt);
	}
	mutex_unlock(&fs_devices->device_list_mutex);

	return ret;
}
7800
/*
 * Bump the error counter @index on @dev and emit the (rate-limited)
 * per-device error summary.
 */
void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index)
{
	btrfs_dev_stat_inc(dev, index);
	btrfs_dev_stat_print_on_error(dev);
}
7806
/*
 * Log all current error counters of @dev with a rate-limited error
 * message; silently skipped while the device's stats are not yet valid.
 */
static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
{
	if (!dev->dev_stats_valid)
		return;
	btrfs_err_rl_in_rcu(dev->fs_info,
		"bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u",
			   rcu_str_deref(dev->name),
			   btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
			   btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
			   btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
			   btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS),
			   btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS));
}
7820
7821static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
7822{
7823 int i;
7824
7825 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
7826 if (btrfs_dev_stat_read(dev, i) != 0)
7827 break;
7828 if (i == BTRFS_DEV_STAT_VALUES_MAX)
7829 return;
7830
7831 btrfs_info_in_rcu(dev->fs_info,
7832 "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u",
7833 rcu_str_deref(dev->name),
7834 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
7835 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
7836 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
7837 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS),
7838 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS));
7839}
7840
7841int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
7842 struct btrfs_ioctl_get_dev_stats *stats)
7843{
7844 struct btrfs_device *dev;
7845 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
7846 int i;
7847
7848 mutex_lock(&fs_devices->device_list_mutex);
7849 dev = btrfs_find_device(fs_info->fs_devices, stats->devid, NULL, NULL);
7850 mutex_unlock(&fs_devices->device_list_mutex);
7851
7852 if (!dev) {
7853 btrfs_warn(fs_info, "get dev_stats failed, device not found");
7854 return -ENODEV;
7855 } else if (!dev->dev_stats_valid) {
7856 btrfs_warn(fs_info, "get dev_stats failed, not yet valid");
7857 return -ENODEV;
7858 } else if (stats->flags & BTRFS_DEV_STATS_RESET) {
7859 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
7860 if (stats->nr_items > i)
7861 stats->values[i] =
7862 btrfs_dev_stat_read_and_reset(dev, i);
7863 else
7864 btrfs_dev_stat_set(dev, i, 0);
7865 }
7866 btrfs_info(fs_info, "device stats zeroed by %s (%d)",
7867 current->comm, task_pid_nr(current));
7868 } else {
7869 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
7870 if (stats->nr_items > i)
7871 stats->values[i] = btrfs_dev_stat_read(dev, i);
7872 }
7873 if (stats->nr_items > BTRFS_DEV_STAT_VALUES_MAX)
7874 stats->nr_items = BTRFS_DEV_STAT_VALUES_MAX;
7875 return 0;
7876}
7877
7878
7879
7880
7881
7882
7883
7884
/*
 * Copy the committed size fields (commit_total_bytes, commit_bytes_used)
 * from the live values for every device queued on the transaction's
 * dev_update_list, emptying the list.
 *
 * Must run in the TRANS_STATE_COMMIT_DOING phase (asserted), i.e. while
 * the transaction is quiesced and the live values are stable.
 */
void btrfs_commit_device_sizes(struct btrfs_transaction *trans)
{
	struct btrfs_device *curr, *next;

	ASSERT(trans->state == TRANS_STATE_COMMIT_DOING);

	if (list_empty(&trans->dev_update_list))
		return;

	/*
	 * post_commit_list entries and the commit_* fields are updated under
	 * chunk_mutex.  NOTE(review): concurrent readers/writers of these
	 * fields presumably take the same mutex — confirm at the other
	 * access sites.
	 */
	mutex_lock(&trans->fs_info->chunk_mutex);
	list_for_each_entry_safe(curr, next, &trans->dev_update_list,
				 post_commit_list) {
		list_del_init(&curr->post_commit_list);
		curr->commit_total_bytes = curr->disk_total_bytes;
		curr->commit_bytes_used = curr->bytes_used;
	}
	mutex_unlock(&trans->fs_info->chunk_mutex);
}
7908
7909
7910
7911
7912int btrfs_bg_type_to_factor(u64 flags)
7913{
7914 const int index = btrfs_bg_flags_to_raid_index(flags);
7915
7916 return btrfs_raid_array[index].ncopies;
7917}
7918
7919
7920
/*
 * Cross-check one dev extent item (read from the device tree) against the
 * in-memory chunk mapping at @chunk_offset.
 *
 * @chunk_offset:    logical start of the chunk the dev extent points at
 * @devid:           device the extent lives on
 * @physical_offset: physical start of the extent on that device
 * @physical_len:    length of the extent
 *
 * Verifies that the chunk mapping exists, the extent length equals the
 * chunk's per-device stripe length, a matching stripe exists (bumping the
 * chunk's verified_stripes count), the extent fits inside the device, and
 * on zoned devices is zone-size aligned.
 *
 * Returns 0 when everything matches, -EUCLEAN on any inconsistency.
 */
static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
				 u64 chunk_offset, u64 devid,
				 u64 physical_offset, u64 physical_len)
{
	struct extent_map_tree *em_tree = &fs_info->mapping_tree;
	struct extent_map *em;
	struct map_lookup *map;
	struct btrfs_device *dev;
	u64 stripe_len;
	bool found = false;
	int ret = 0;
	int i;

	read_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, chunk_offset, 1);
	read_unlock(&em_tree->lock);

	if (!em) {
		btrfs_err(fs_info,
"dev extent physical offset %llu on devid %llu doesn't have corresponding chunk",
			  physical_offset, devid);
		ret = -EUCLEAN;
		goto out;
	}

	/* The dev extent must span exactly one stripe of its chunk */
	map = em->map_lookup;
	stripe_len = calc_stripe_length(map->type, em->len, map->num_stripes);
	if (physical_len != stripe_len) {
		btrfs_err(fs_info,
"dev extent physical offset %llu on devid %llu length doesn't match chunk %llu, have %llu expect %llu",
			  physical_offset, devid, em->start, physical_len,
			  stripe_len);
		ret = -EUCLEAN;
		goto out;
	}

	/* Find the stripe matching (devid, physical_offset) in this chunk */
	for (i = 0; i < map->num_stripes; i++) {
		if (map->stripes[i].dev->devid == devid &&
		    map->stripes[i].physical == physical_offset) {
			found = true;
			/* More dev extents than stripes is corruption */
			if (map->verified_stripes >= map->num_stripes) {
				btrfs_err(fs_info,
					"too many dev extents for chunk %llu found",
					  em->start);
				ret = -EUCLEAN;
				goto out;
			}
			map->verified_stripes++;
			break;
		}
	}
	if (!found) {
		btrfs_err(fs_info,
			"dev extent physical offset %llu devid %llu has no corresponding chunk",
			  physical_offset, devid);
		ret = -EUCLEAN;
		/* Deliberately no goto: still run the device checks below */
	}

	/* Make sure no dev extent is beyond device boundary */
	dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL);
	if (!dev) {
		btrfs_err(fs_info, "failed to find devid %llu", devid);
		ret = -EUCLEAN;
		goto out;
	}

	if (physical_offset + physical_len > dev->disk_total_bytes) {
		btrfs_err(fs_info,
"dev extent devid %llu physical offset %llu len %llu is beyond device boundary %llu",
			  devid, physical_offset, physical_len,
			  dev->disk_total_bytes);
		ret = -EUCLEAN;
		goto out;
	}

	/* Zoned devices additionally require zone-size alignment */
	if (dev->zone_info) {
		u64 zone_size = dev->zone_info->zone_size;

		if (!IS_ALIGNED(physical_offset, zone_size) ||
		    !IS_ALIGNED(physical_len, zone_size)) {
			btrfs_err(fs_info,
"zoned: dev extent devid %llu physical offset %llu len %llu is not aligned to device zone",
				  devid, physical_offset, physical_len);
			ret = -EUCLEAN;
			goto out;
		}
	}

out:
	free_extent_map(em);
	return ret;
}
8013
8014static int verify_chunk_dev_extent_mapping(struct btrfs_fs_info *fs_info)
8015{
8016 struct extent_map_tree *em_tree = &fs_info->mapping_tree;
8017 struct extent_map *em;
8018 struct rb_node *node;
8019 int ret = 0;
8020
8021 read_lock(&em_tree->lock);
8022 for (node = rb_first_cached(&em_tree->map); node; node = rb_next(node)) {
8023 em = rb_entry(node, struct extent_map, rb_node);
8024 if (em->map_lookup->num_stripes !=
8025 em->map_lookup->verified_stripes) {
8026 btrfs_err(fs_info,
8027 "chunk %llu has missing dev extent, have %d expect %d",
8028 em->start, em->map_lookup->verified_stripes,
8029 em->map_lookup->num_stripes);
8030 ret = -EUCLEAN;
8031 goto out;
8032 }
8033 }
8034out:
8035 read_unlock(&em_tree->lock);
8036 return ret;
8037}
8038
8039
8040
8041
8042
8043
8044
8045
/*
 * Ensure all dev extents are mapped to correct chunks and all chunks are
 * fully covered by dev extents, so later chunk allocation/free does not
 * run into inconsistent metadata.
 *
 * NOTE: this iterates the whole device tree, which slightly increases
 * mount time.
 */
int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
{
	struct btrfs_path *path;
	struct btrfs_root *root = fs_info->dev_root;
	struct btrfs_key key;
	u64 prev_devid = 0;
	u64 prev_dev_ext_end = 0;
	int ret = 0;

	/*
	 * With IGNOREBADROOTS the dev root may be missing or corrupted, so
	 * skip the verification entirely in that case.
	 */
	if (btrfs_test_opt(fs_info, IGNOREBADROOTS))
		return 0;

	/*
	 * Iterate all DEV_EXTENT items in (devid, physical_offset) key
	 * order, starting from the smallest key.
	 */
	key.objectid = 1;
	key.type = BTRFS_DEV_EXTENT_KEY;
	key.offset = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = READA_FORWARD;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
		ret = btrfs_next_leaf(root, path);
		if (ret < 0)
			goto out;
		/* No dev extent item at all: treat as corruption */
		if (ret > 0) {
			ret = -EUCLEAN;
			goto out;
		}
	}
	while (1) {
		struct extent_buffer *leaf = path->nodes[0];
		struct btrfs_dev_extent *dext;
		int slot = path->slots[0];
		u64 chunk_offset;
		u64 physical_offset;
		u64 physical_len;
		u64 devid;

		btrfs_item_key_to_cpu(leaf, &key, slot);
		/* Past the last DEV_EXTENT item, done */
		if (key.type != BTRFS_DEV_EXTENT_KEY)
			break;
		devid = key.objectid;
		physical_offset = key.offset;

		dext = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
		chunk_offset = btrfs_dev_extent_chunk_offset(leaf, dext);
		physical_len = btrfs_dev_extent_length(leaf, dext);

		/* Check if this dev extent overlaps with the previous one */
		if (devid == prev_devid && physical_offset < prev_dev_ext_end) {
			btrfs_err(fs_info,
"dev extent devid %llu physical offset %llu overlap with previous dev extent end %llu",
				  devid, physical_offset, prev_dev_ext_end);
			ret = -EUCLEAN;
			goto out;
		}

		ret = verify_one_dev_extent(fs_info, chunk_offset, devid,
					    physical_offset, physical_len);
		if (ret < 0)
			goto out;
		prev_devid = devid;
		prev_dev_ext_end = physical_offset + physical_len;

		ret = btrfs_next_item(root, path);
		if (ret < 0)
			goto out;
		if (ret > 0) {
			ret = 0;
			break;
		}
	}

	/* Ensure all chunks have their stripes covered by dev extents */
	ret = verify_chunk_dev_extent_mapping(fs_info);
out:
	btrfs_free_path(path);
	return ret;
}
8141
8142
8143
8144
8145
8146bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr)
8147{
8148 struct btrfs_swapfile_pin *sp;
8149 struct rb_node *node;
8150
8151 spin_lock(&fs_info->swapfile_pins_lock);
8152 node = fs_info->swapfile_pins.rb_node;
8153 while (node) {
8154 sp = rb_entry(node, struct btrfs_swapfile_pin, node);
8155 if (ptr < sp->ptr)
8156 node = node->rb_left;
8157 else if (ptr > sp->ptr)
8158 node = node->rb_right;
8159 else
8160 break;
8161 }
8162 spin_unlock(&fs_info->swapfile_pins_lock);
8163 return node != NULL;
8164}
8165
/*
 * Kthread worker relocating one block group to repair an I/O failure
 * (zoned mode).  @data is a struct btrfs_block_group whose reference,
 * taken by btrfs_repair_one_zone(), is owned and dropped here.
 */
static int relocating_repair_kthread(void *data)
{
	struct btrfs_block_group *cache = (struct btrfs_block_group *)data;
	struct btrfs_fs_info *fs_info = cache->fs_info;
	u64 target;
	int ret = 0;

	/* Remember the start offset, then drop the caller's reference */
	target = cache->start;
	btrfs_put_block_group(cache);

	/* Relocation is a balance-like exclusive operation */
	if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
		/*
		 * NOTE(review): on this path relocating_repair remains set on
		 * the block group, so later failures won't re-schedule the
		 * repair — confirm this is intended.
		 */
		btrfs_info(fs_info,
			   "zoned: skip relocating block group %llu to repair: EBUSY",
			   target);
		return -EBUSY;
	}

	mutex_lock(&fs_info->reclaim_bgs_lock);

	/* Ensure the block group still exists */
	cache = btrfs_lookup_block_group(fs_info, target);
	if (!cache)
		goto out;

	/* Bail out if the repair flag was cleared in the meantime */
	if (!cache->relocating_repair)
		goto out;

	ret = btrfs_may_alloc_data_chunk(fs_info, target);
	if (ret < 0)
		goto out;

	btrfs_info(fs_info,
		   "zoned: relocating block group %llu to repair IO failure",
		   target);
	ret = btrfs_relocate_chunk(fs_info, target);

out:
	if (cache)
		btrfs_put_block_group(cache);
	mutex_unlock(&fs_info->reclaim_bgs_lock);
	btrfs_exclop_finish(fs_info);

	return ret;
}
8210
8211int btrfs_repair_one_zone(struct btrfs_fs_info *fs_info, u64 logical)
8212{
8213 struct btrfs_block_group *cache;
8214
8215
8216 if (btrfs_test_opt(fs_info, DEGRADED))
8217 return 0;
8218
8219 cache = btrfs_lookup_block_group(fs_info, logical);
8220 if (!cache)
8221 return 0;
8222
8223 spin_lock(&cache->lock);
8224 if (cache->relocating_repair) {
8225 spin_unlock(&cache->lock);
8226 btrfs_put_block_group(cache);
8227 return 0;
8228 }
8229 cache->relocating_repair = 1;
8230 spin_unlock(&cache->lock);
8231
8232 kthread_run(relocating_repair_kthread, cache,
8233 "btrfs-relocating-repair");
8234
8235 return 0;
8236}
8237