// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 */
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/ratelimit.h>
#include <linux/kthread.h>
#include <linux/raid/pq.h>
#include <linux/semaphore.h>
#include <linux/uuid.h>
#include <linux/list_sort.h>
#include "misc.h"
#include "ctree.h"
#include "extent_map.h"
#include "disk-io.h"
#include "transaction.h"
#include "print-tree.h"
#include "volumes.h"
#include "raid56.h"
#include "async-thread.h"
#include "check-integrity.h"
#include "rcu-string.h"
#include "dev-replace.h"
#include "sysfs.h"
#include "tree-checker.h"
#include "space-info.h"
#include "block-group.h"
#include "discard.h"
#include "zoned.h"
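
/*
 * Per-profile attributes of every block group type supported by btrfs,
 * indexed by enum btrfs_raid_types. The allocator and the minimum-device
 * checks read these fields; devs_max == 0 means the profile has no upper
 * device limit.
 */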
const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
	[BTRFS_RAID_RAID10] = {
		.sub_stripes	= 2,
		.dev_stripes	= 1,
		.devs_max	= 0,	/* 0 == as many as possible */
		.devs_min	= 4,
		.tolerated_failures = 1,
		.devs_increment	= 2,
		.ncopies	= 2,
		.nparity        = 0,
		.raid_name	= "raid10",
		.bg_flag	= BTRFS_BLOCK_GROUP_RAID10,
		.mindev_error	= BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET,
	},
	[BTRFS_RAID_RAID1] = {
		.sub_stripes	= 1,
		.dev_stripes	= 1,
		.devs_max	= 2,
		.devs_min	= 2,
		.tolerated_failures = 1,
		.devs_increment	= 2,
		.ncopies	= 2,
		.nparity        = 0,
		.raid_name	= "raid1",
		.bg_flag	= BTRFS_BLOCK_GROUP_RAID1,
		.mindev_error	= BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET,
	},
	[BTRFS_RAID_RAID1C3] = {
		.sub_stripes	= 1,
		.dev_stripes	= 1,
		.devs_max	= 3,
		.devs_min	= 3,
		.tolerated_failures = 2,
		.devs_increment	= 3,
		.ncopies	= 3,
		.nparity        = 0,
		.raid_name	= "raid1c3",
		.bg_flag	= BTRFS_BLOCK_GROUP_RAID1C3,
		.mindev_error	= BTRFS_ERROR_DEV_RAID1C3_MIN_NOT_MET,
	},
	[BTRFS_RAID_RAID1C4] = {
		.sub_stripes	= 1,
		.dev_stripes	= 1,
		.devs_max	= 4,
		.devs_min	= 4,
		.tolerated_failures = 3,
		.devs_increment	= 4,
		.ncopies	= 4,
		.nparity        = 0,
		.raid_name	= "raid1c4",
		.bg_flag	= BTRFS_BLOCK_GROUP_RAID1C4,
		.mindev_error	= BTRFS_ERROR_DEV_RAID1C4_MIN_NOT_MET,
	},
	[BTRFS_RAID_DUP] = {
		.sub_stripes	= 1,
		.dev_stripes	= 2,
		.devs_max	= 1,
		.devs_min	= 1,
		.tolerated_failures = 0,
		.devs_increment	= 1,
		.ncopies	= 2,
		.nparity        = 0,
		.raid_name	= "dup",
		.bg_flag	= BTRFS_BLOCK_GROUP_DUP,
		.mindev_error	= 0,
	},
	[BTRFS_RAID_RAID0] = {
		.sub_stripes	= 1,
		.dev_stripes	= 1,
		.devs_max	= 0,
		.devs_min	= 2,
		.tolerated_failures = 0,
		.devs_increment	= 1,
		.ncopies	= 1,
		.nparity        = 0,
		.raid_name	= "raid0",
		.bg_flag	= BTRFS_BLOCK_GROUP_RAID0,
		.mindev_error	= 0,
	},
	[BTRFS_RAID_SINGLE] = {
		.sub_stripes	= 1,
		.dev_stripes	= 1,
		.devs_max	= 1,
		.devs_min	= 1,
		.tolerated_failures = 0,
		.devs_increment	= 1,
		.ncopies	= 1,
		.nparity        = 0,
		.raid_name	= "single",
		.bg_flag	= 0,
		.mindev_error	= 0,
	},
	[BTRFS_RAID_RAID5] = {
		.sub_stripes	= 1,
		.dev_stripes	= 1,
		.devs_max	= 0,
		.devs_min	= 2,
		.tolerated_failures = 1,
		.devs_increment	= 1,
		.ncopies	= 1,
		.nparity        = 1,
		.raid_name	= "raid5",
		.bg_flag	= BTRFS_BLOCK_GROUP_RAID5,
		.mindev_error	= BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET,
	},
	[BTRFS_RAID_RAID6] = {
		.sub_stripes	= 1,
		.dev_stripes	= 1,
		.devs_max	= 0,
		.devs_min	= 3,
		.tolerated_failures = 2,
		.devs_increment	= 1,
		.ncopies	= 1,
		.nparity        = 2,
		.raid_name	= "raid6",
		.bg_flag	= BTRFS_BLOCK_GROUP_RAID6,
		.mindev_error	= BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,
	},
};

const char *btrfs_bg_type_to_raid_name(u64 flags)
{
	const int index = btrfs_bg_flags_to_raid_index(flags);

	if (index >= BTRFS_NR_RAID_TYPES)
		return NULL;

	return btrfs_raid_array[index].raid_name;
}
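
/*
 * Fill @buf with a textual description of @bg_flags, writing no more than
 * @size_buf bytes. Any unknown leftover bits are emitted as a hex value.
 */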
void btrfs_describe_block_groups(u64 bg_flags, char *buf, u32 size_buf)
{
	int i;
	int ret;
	char *bp = buf;
	u64 flags = bg_flags;
	u32 size_bp = size_buf;

	if (!flags) {
		strcpy(bp, "NONE");
		return;
	}

#define DESCRIBE_FLAG(flag, desc)					\
	do {								\
		if (flags & (flag)) {					\
			ret = snprintf(bp, size_bp, "%s|", (desc));	\
			if (ret < 0 || ret >= size_bp)			\
				goto out_overflow;			\
			size_bp -= ret;					\
			bp += ret;					\
			flags &= ~(flag);				\
		}							\
	} while (0)

	DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_DATA, "data");
	DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_SYSTEM, "system");
	DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_METADATA, "metadata");

	DESCRIBE_FLAG(BTRFS_AVAIL_ALLOC_BIT_SINGLE, "single");
	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
		DESCRIBE_FLAG(btrfs_raid_array[i].bg_flag,
			      btrfs_raid_array[i].raid_name);
#undef DESCRIBE_FLAG

	if (flags) {
		ret = snprintf(bp, size_bp, "0x%llx|", flags);
		size_bp -= ret;
	}

	if (size_bp < size_buf)
		buf[size_buf - size_bp - 1] = '\0'; /* remove last | */

	/*
	 * The text is trimmed, it's up to the caller to provide sufficiently
	 * large buffer
	 */
out_overflow:;
}

static int init_first_rw_device(struct btrfs_trans_handle *trans);
static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info);
static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
			     enum btrfs_map_op op,
			     u64 logical, u64 *length,
			     struct btrfs_bio **bbio_ret,
			     int mirror_num, int need_raid_map);
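
/*
 * The global uuid_mutex protects the fs_uuids list of all btrfs_fs_devices
 * known to the module. Each btrfs_fs_devices additionally carries its own
 * device_list_mutex that protects its lists of devices.
 */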
DEFINE_MUTEX(uuid_mutex);
static LIST_HEAD(fs_uuids);

struct list_head * __attribute_const__ btrfs_get_fs_uuids(void)
{
	return &fs_uuids;
}
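
/*
 * alloc_fs_devices - allocate struct btrfs_fs_devices
 * @fsid:		if not NULL, copy the UUID to fs_devices::fsid
 * @metadata_fsid:	if not NULL, copy the UUID to fs_devices::metadata_uuid
 *
 * Returns a pointer to a new struct btrfs_fs_devices on success, or an
 * ERR_PTR() on allocation failure. The returned struct is not linked onto
 * any lists and can be destroyed with kfree() right away.
 */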
static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid,
						 const u8 *metadata_fsid)
{
	struct btrfs_fs_devices *fs_devs;

	fs_devs = kzalloc(sizeof(*fs_devs), GFP_KERNEL);
	if (!fs_devs)
		return ERR_PTR(-ENOMEM);

	mutex_init(&fs_devs->device_list_mutex);

	INIT_LIST_HEAD(&fs_devs->devices);
	INIT_LIST_HEAD(&fs_devs->alloc_list);
	INIT_LIST_HEAD(&fs_devs->fs_list);
	INIT_LIST_HEAD(&fs_devs->seed_list);
	if (fsid)
		memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE);

	if (metadata_fsid)
		memcpy(fs_devs->metadata_uuid, metadata_fsid, BTRFS_FSID_SIZE);
	else if (fsid)
		memcpy(fs_devs->metadata_uuid, fsid, BTRFS_FSID_SIZE);

	return fs_devs;
}

void btrfs_free_device(struct btrfs_device *device)
{
	WARN_ON(!list_empty(&device->post_commit_list));
	rcu_string_free(device->name);
	extent_io_tree_release(&device->alloc_state);
	bio_put(device->flush_bio);
	btrfs_destroy_dev_zone_info(device);
	kfree(device);
}

static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
{
	struct btrfs_device *device;

	WARN_ON(fs_devices->opened);
	while (!list_empty(&fs_devices->devices)) {
		device = list_entry(fs_devices->devices.next,
				    struct btrfs_device, dev_list);
		list_del(&device->dev_list);
		btrfs_free_device(device);
	}
	kfree(fs_devices);
}

void __exit btrfs_cleanup_fs_uuids(void)
{
	struct btrfs_fs_devices *fs_devices;

	while (!list_empty(&fs_uuids)) {
		fs_devices = list_entry(fs_uuids.next,
					struct btrfs_fs_devices, fs_list);
		list_del(&fs_devices->fs_list);
		free_fs_devices(fs_devices);
	}
}
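
/*
 * Returns a pointer to a new btrfs_device on success; ERR_PTR() on error.
 * The returned struct is not linked onto any lists and must be destroyed
 * with btrfs_free_device().
 */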
static struct btrfs_device *__alloc_device(struct btrfs_fs_info *fs_info)
{
	struct btrfs_device *dev;

	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
	if (!dev)
		return ERR_PTR(-ENOMEM);

	/*
	 * Preallocate a bio that's always going to be used for flushing the
	 * device, so it is allocated at constructor time.
	 */
	dev->flush_bio = bio_kmalloc(GFP_KERNEL, 0);
	if (!dev->flush_bio) {
		kfree(dev);
		return ERR_PTR(-ENOMEM);
	}

	INIT_LIST_HEAD(&dev->dev_list);
	INIT_LIST_HEAD(&dev->dev_alloc_list);
	INIT_LIST_HEAD(&dev->post_commit_list);

	atomic_set(&dev->reada_in_flight, 0);
	atomic_set(&dev->dev_stats_ccnt, 0);
	btrfs_device_data_ordered_init(dev);
	INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
	INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
	extent_io_tree_init(fs_info, &dev->alloc_state,
			    IO_TREE_DEVICE_ALLOC_STATE, NULL);

	return dev;
}

static noinline struct btrfs_fs_devices *find_fsid(
		const u8 *fsid, const u8 *metadata_fsid)
{
	struct btrfs_fs_devices *fs_devices;

	ASSERT(fsid);

	/* Handle non-split brain cases */
	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
		if (metadata_fsid) {
			if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0
			    && memcmp(metadata_fsid, fs_devices->metadata_uuid,
				      BTRFS_FSID_SIZE) == 0)
				return fs_devices;
		} else {
			if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
				return fs_devices;
		}
	}
	return NULL;
}

static struct btrfs_fs_devices *find_fsid_with_metadata_uuid(
				struct btrfs_super_block *disk_super)
{
	struct btrfs_fs_devices *fs_devices;

	/*
	 * Handle scanned device having completed its fsid change but
	 * belonging to a fs_devices that was created by first scanning
	 * a device which didn't have its fsid/metadata_uuid changed
	 * at all and the CHANGING_FSID_V2 flag set.
	 */
	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
		if (fs_devices->fsid_change &&
		    memcmp(disk_super->metadata_uuid, fs_devices->fsid,
			   BTRFS_FSID_SIZE) == 0 &&
		    memcmp(fs_devices->fsid, fs_devices->metadata_uuid,
			   BTRFS_FSID_SIZE) == 0) {
			return fs_devices;
		}
	}

	/*
	 * Handle scanned device having completed its fsid change but
	 * belonging to a fs_devices that was created by a device that
	 * has an outdated pair of fsid/metadata_uuid and
	 * CHANGING_FSID_V2 flag set.
	 */
	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
		if (fs_devices->fsid_change &&
		    memcmp(fs_devices->metadata_uuid,
			   fs_devices->fsid, BTRFS_FSID_SIZE) != 0 &&
		    memcmp(disk_super->metadata_uuid, fs_devices->metadata_uuid,
			   BTRFS_FSID_SIZE) == 0) {
			return fs_devices;
		}
	}

	return find_fsid(disk_super->fsid, disk_super->metadata_uuid);
}

static int
btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
		      int flush, struct block_device **bdev,
		      struct btrfs_super_block **disk_super)
{
	int ret;

	*bdev = blkdev_get_by_path(device_path, flags, holder);

	if (IS_ERR(*bdev)) {
		ret = PTR_ERR(*bdev);
		goto error;
	}

	if (flush)
		filemap_write_and_wait((*bdev)->bd_inode->i_mapping);
	ret = set_blocksize(*bdev, BTRFS_BDEV_BLOCKSIZE);
	if (ret) {
		blkdev_put(*bdev, flags);
		goto error;
	}
	invalidate_bdev(*bdev);
	*disk_super = btrfs_read_dev_super(*bdev);
	if (IS_ERR(*disk_super)) {
		ret = PTR_ERR(*disk_super);
		blkdev_put(*bdev, flags);
		goto error;
	}

	return 0;

error:
	*bdev = NULL;
	return ret;
}

static bool device_path_matched(const char *path, struct btrfs_device *device)
{
	int found;

	rcu_read_lock();
	found = strcmp(rcu_str_deref(device->name), path);
	rcu_read_unlock();

	return found == 0;
}
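
/*
 * Search and remove all stale devices (which are not mounted). When both
 * inputs are NULL, it will search and release all stale devices.
 *
 * @path:	 optional, when given will it release all unmounted devices
 *		 matching this path only
 * @skip_device: optional, will skip this device when searching for the stale
 *		 devices
 *
 * Return:	0 for success or if @path is NULL.
 *		-EBUSY if @path is a mounted device.
 *		-ENOENT if @path does not match any device in the list.
 */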
static int btrfs_free_stale_devices(const char *path,
				     struct btrfs_device *skip_device)
{
	struct btrfs_fs_devices *fs_devices, *tmp_fs_devices;
	struct btrfs_device *device, *tmp_device;
	int ret = 0;

	if (path)
		ret = -ENOENT;

	list_for_each_entry_safe(fs_devices, tmp_fs_devices, &fs_uuids, fs_list) {

		mutex_lock(&fs_devices->device_list_mutex);
		list_for_each_entry_safe(device, tmp_device,
					 &fs_devices->devices, dev_list) {
			if (skip_device && skip_device == device)
				continue;
			if (path && !device->name)
				continue;
			if (path && !device_path_matched(path, device))
				continue;
			if (fs_devices->opened) {
				/* for an already deleted device return 0 */
				if (path && ret != 0)
					ret = -EBUSY;
				break;
			}

			/* delete the stale device */
			fs_devices->num_devices--;
			list_del(&device->dev_list);
			btrfs_free_device(device);

			ret = 0;
		}
		mutex_unlock(&fs_devices->device_list_mutex);

		if (fs_devices->num_devices == 0) {
			btrfs_sysfs_remove_fsid(fs_devices);
			list_del(&fs_devices->fs_list);
			free_fs_devices(fs_devices);
		}
	}

	return ret;
}
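
/*
 * This is only used on mount, and we are protected from competing things
 * messing with our fs_devices by the uuid_mutex, thus we do not need the
 * fs_devices->device_list_mutex here.
 */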
static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
			struct btrfs_device *device, fmode_t flags,
			void *holder)
{
	struct request_queue *q;
	struct block_device *bdev;
	struct btrfs_super_block *disk_super;
	u64 devid;
	int ret;

	if (device->bdev)
		return -EINVAL;
	if (!device->name)
		return -EINVAL;

	ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1,
				    &bdev, &disk_super);
	if (ret)
		return ret;

	devid = btrfs_stack_device_id(&disk_super->dev_item);
	if (devid != device->devid)
		goto error_free_page;

	if (memcmp(device->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE))
		goto error_free_page;

	device->generation = btrfs_super_generation(disk_super);

	if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) {
		if (btrfs_super_incompat_flags(disk_super) &
		    BTRFS_FEATURE_INCOMPAT_METADATA_UUID) {
			pr_err(
		"BTRFS: Invalid seeding and uuid-changed device detected\n");
			goto error_free_page;
		}

		clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
		fs_devices->seeding = true;
	} else {
		if (bdev_read_only(bdev))
			clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
		else
			set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
	}

	q = bdev_get_queue(bdev);
	if (!blk_queue_nonrot(q))
		fs_devices->rotating = true;

	device->bdev = bdev;
	clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
	device->mode = flags;

	fs_devices->open_devices++;
	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
	    device->devid != BTRFS_DEV_REPLACE_DEVID) {
		fs_devices->rw_devices++;
		list_add_tail(&device->dev_alloc_list, &fs_devices->alloc_list);
	}
	btrfs_release_disk_super(disk_super);

	return 0;

error_free_page:
	btrfs_release_disk_super(disk_super);
	blkdev_put(bdev, flags);

	return -EINVAL;
}
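
/*
 * Handle a scanned device whose superblock still carries the
 * CHANGING_FSID_V2 flag: match it against a fs_devices whose metadata_uuid
 * equals the scanned fsid, i.e. one created by a member device that had
 * already completed the fsid change. If no such fs_devices exists, fall
 * back to a plain fsid lookup.
 */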
static struct btrfs_fs_devices *find_fsid_inprogress(
					struct btrfs_super_block *disk_super)
{
	struct btrfs_fs_devices *fs_devices;

	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
		if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
			   BTRFS_FSID_SIZE) != 0 &&
		    memcmp(fs_devices->metadata_uuid, disk_super->fsid,
			   BTRFS_FSID_SIZE) == 0 && !fs_devices->fsid_change) {
			return fs_devices;
		}
	}

	return find_fsid(disk_super->fsid, NULL);
}

static struct btrfs_fs_devices *find_fsid_changed(
					struct btrfs_super_block *disk_super)
{
	struct btrfs_fs_devices *fs_devices;

	/*
	 * Handles the case where scanned device is part of an fs that had
	 * multiple successful changes of FSID but currently device didn't
	 * observe it. Meaning our fsid will be different than theirs. We need
	 * to handle two subcases :
	 *  1 - The fs still continues to have different METADATA/FSID uuids.
	 *  2 - The fs is switched back to its original FSID (METADATA/FSID
	 *  are equal).
	 */
	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
		/* Changed UUIDs */
		if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
			   BTRFS_FSID_SIZE) != 0 &&
		    memcmp(fs_devices->metadata_uuid, disk_super->metadata_uuid,
			   BTRFS_FSID_SIZE) == 0 &&
		    memcmp(fs_devices->fsid, disk_super->fsid,
			   BTRFS_FSID_SIZE) != 0)
			return fs_devices;

		/* Unchanged UUIDs */
		if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
			   BTRFS_FSID_SIZE) == 0 &&
		    memcmp(fs_devices->fsid, disk_super->metadata_uuid,
			   BTRFS_FSID_SIZE) == 0)
			return fs_devices;
	}

	return NULL;
}

static struct btrfs_fs_devices *find_fsid_reverted_metadata(
				struct btrfs_super_block *disk_super)
{
	struct btrfs_fs_devices *fs_devices;

	/*
	 * Handle the case where the scanned device is part of an fs whose last
	 * metadata UUID change reverted it to the original FSID. At the same
	 * time fs_devices was first created by another constituent device
	 * which didn't fully observe the operation. This results in a
	 * btrfs_fs_devices created with metadata/fsid different AND
	 * btrfs_fs_devices::fsid_change set AND the metadata_uuid of the
	 * fs_devices equal to the FSID of the disk.
	 */
	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
		if (memcmp(fs_devices->fsid, fs_devices->metadata_uuid,
			   BTRFS_FSID_SIZE) != 0 &&
		    memcmp(fs_devices->metadata_uuid, disk_super->fsid,
			   BTRFS_FSID_SIZE) == 0 &&
		    fs_devices->fsid_change)
			return fs_devices;
	}

	return NULL;
}
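
/*
 * Add new device to list of registered devices
 *
 * Returns:
 * device pointer which was just added or updated when successful
 * error pointer when failed
 */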
static noinline struct btrfs_device *device_list_add(const char *path,
			   struct btrfs_super_block *disk_super,
			   bool *new_device_added)
{
	struct btrfs_device *device;
	struct btrfs_fs_devices *fs_devices = NULL;
	struct rcu_string *name;
	u64 found_transid = btrfs_super_generation(disk_super);
	u64 devid = btrfs_stack_device_id(&disk_super->dev_item);
	bool has_metadata_uuid = (btrfs_super_incompat_flags(disk_super) &
		BTRFS_FEATURE_INCOMPAT_METADATA_UUID);
	bool fsid_change_in_progress = (btrfs_super_flags(disk_super) &
					BTRFS_SUPER_FLAG_CHANGING_FSID_V2);

	if (fsid_change_in_progress) {
		if (!has_metadata_uuid)
			fs_devices = find_fsid_inprogress(disk_super);
		else
			fs_devices = find_fsid_changed(disk_super);
	} else if (has_metadata_uuid) {
		fs_devices = find_fsid_with_metadata_uuid(disk_super);
	} else {
		fs_devices = find_fsid_reverted_metadata(disk_super);
		if (!fs_devices)
			fs_devices = find_fsid(disk_super->fsid, NULL);
	}

	if (!fs_devices) {
		if (has_metadata_uuid)
			fs_devices = alloc_fs_devices(disk_super->fsid,
						      disk_super->metadata_uuid);
		else
			fs_devices = alloc_fs_devices(disk_super->fsid, NULL);

		if (IS_ERR(fs_devices))
			return ERR_CAST(fs_devices);

		fs_devices->fsid_change = fsid_change_in_progress;

		mutex_lock(&fs_devices->device_list_mutex);
		list_add(&fs_devices->fs_list, &fs_uuids);

		device = NULL;
	} else {
		mutex_lock(&fs_devices->device_list_mutex);
		device = btrfs_find_device(fs_devices, devid,
				disk_super->dev_item.uuid, NULL);

		/*
		 * If this disk has been pulled into an fs devices created by
		 * a device which had the CHANGING_FSID_V2 flag then replace the
		 * metadata_uuid/fsid values of the fs_devices.
		 */
		if (fs_devices->fsid_change &&
		    found_transid > fs_devices->latest_generation) {
			memcpy(fs_devices->fsid, disk_super->fsid,
					BTRFS_FSID_SIZE);

			if (has_metadata_uuid)
				memcpy(fs_devices->metadata_uuid,
				       disk_super->metadata_uuid,
				       BTRFS_FSID_SIZE);
			else
				memcpy(fs_devices->metadata_uuid,
				       disk_super->fsid, BTRFS_FSID_SIZE);

			fs_devices->fsid_change = false;
		}
	}

	if (!device) {
		if (fs_devices->opened) {
			mutex_unlock(&fs_devices->device_list_mutex);
			return ERR_PTR(-EBUSY);
		}

		device = btrfs_alloc_device(NULL, &devid,
					    disk_super->dev_item.uuid);
		if (IS_ERR(device)) {
			mutex_unlock(&fs_devices->device_list_mutex);
			/* we can safely leave the fs_devices entry around */
			return device;
		}

		name = rcu_string_strdup(path, GFP_NOFS);
		if (!name) {
			btrfs_free_device(device);
			mutex_unlock(&fs_devices->device_list_mutex);
			return ERR_PTR(-ENOMEM);
		}
		rcu_assign_pointer(device->name, name);

		list_add_rcu(&device->dev_list, &fs_devices->devices);
		fs_devices->num_devices++;

		device->fs_devices = fs_devices;
		*new_device_added = true;

		if (disk_super->label[0])
			pr_info(
	"BTRFS: device label %s devid %llu transid %llu %s scanned by %s (%d)\n",
				disk_super->label, devid, found_transid, path,
				current->comm, task_pid_nr(current));
		else
			pr_info(
	"BTRFS: device fsid %pU devid %llu transid %llu %s scanned by %s (%d)\n",
				disk_super->fsid, devid, found_transid, path,
				current->comm, task_pid_nr(current));

	} else if (!device->name || strcmp(device->name->str, path)) {
		/*
		 * When FS is already mounted.
		 * 1. If you are here and if the device->name is NULL that
		 *    means this device was missing at time of FS mount.
		 * 2. If you are here and if the device->name is different
		 *    from 'path' that means either
		 *	a. The same device disappeared and reappeared with
		 *	   different name. or
		 *	b. The missing-disk-which-was-replaced, has
		 *	   reappeared now.
		 *
		 * We must allow 1 and 2a above. But 2b would be a spurious
		 * and unwanted.
		 *
		 * Further in case of 1 and 2a above, the disk at 'path'
		 * would have missed some transaction when it was away and
		 * in case of 2a the stale bdev has to be updated as well.
		 * 2b must not be allowed at all time.
		 */

		/*
		 * For now, we do allow update to btrfs_fs_device through the
		 * btrfs dev scan cli after FS has been mounted.  We're still
		 * tracking a problem where systems fail mount by subvolume id
		 * when we reject replacement on a mounted FS.
		 */
		if (!fs_devices->opened && found_transid < device->generation) {
			/*
			 * That is if the FS is _not_ mounted and if you
			 * are here, that means there is more than one
			 * disk with same uuid and devid. We keep the one
			 * with larger generation number or the last-in if
			 * generation are equal.
			 */
			mutex_unlock(&fs_devices->device_list_mutex);
			return ERR_PTR(-EEXIST);
		}

		/*
		 * We are going to replace the device path for a given devid,
		 * make sure it's the same device if the device is mounted
		 */
		if (device->bdev) {
			int error;
			dev_t path_dev;

			error = lookup_bdev(path, &path_dev);
			if (error) {
				mutex_unlock(&fs_devices->device_list_mutex);
				return ERR_PTR(error);
			}

			if (device->bdev->bd_dev != path_dev) {
				mutex_unlock(&fs_devices->device_list_mutex);
				/*
				 * device->fs_info may not be reliable here, so
				 * pass in a NULL instead. This avoids a
				 * possible use-after-free when the fs_info and
				 * fs_info->sb are already torn down.
				 */
				btrfs_warn_in_rcu(NULL,
	"duplicate device %s devid %llu generation %llu scanned by %s (%d)",
						  path, devid, found_transid,
						  current->comm,
						  task_pid_nr(current));
				return ERR_PTR(-EEXIST);
			}
			btrfs_info_in_rcu(device->fs_info,
	"devid %llu device path %s changed to %s scanned by %s (%d)",
					  devid, rcu_str_deref(device->name),
					  path, current->comm,
					  task_pid_nr(current));
		}

		name = rcu_string_strdup(path, GFP_NOFS);
		if (!name) {
			mutex_unlock(&fs_devices->device_list_mutex);
			return ERR_PTR(-ENOMEM);
		}
		rcu_string_free(device->name);
		rcu_assign_pointer(device->name, name);
		if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) {
			fs_devices->missing_devices--;
			clear_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
		}
	}

	/*
	 * Unmount does not free the btrfs_device struct but would zero
	 * generation along with most of the other members. So just update
	 * it back. We need it to pick the disk with largest generation
	 * (as above).
	 */
	if (!fs_devices->opened) {
		device->generation = found_transid;
		fs_devices->latest_generation = max_t(u64, found_transid,
						fs_devices->latest_generation);
	}

	fs_devices->total_devices = btrfs_super_num_devices(disk_super);

	mutex_unlock(&fs_devices->device_list_mutex);
	return device;
}

static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
{
	struct btrfs_fs_devices *fs_devices;
	struct btrfs_device *device;
	struct btrfs_device *orig_dev;
	int ret = 0;

	fs_devices = alloc_fs_devices(orig->fsid, NULL);
	if (IS_ERR(fs_devices))
		return fs_devices;

	mutex_lock(&orig->device_list_mutex);
	fs_devices->total_devices = orig->total_devices;

	list_for_each_entry(orig_dev, &orig->devices, dev_list) {
		struct rcu_string *name;

		device = btrfs_alloc_device(NULL, &orig_dev->devid,
					    orig_dev->uuid);
		if (IS_ERR(device)) {
			ret = PTR_ERR(device);
			goto error;
		}

		/*
		 * This is ok to do without rcu read locked because we hold the
		 * uuid mutex so nothing we touch in here is going to disappear.
		 */
		if (orig_dev->name) {
			name = rcu_string_strdup(orig_dev->name->str,
					GFP_KERNEL);
			if (!name) {
				btrfs_free_device(device);
				ret = -ENOMEM;
				goto error;
			}
			rcu_assign_pointer(device->name, name);
		}

		list_add(&device->dev_list, &fs_devices->devices);
		device->fs_devices = fs_devices;
		fs_devices->num_devices++;
	}
	mutex_unlock(&orig->device_list_mutex);
	return fs_devices;
error:
	mutex_unlock(&orig->device_list_mutex);
	free_fs_devices(fs_devices);
	return ERR_PTR(ret);
}

static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices,
				      struct btrfs_device **latest_dev)
{
	struct btrfs_device *device, *next;

	/* This is the initialized path, it is safe to release the devices. */
	list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
		if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state)) {
			if (!test_bit(BTRFS_DEV_STATE_REPLACE_TGT,
				      &device->dev_state) &&
			    !test_bit(BTRFS_DEV_STATE_MISSING,
				      &device->dev_state) &&
			    (!*latest_dev ||
			     device->generation > (*latest_dev)->generation)) {
				*latest_dev = device;
			}
			continue;
		}

		/*
		 * We have already validated the presence of BTRFS_DEV_REPLACE_DEVID,
		 * in btrfs_init_dev_replace() so just continue.
		 */
		if (device->devid == BTRFS_DEV_REPLACE_DEVID)
			continue;

		if (device->bdev) {
			blkdev_put(device->bdev, device->mode);
			device->bdev = NULL;
			fs_devices->open_devices--;
		}
		if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
			list_del_init(&device->dev_alloc_list);
			clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
		}
		list_del_init(&device->dev_list);
		fs_devices->num_devices--;
		btrfs_free_device(device);
	}
}
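
/*
 * After we have read the system tree and know devids belonging to this
 * filesystem, remove the device which does not belong there.
 */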
void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices)
{
	struct btrfs_device *latest_dev = NULL;
	struct btrfs_fs_devices *seed_dev;

	mutex_lock(&uuid_mutex);
	__btrfs_free_extra_devids(fs_devices, &latest_dev);

	list_for_each_entry(seed_dev, &fs_devices->seed_list, seed_list)
		__btrfs_free_extra_devids(seed_dev, &latest_dev);

	fs_devices->latest_bdev = latest_dev->bdev;

	mutex_unlock(&uuid_mutex);
}

static void btrfs_close_bdev(struct btrfs_device *device)
{
	if (!device->bdev)
		return;

	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
		sync_blockdev(device->bdev);
		invalidate_bdev(device->bdev);
	}

	blkdev_put(device->bdev, device->mode);
}

static void btrfs_close_one_device(struct btrfs_device *device)
{
	struct btrfs_fs_devices *fs_devices = device->fs_devices;

	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
	    device->devid != BTRFS_DEV_REPLACE_DEVID) {
		list_del_init(&device->dev_alloc_list);
		fs_devices->rw_devices--;
	}

	if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
		fs_devices->missing_devices--;

	btrfs_close_bdev(device);
	if (device->bdev) {
		fs_devices->open_devices--;
		device->bdev = NULL;
	}
	clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
	btrfs_destroy_dev_zone_info(device);

	device->fs_info = NULL;
	atomic_set(&device->dev_stats_ccnt, 0);
	extent_io_tree_release(&device->alloc_state);

	/* Verify the device is back in a pristine state */
	ASSERT(!test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state));
	ASSERT(!test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state));
	ASSERT(list_empty(&device->dev_alloc_list));
	ASSERT(list_empty(&device->post_commit_list));
	ASSERT(atomic_read(&device->reada_in_flight) == 0);
}

static void close_fs_devices(struct btrfs_fs_devices *fs_devices)
{
	struct btrfs_device *device, *tmp;

	lockdep_assert_held(&uuid_mutex);

	if (--fs_devices->opened > 0)
		return;

	list_for_each_entry_safe(device, tmp, &fs_devices->devices, dev_list)
		btrfs_close_one_device(device);

	WARN_ON(fs_devices->open_devices);
	WARN_ON(fs_devices->rw_devices);
	fs_devices->opened = 0;
	fs_devices->seeding = false;
	fs_devices->fs_info = NULL;
}

void btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
{
	LIST_HEAD(list);
	struct btrfs_fs_devices *tmp;

	mutex_lock(&uuid_mutex);
	close_fs_devices(fs_devices);
	if (!fs_devices->opened)
		list_splice_init(&fs_devices->seed_list, &list);

	list_for_each_entry_safe(fs_devices, tmp, &list, seed_list) {
		close_fs_devices(fs_devices);
		list_del(&fs_devices->seed_list);
		free_fs_devices(fs_devices);
	}
	mutex_unlock(&uuid_mutex);
}

static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
				fmode_t flags, void *holder)
{
	struct btrfs_device *device;
	struct btrfs_device *latest_dev = NULL;
	struct btrfs_device *tmp_device;

	flags |= FMODE_EXCL;

	list_for_each_entry_safe(device, tmp_device, &fs_devices->devices,
				 dev_list) {
		int ret;

		ret = btrfs_open_one_device(fs_devices, device, flags, holder);
		if (ret == 0 &&
		    (!latest_dev || device->generation > latest_dev->generation)) {
			latest_dev = device;
		} else if (ret == -ENODATA) {
			fs_devices->num_devices--;
			list_del(&device->dev_list);
			btrfs_free_device(device);
		}
	}
	if (fs_devices->open_devices == 0)
		return -EINVAL;

	fs_devices->opened = 1;
	fs_devices->latest_bdev = latest_dev->bdev;
	fs_devices->total_rw_bytes = 0;
	fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_REGULAR;
	fs_devices->read_policy = BTRFS_READ_POLICY_PID;

	return 0;
}

static int devid_cmp(void *priv, struct list_head *a, struct list_head *b)
{
	struct btrfs_device *dev1, *dev2;

	dev1 = list_entry(a, struct btrfs_device, dev_list);
	dev2 = list_entry(b, struct btrfs_device, dev_list);

	if (dev1->devid < dev2->devid)
		return -1;
	else if (dev1->devid > dev2->devid)
		return 1;
	return 0;
}

int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
		       fmode_t flags, void *holder)
{
	int ret;

	lockdep_assert_held(&uuid_mutex);

	/*
	 * The device_list_mutex cannot be taken here in case opening the
	 * underlying device takes further locks like bd_mutex.
	 *
	 * We also don't need the lock here as this is called during mount and
	 * exclusion is provided by uuid_mutex
	 */
	if (fs_devices->opened) {
		fs_devices->opened++;
		ret = 0;
	} else {
		list_sort(NULL, &fs_devices->devices, devid_cmp);
		ret = open_fs_devices(fs_devices, flags, holder);
	}

	return ret;
}

void btrfs_release_disk_super(struct btrfs_super_block *super)
{
	struct page *page = virt_to_page(super);

	put_page(page);
}

static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev,
						       u64 bytenr, u64 bytenr_orig)
{
	struct btrfs_super_block *disk_super;
	struct page *page;
	void *p;
	pgoff_t index;

	/* make sure our super fits in the device */
	if (bytenr + PAGE_SIZE >= i_size_read(bdev->bd_inode))
		return ERR_PTR(-EINVAL);

	/* make sure our super fits in the page */
	if (sizeof(*disk_super) > PAGE_SIZE)
		return ERR_PTR(-EINVAL);

	/* make sure our super doesn't straddle pages on disk */
	index = bytenr >> PAGE_SHIFT;
	if ((bytenr + sizeof(*disk_super) - 1) >> PAGE_SHIFT != index)
		return ERR_PTR(-EINVAL);

	/* pull in the page with our super */
	page = read_cache_page_gfp(bdev->bd_inode->i_mapping, index, GFP_KERNEL);

	if (IS_ERR(page))
		return ERR_CAST(page);

	p = page_address(page);

	/* align our pointer to the offset of the super block */
	disk_super = p + offset_in_page(bytenr);

	if (btrfs_super_bytenr(disk_super) != bytenr_orig ||
	    btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
		btrfs_release_disk_super(p);
		return ERR_PTR(-EINVAL);
	}

	if (disk_super->label[0] && disk_super->label[BTRFS_LABEL_SIZE - 1])
		disk_super->label[BTRFS_LABEL_SIZE - 1] = 0;

	return disk_super;
}

int btrfs_forget_devices(const char *path)
{
	int ret;

	mutex_lock(&uuid_mutex);
	ret = btrfs_free_stale_devices(strlen(path) ? path : NULL, NULL);
	mutex_unlock(&uuid_mutex);

	return ret;
}
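
/*
 * Look for a btrfs signature on a device. This may be called out of the mount path
 * and we are not allowed to call set_blocksize during the scan. The superblock
 * is read via pagecache.
 */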
struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
					   void *holder)
{
	struct btrfs_super_block *disk_super;
	bool new_device_added = false;
	struct btrfs_device *device = NULL;
	struct block_device *bdev;
	u64 bytenr, bytenr_orig;
	int ret;

	lockdep_assert_held(&uuid_mutex);

	/*
	 * we would like to check all the supers, but that would make
	 * a btrfs mount succeed after a mkfs from a different FS.
	 * So, we need to add a special mount option to scan for
	 * later supers, using BTRFS_SUPER_MIRROR_MAX instead
	 */
	flags |= FMODE_EXCL;

	bdev = blkdev_get_by_path(path, flags, holder);
	if (IS_ERR(bdev))
		return ERR_CAST(bdev);

	bytenr_orig = btrfs_sb_offset(0);
	ret = btrfs_sb_log_location_bdev(bdev, 0, READ, &bytenr);
	if (ret) {
		device = ERR_PTR(ret);
		goto error_bdev_put;
	}

	disk_super = btrfs_read_disk_super(bdev, bytenr, bytenr_orig);
	if (IS_ERR(disk_super)) {
		device = ERR_CAST(disk_super);
		goto error_bdev_put;
	}

	device = device_list_add(path, disk_super, &new_device_added);
	if (!IS_ERR(device)) {
		if (new_device_added)
			btrfs_free_stale_devices(path, device);
	}

	btrfs_release_disk_super(disk_super);

error_bdev_put:
	blkdev_put(bdev, flags);

	return device;
}
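
/*
 * Try to find a chunk that intersects [start, start + len] range and when one
 * such is found, record the end of it in *start
 */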
static bool contains_pending_extent(struct btrfs_device *device, u64 *start,
				    u64 len)
{
	u64 physical_start, physical_end;

	lockdep_assert_held(&device->fs_info->chunk_mutex);

	if (!find_first_extent_bit(&device->alloc_state, *start,
				   &physical_start, &physical_end,
				   CHUNK_ALLOCATED, NULL)) {

		if (in_range(physical_start, *start, len) ||
		    in_range(*start, physical_start,
			     physical_end - physical_start)) {
			*start = physical_end + 1;
			return true;
		}
	}
	return false;
}

static u64 dev_extent_search_start(struct btrfs_device *device, u64 start)
{
	switch (device->fs_devices->chunk_alloc_policy) {
	case BTRFS_CHUNK_ALLOC_REGULAR:
		/*
		 * We don't want to overwrite the superblock on the drive nor
		 * any area used by the boot loader (grub for example), so we
		 * make sure to start at an offset of at least 1MB.
		 */
		return max_t(u64, start, SZ_1M);
	case BTRFS_CHUNK_ALLOC_ZONED:
		/*
		 * We don't care about the starting region like regular
		 * allocator, because we anyway use/reserve the first two zones
		 * for superblock logging.
		 */
		return ALIGN(start, device->zone_info->zone_size);
	default:
		BUG();
	}
}

static bool dev_extent_hole_check_zoned(struct btrfs_device *device,
					u64 *hole_start, u64 *hole_size,
					u64 num_bytes)
{
	u64 zone_size = device->zone_info->zone_size;
	u64 pos;
	int ret;
	bool changed = false;

	ASSERT(IS_ALIGNED(*hole_start, zone_size));

	while (*hole_size > 0) {
		pos = btrfs_find_allocatable_zones(device, *hole_start,
						   *hole_start + *hole_size,
						   num_bytes);
		if (pos != *hole_start) {
			*hole_size = *hole_start + *hole_size - pos;
			*hole_start = pos;
			changed = true;
			if (*hole_size < num_bytes)
				break;
		}

		ret = btrfs_ensure_empty_zones(device, pos, num_bytes);

		/* Range is ensured to be empty */
		if (!ret)
			return changed;

		/* Given hole range was invalid (outside of device) */
		if (ret == -ERANGE) {
			*hole_start += *hole_size;
			*hole_size = 0;
			return true;
		}

		*hole_start += zone_size;
		*hole_size -= zone_size;
		changed = true;
	}

	return changed;
}
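
/*
 * Check if specified hole is suitable for allocation.
 *
 * @device:	the device which we have the hole
 * @hole_start: starting position of the hole
 * @hole_size:	the size of the hole
 * @num_bytes:	the size of the free space that we need
 *
 * This function may modify @hole_start and @hole_size to reflect the suitable
 * position for allocation. Returns true if hole position is updated, false
 * otherwise.
 */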
static bool dev_extent_hole_check(struct btrfs_device *device, u64 *hole_start,
				  u64 *hole_size, u64 num_bytes)
{
	bool changed = false;
	u64 hole_end = *hole_start + *hole_size;

	for (;;) {
		/*
		 * Check before we set max_hole_start, otherwise we could end up
		 * sending back this offset anyway.
		 */
		if (contains_pending_extent(device, hole_start, *hole_size)) {
			if (hole_end >= *hole_start)
				*hole_size = hole_end - *hole_start;
			else
				*hole_size = 0;
			changed = true;
		}

		switch (device->fs_devices->chunk_alloc_policy) {
		case BTRFS_CHUNK_ALLOC_REGULAR:
			/* No extra check */
			break;
		case BTRFS_CHUNK_ALLOC_ZONED:
			if (dev_extent_hole_check_zoned(device, hole_start,
							hole_size, num_bytes)) {
				changed = true;
				/*
				 * The changed hole can contain pending extent.
				 * Loop again to check that.
				 */
				continue;
			}
			break;
		default:
			BUG();
		}

		break;
	}

	return changed;
}
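
/*
 * find_free_dev_extent_start - find free space in the specified device
 * @device:	  the device which we search the free space in
 * @num_bytes:	  the size of the free space that we need
 * @search_start: the position from which to begin the search
 * @start:	  store the start of the free space
 * @len:	  the size of the free space that we find, or the size of the
 *		  maximum hole found if no suitable free space exists
 *
 * This uses a pretty simple search, the expectation is that it is called very
 * infrequently and that a given device has a small number of extents.
 *
 * @start is used to store the start of the free space if we find. But if we
 * don't find suitable free space, it will be used to store the start position
 * of the max free space.
 *
 * NOTE: This function will search *commit* root of device tree, and does extra
 * check to ensure dev extents are not double allocated.
 * This makes the function safe to allocate dev extents but may not report
 * correct usable device space, as device extent freed in current transaction
 * is not reported as available.
 */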
static int find_free_dev_extent_start(struct btrfs_device *device,
				u64 num_bytes, u64 search_start, u64 *start,
				u64 *len)
{
	struct btrfs_fs_info *fs_info = device->fs_info;
	struct btrfs_root *root = fs_info->dev_root;
	struct btrfs_key key;
	struct btrfs_dev_extent *dev_extent;
	struct btrfs_path *path;
	u64 hole_size;
	u64 max_hole_start;
	u64 max_hole_size;
	u64 extent_end;
	u64 search_end = device->total_bytes;
	int ret;
	int slot;
	struct extent_buffer *l;

	search_start = dev_extent_search_start(device, search_start);

	WARN_ON(device->zone_info &&
		!IS_ALIGNED(num_bytes, device->zone_info->zone_size));

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	max_hole_start = search_start;
	max_hole_size = 0;

again:
	if (search_start >= search_end ||
		test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
		ret = -ENOSPC;
		goto out;
	}

	path->reada = READA_FORWARD;
	path->search_commit_root = 1;
	path->skip_locking = 1;

	key.objectid = device->devid;
	key.offset = search_start;
	key.type = BTRFS_DEV_EXTENT_KEY;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	if (ret > 0) {
		ret = btrfs_previous_item(root, path, key.objectid, key.type);
		if (ret < 0)
			goto out;
	}

	while (1) {
		l = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(l)) {
			ret = btrfs_next_leaf(root, path);
			if (ret == 0)
				continue;
			if (ret < 0)
				goto out;

			break;
		}
		btrfs_item_key_to_cpu(l, &key, slot);

		if (key.objectid < device->devid)
			goto next;

		if (key.objectid > device->devid)
			break;

		if (key.type != BTRFS_DEV_EXTENT_KEY)
			goto next;

		if (key.offset > search_start) {
			hole_size = key.offset - search_start;
			dev_extent_hole_check(device, &search_start, &hole_size,
					      num_bytes);

			if (hole_size > max_hole_size) {
				max_hole_start = search_start;
				max_hole_size = hole_size;
			}

			/*
			 * If this free space is greater than which we need,
			 * it must be the max free space that we have found
			 * until now, so max_hole_start must point to the start
			 * of this free space and the length of this free space
			 * is stored in max_hole_size. Thus, we return
			 * search_start and max_hole_size.
			 */
			if (hole_size >= num_bytes) {
				ret = 0;
				goto out;
			}
		}

		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
		extent_end = key.offset + btrfs_dev_extent_length(l,
								  dev_extent);
		if (extent_end > search_start)
			search_start = extent_end;
next:
		path->slots[0]++;
		cond_resched();
	}

	/*
	 * At this point, search_start should be the end of
	 * allocated dev extents, and when shrinking the device,
	 * search_end may be smaller than search_start.
	 */
	if (search_end > search_start) {
		hole_size = search_end - search_start;
		if (dev_extent_hole_check(device, &search_start, &hole_size,
					  num_bytes)) {
			btrfs_release_path(path);
			goto again;
		}

		if (hole_size > max_hole_size) {
			max_hole_start = search_start;
			max_hole_size = hole_size;
		}
	}

	/* See above. */
	if (max_hole_size < num_bytes)
		ret = -ENOSPC;
	else
		ret = 0;

out:
	btrfs_free_path(path);
	*start = max_hole_start;
	if (len)
		*len = max_hole_size;
	return ret;
}

int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
			 u64 *start, u64 *len)
{
	/* FIXME use last free of some kind */
	return find_free_dev_extent_start(device, num_bytes, 0, start, len);
}

static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
			  struct btrfs_device *device,
			  u64 start, u64 *dev_extent_len)
{
	struct btrfs_fs_info *fs_info = device->fs_info;
	struct btrfs_root *root = fs_info->dev_root;
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *leaf = NULL;
	struct btrfs_dev_extent *extent = NULL;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = device->devid;
	key.offset = start;
	key.type = BTRFS_DEV_EXTENT_KEY;
again:
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret > 0) {
		ret = btrfs_previous_item(root, path, key.objectid,
					  BTRFS_DEV_EXTENT_KEY);
		if (ret)
			goto out;
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		extent = btrfs_item_ptr(leaf, path->slots[0],
					struct btrfs_dev_extent);
		BUG_ON(found_key.offset > start || found_key.offset +
		       btrfs_dev_extent_length(leaf, extent) < start);
		key = found_key;
		btrfs_release_path(path);
		goto again;
	} else if (ret == 0) {
		leaf = path->nodes[0];
		extent = btrfs_item_ptr(leaf, path->slots[0],
					struct btrfs_dev_extent);
	} else {
		btrfs_handle_fs_error(fs_info, ret, "Slot search failed");
		goto out;
	}

	*dev_extent_len = btrfs_dev_extent_length(leaf, extent);

	ret = btrfs_del_item(trans, root, path);
	if (ret) {
		btrfs_handle_fs_error(fs_info, ret,
				      "Failed to remove dev extent item");
	} else {
		set_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags);
	}
out:
	btrfs_free_path(path);
	return ret;
}

static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
				  struct btrfs_device *device,
				  u64 chunk_offset, u64 start, u64 num_bytes)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_fs_info *fs_info = device->fs_info;
	struct btrfs_root *root = fs_info->dev_root;
	struct btrfs_dev_extent *extent;
	struct extent_buffer *leaf;
	struct btrfs_key key;

	WARN_ON(!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state));
	WARN_ON(test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state));
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = device->devid;
	key.offset = start;
	key.type = BTRFS_DEV_EXTENT_KEY;
	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(*extent));
	if (ret)
		goto out;

	leaf = path->nodes[0];
	extent = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_dev_extent);
	btrfs_set_dev_extent_chunk_tree(leaf, extent,
					BTRFS_CHUNK_TREE_OBJECTID);
	btrfs_set_dev_extent_chunk_objectid(leaf, extent,
					    BTRFS_FIRST_CHUNK_TREE_OBJECTID);
	btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);

	btrfs_set_dev_extent_length(leaf, extent, num_bytes);
	btrfs_mark_buffer_dirty(leaf);
out:
	btrfs_free_path(path);
	return ret;
}

static u64 find_next_chunk(struct btrfs_fs_info *fs_info)
{
	struct extent_map_tree *em_tree;
	struct extent_map *em;
	struct rb_node *n;
	u64 ret = 0;

	em_tree = &fs_info->mapping_tree;
	read_lock(&em_tree->lock);
	n = rb_last(&em_tree->map.rb_root);
	if (n) {
		em = rb_entry(n, struct extent_map, rb_node);
		ret = em->start + em->len;
	}
	read_unlock(&em_tree->lock);

	return ret;
}

static noinline int find_next_devid(struct btrfs_fs_info *fs_info,
				    u64 *devid_ret)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.type = BTRFS_DEV_ITEM_KEY;
	key.offset = (u64)-1;

	ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
	if (ret < 0)
		goto error;

	if (ret == 0) {
		/* Corruption */
		btrfs_err(fs_info, "corrupted chunk tree devid -1 matched");
		ret = -EUCLEAN;
		goto error;
	}

	ret = btrfs_previous_item(fs_info->chunk_root, path,
				  BTRFS_DEV_ITEMS_OBJECTID,
				  BTRFS_DEV_ITEM_KEY);
	if (ret) {
		*devid_ret = 1;
	} else {
		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				      path->slots[0]);
		*devid_ret = found_key.offset + 1;
	}
	ret = 0;
error:
	btrfs_free_path(path);
	return ret;
}
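
/*
 * the device information is stored in the chunk root
 * the btrfs_device struct should be fully filled in
 */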
static int btrfs_add_dev_item(struct btrfs_trans_handle *trans,
			    struct btrfs_device *device)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_dev_item *dev_item;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	unsigned long ptr;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.type = BTRFS_DEV_ITEM_KEY;
	key.offset = device->devid;

	ret = btrfs_insert_empty_item(trans, trans->fs_info->chunk_root, path,
				      &key, sizeof(*dev_item));
	if (ret)
		goto out;

	leaf = path->nodes[0];
	dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);

	btrfs_set_device_id(leaf, dev_item, device->devid);
	btrfs_set_device_generation(leaf, dev_item, 0);
	btrfs_set_device_type(leaf, dev_item, device->type);
	btrfs_set_device_io_align(leaf, dev_item, device->io_align);
	btrfs_set_device_io_width(leaf, dev_item, device->io_width);
	btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
	btrfs_set_device_total_bytes(leaf, dev_item,
				     btrfs_device_get_disk_total_bytes(device));
	btrfs_set_device_bytes_used(leaf, dev_item,
				    btrfs_device_get_bytes_used(device));
	btrfs_set_device_group(leaf, dev_item, 0);
	btrfs_set_device_seek_speed(leaf, dev_item, 0);
	btrfs_set_device_bandwidth(leaf, dev_item, 0);
	btrfs_set_device_start_offset(leaf, dev_item, 0);

	ptr = btrfs_device_uuid(dev_item);
	write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
	ptr = btrfs_device_fsid(dev_item);
	write_extent_buffer(leaf, trans->fs_info->fs_devices->metadata_uuid,
			    ptr, BTRFS_FSID_SIZE);
	btrfs_mark_buffer_dirty(leaf);

	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}
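
/*
 * Function to update ctime/mtime for a given device path.
 * Mainly used for ctime/mtime based probe like libblkid.
 */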
static void update_dev_time(const char *path_name)
{
	struct file *filp;

	filp = filp_open(path_name, O_RDWR, 0);
	if (IS_ERR(filp))
		return;
	file_update_time(filp);
	filp_close(filp, NULL);
}

static int btrfs_rm_dev_item(struct btrfs_device *device)
{
	struct btrfs_root *root = device->fs_info->chunk_root;
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_trans_handle *trans;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);
	}
	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.type = BTRFS_DEV_ITEM_KEY;
	key.offset = device->devid;

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret) {
		if (ret > 0)
			ret = -ENOENT;
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		goto out;
	}

	ret = btrfs_del_item(trans, root, path);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
	}

out:
	btrfs_free_path(path);
	if (!ret)
		ret = btrfs_commit_transaction(trans);
	return ret;
}
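
/*
 * Verify that @num_devices still satisfies the minimum device count of every
 * RAID profile currently in use for data, system or metadata block groups,
 * returning the profile's mindev_error when the check fails.
 */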
static int btrfs_check_raid_min_devices(struct btrfs_fs_info *fs_info,
		u64 num_devices)
{
	u64 all_avail;
	unsigned seq;
	int i;

	do {
		seq = read_seqbegin(&fs_info->profiles_lock);

		all_avail = fs_info->avail_data_alloc_bits |
			    fs_info->avail_system_alloc_bits |
			    fs_info->avail_metadata_alloc_bits;
	} while (read_seqretry(&fs_info->profiles_lock, seq));

	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
		if (!(all_avail & btrfs_raid_array[i].bg_flag))
			continue;

		if (num_devices < btrfs_raid_array[i].devs_min) {
			int ret = btrfs_raid_array[i].mindev_error;

			if (ret)
				return ret;
		}
	}

	return 0;
}

static struct btrfs_device * btrfs_find_next_active_device(
		struct btrfs_fs_devices *fs_devs, struct btrfs_device *device)
{
	struct btrfs_device *next_device;

	list_for_each_entry(next_device, &fs_devs->devices, dev_list) {
		if (next_device != device &&
		    !test_bit(BTRFS_DEV_STATE_MISSING, &next_device->dev_state)
		    && next_device->bdev)
			return next_device;
	}

	return NULL;
}
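
/*
 * Helper function to check if the given device is part of s_bdev / latest_bdev
 * and replace it with the provided or the next active device, in the context
 * where this function called, there should be always be another device (or
 * this_dev) which is active.
 */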
void __cold btrfs_assign_next_active_device(struct btrfs_device *device,
					    struct btrfs_device *next_device)
{
	struct btrfs_fs_info *fs_info = device->fs_info;

	if (!next_device)
		next_device = btrfs_find_next_active_device(fs_info->fs_devices,
							    device);
	ASSERT(next_device);

	if (fs_info->sb->s_bdev &&
	    (fs_info->sb->s_bdev == device->bdev))
		fs_info->sb->s_bdev = next_device->bdev;

	if (fs_info->fs_devices->latest_bdev == device->bdev)
		fs_info->fs_devices->latest_bdev = next_device->bdev;
}
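
/*
 * Return btrfs_fs_devices::num_devices excluding the device that's being
 * currently replaced.
 */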
static u64 btrfs_num_devices(struct btrfs_fs_info *fs_info)
{
	u64 num_devices = fs_info->fs_devices->num_devices;

	down_read(&fs_info->dev_replace.rwsem);
	if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
		ASSERT(num_devices > 1);
		num_devices--;
	}
	up_read(&fs_info->dev_replace.rwsem);

	return num_devices;
}

void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
			       struct block_device *bdev,
			       const char *device_path)
{
	struct btrfs_super_block *disk_super;
	int copy_num;

	if (!bdev)
		return;

	for (copy_num = 0; copy_num < BTRFS_SUPER_MIRROR_MAX; copy_num++) {
		struct page *page;
		int ret;

		disk_super = btrfs_read_dev_one_super(bdev, copy_num);
		if (IS_ERR(disk_super))
			continue;

		if (bdev_is_zoned(bdev)) {
			btrfs_reset_sb_log_zones(bdev, copy_num);
			continue;
		}

		memset(&disk_super->magic, 0, sizeof(disk_super->magic));

		page = virt_to_page(disk_super);
		set_page_dirty(page);
		lock_page(page);
		/* write_one_page() unlocks the page */
		ret = write_one_page(page);
		if (ret)
			btrfs_warn(fs_info,
				"error clearing superblock number %d (%d)",
				copy_num, ret);
		btrfs_release_disk_super(disk_super);

	}

	/* Notify udev that device has changed */
	btrfs_kobject_uevent(bdev, KOBJ_CHANGE);

	/* Update ctime/mtime for device path for libblkid */
	update_dev_time(device_path);
}

int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
		    u64 devid)
{
	struct btrfs_device *device;
	struct btrfs_fs_devices *cur_devices;
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	u64 num_devices;
	int ret = 0;

	mutex_lock(&uuid_mutex);

	num_devices = btrfs_num_devices(fs_info);

	ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1);
	if (ret)
		goto out;

	device = btrfs_find_device_by_devspec(fs_info, devid, device_path);

	if (IS_ERR(device)) {
		if (PTR_ERR(device) == -ENOENT &&
		    strcmp(device_path, "missing") == 0)
			ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
		else
			ret = PTR_ERR(device);
		goto out;
	}

	if (btrfs_pinned_by_swapfile(fs_info, device)) {
		btrfs_warn_in_rcu(fs_info,
		  "cannot remove device %s (devid %llu) due to active swapfile",
				  rcu_str_deref(device->name), device->devid);
		ret = -ETXTBSY;
		goto out;
	}

	if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
		ret = BTRFS_ERROR_DEV_TGT_REPLACE;
		goto out;
	}

	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
	    fs_info->fs_devices->rw_devices == 1) {
		ret = BTRFS_ERROR_DEV_ONLY_WRITABLE;
		goto out;
	}

	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
		mutex_lock(&fs_info->chunk_mutex);
		list_del_init(&device->dev_alloc_list);
		device->fs_devices->rw_devices--;
		mutex_unlock(&fs_info->chunk_mutex);
	}

	mutex_unlock(&uuid_mutex);
	ret = btrfs_shrink_device(device, 0);
	if (!ret)
		btrfs_reada_remove_dev(device);
	mutex_lock(&uuid_mutex);
	if (ret)
		goto error_undo;

	/*
	 * TODO: the superblock still includes this device in its num_devices
	 * counter although write_all_supers() is not locked out. This could
	 * give a filesystem state which requires a degraded mount.
	 */
	ret = btrfs_rm_dev_item(device);
	if (ret)
		goto error_undo;

	clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
	btrfs_scrub_cancel_dev(device);

	/*
	 * the device list mutex makes sure that we don't change
	 * the device list while someone else is writing out all
	 * the device supers. Whoever is writing all supers, should
	 * lock the device list mutex before getting the number of
	 * devices in the super block (super_copy). Conversely,
	 * whoever updates the number of devices in the super block
	 * (super_copy) should hold the device list mutex.
	 */

	/*
	 * In normal cases the cur_devices == fs_devices. But in case
	 * of deleting a seed device, the cur_devices should point to
	 * its own fs_devices listed under the fs_devices->seed_list.
	 */
	cur_devices = device->fs_devices;
	mutex_lock(&fs_devices->device_list_mutex);
	list_del_rcu(&device->dev_list);

	cur_devices->num_devices--;
	cur_devices->total_devices--;
	/* Update total_devices of the parent fs_devices if it's seed */
	if (cur_devices != fs_devices)
		fs_devices->total_devices--;

	if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
		cur_devices->missing_devices--;

	btrfs_assign_next_active_device(device, NULL);

	if (device->bdev) {
		cur_devices->open_devices--;
		/* remove sysfs entry */
		btrfs_sysfs_remove_device(device);
	}

	num_devices = btrfs_super_num_devices(fs_info->super_copy) - 1;
	btrfs_set_super_num_devices(fs_info->super_copy, num_devices);
	mutex_unlock(&fs_devices->device_list_mutex);

	/*
	 * At this point, the device is zero sized and detached from
	 * the devices list.  All that's left is to zero out the old
	 * supers and free the device.
	 */
	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
		btrfs_scratch_superblocks(fs_info, device->bdev,
					  device->name->str);

	btrfs_close_bdev(device);
	synchronize_rcu();
	btrfs_free_device(device);

	if (cur_devices->open_devices == 0) {
		list_del_init(&cur_devices->seed_list);
		close_fs_devices(cur_devices);
		free_fs_devices(cur_devices);
	}

out:
	mutex_unlock(&uuid_mutex);
	return ret;

error_undo:
	btrfs_reada_undo_remove_dev(device);
	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
		mutex_lock(&fs_info->chunk_mutex);
		list_add(&device->dev_alloc_list,
			 &fs_devices->alloc_list);
		device->fs_devices->rw_devices++;
		mutex_unlock(&fs_info->chunk_mutex);
	}
	goto out;
}

void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev)
{
	struct btrfs_fs_devices *fs_devices;

	lockdep_assert_held(&srcdev->fs_info->fs_devices->device_list_mutex);

	/*
	 * in case of fs with no seed, srcdev->fs_devices will point
	 * to fs_devices of fs_info. However when the dev being replaced is
	 * a seed dev it will point to the seed's local fs_devices. In short
	 * srcdev will have its correct fs_devices in both the cases.
	 */
	fs_devices = srcdev->fs_devices;

	list_del_rcu(&srcdev->dev_list);
	list_del(&srcdev->dev_alloc_list);
	fs_devices->num_devices--;
	if (test_bit(BTRFS_DEV_STATE_MISSING, &srcdev->dev_state))
		fs_devices->missing_devices--;

	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &srcdev->dev_state))
		fs_devices->rw_devices--;

	if (srcdev->bdev)
		fs_devices->open_devices--;
}

void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev)
{
	struct btrfs_fs_devices *fs_devices = srcdev->fs_devices;

	mutex_lock(&uuid_mutex);

	btrfs_close_bdev(srcdev);
	synchronize_rcu();
	btrfs_free_device(srcdev);

	/* if this is no devs we rather delete the fs_devices */
	if (!fs_devices->num_devices) {
		/*
		 * On a mounted FS, num_devices can't be zero unless it's a
		 * seed. In case of a seed device being replaced, the replace
		 * target added to the sprouted FS, so there will be no more
		 * device left under the seed FS.
		 */
		ASSERT(fs_devices->seeding);

		list_del_init(&fs_devices->seed_list);
		close_fs_devices(fs_devices);
		free_fs_devices(fs_devices);
	}
	mutex_unlock(&uuid_mutex);
}

void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev)
{
	struct btrfs_fs_devices *fs_devices = tgtdev->fs_info->fs_devices;

	mutex_lock(&fs_devices->device_list_mutex);

	btrfs_sysfs_remove_device(tgtdev);

	if (tgtdev->bdev)
		fs_devices->open_devices--;

	fs_devices->num_devices--;

	btrfs_assign_next_active_device(tgtdev, NULL);

	list_del_rcu(&tgtdev->dev_list);

	mutex_unlock(&fs_devices->device_list_mutex);

	/*
	 * The update_dev_time() with in btrfs_scratch_superblocks()
	 * may lead to a call to btrfs_show_devname() which will try
	 * to hold device_list_mutex. And here this device
	 * is already out of device list, so we don't have to hold
	 * the device_list_mutex lock.
	 */
	btrfs_scratch_superblocks(tgtdev->fs_info, tgtdev->bdev,
				  tgtdev->name->str);

	btrfs_close_bdev(tgtdev);
	synchronize_rcu();
	btrfs_free_device(tgtdev);
}

static struct btrfs_device *btrfs_find_device_by_path(
		struct btrfs_fs_info *fs_info, const char *device_path)
{
	int ret = 0;
	struct btrfs_super_block *disk_super;
	u64 devid;
	u8 *dev_uuid;
	struct block_device *bdev;
	struct btrfs_device *device;

	ret = btrfs_get_bdev_and_sb(device_path, FMODE_READ,
				    fs_info->bdev_holder, 0, &bdev, &disk_super);
	if (ret)
		return ERR_PTR(ret);

	devid = btrfs_stack_device_id(&disk_super->dev_item);
	dev_uuid = disk_super->dev_item.uuid;
	if (btrfs_fs_incompat(fs_info, METADATA_UUID))
		device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
					   disk_super->metadata_uuid);
	else
		device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
					   disk_super->fsid);

	btrfs_release_disk_super(disk_super);
	if (!device)
		device = ERR_PTR(-ENOENT);
	blkdev_put(bdev, FMODE_READ);
	return device;
}
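
/*
 * Lookup a device given by device id, or the path if the id is 0.
 */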
struct btrfs_device *btrfs_find_device_by_devspec(
		struct btrfs_fs_info *fs_info, u64 devid,
		const char *device_path)
{
	struct btrfs_device *device;

	if (devid) {
		device = btrfs_find_device(fs_info->fs_devices, devid, NULL,
					   NULL);
		if (!device)
			return ERR_PTR(-ENOENT);
		return device;
	}

	if (!device_path || !device_path[0])
		return ERR_PTR(-EINVAL);

	if (strcmp(device_path, "missing") == 0) {
		/* Find first missing device */
		list_for_each_entry(device, &fs_info->fs_devices->devices,
				    dev_list) {
			if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
				     &device->dev_state) && !device->bdev)
				return device;
		}
		return ERR_PTR(-ENOENT);
	}

	return btrfs_find_device_by_path(fs_info, device_path);
}
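
/*
 * Prepare a sprout of a seeding filesystem: move the currently opened devices
 * onto a cloned seed fs_devices, keep a copy of the original on the fs_uuids
 * list, and generate a fresh fsid for the writable sprout.
 */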
static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
{
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	struct btrfs_fs_devices *old_devices;
	struct btrfs_fs_devices *seed_devices;
	struct btrfs_super_block *disk_super = fs_info->super_copy;
	struct btrfs_device *device;
	u64 super_flags;

	lockdep_assert_held(&uuid_mutex);
	if (!fs_devices->seeding)
		return -EINVAL;

	/*
	 * Private copy of the seed devices, anchored at
	 * fs_info->fs_devices->seed_list
	 */
	seed_devices = alloc_fs_devices(NULL, NULL);
	if (IS_ERR(seed_devices))
		return PTR_ERR(seed_devices);

	/*
	 * It's necessary to retain a copy of the original seed fs_devices in
	 * fs_uuids so that filesystems which have been seeded can successfully
	 * reference the seed device from open_seed_devices. This also supports
	 * multiple fs seed.
	 */
	old_devices = clone_fs_devices(fs_devices);
	if (IS_ERR(old_devices)) {
		kfree(seed_devices);
		return PTR_ERR(old_devices);
	}

	list_add(&old_devices->fs_list, &fs_uuids);

	memcpy(seed_devices, fs_devices, sizeof(*seed_devices));
	seed_devices->opened = 1;
	INIT_LIST_HEAD(&seed_devices->devices);
	INIT_LIST_HEAD(&seed_devices->alloc_list);
	mutex_init(&seed_devices->device_list_mutex);

	mutex_lock(&fs_devices->device_list_mutex);
	list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
			      synchronize_rcu);
	list_for_each_entry(device, &seed_devices->devices, dev_list)
		device->fs_devices = seed_devices;

	fs_devices->seeding = false;
	fs_devices->num_devices = 0;
	fs_devices->open_devices = 0;
	fs_devices->missing_devices = 0;
	fs_devices->rotating = false;
	list_add(&seed_devices->seed_list, &fs_devices->seed_list);

	generate_random_uuid(fs_devices->fsid);
	memcpy(fs_devices->metadata_uuid, fs_devices->fsid, BTRFS_FSID_SIZE);
	memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
	mutex_unlock(&fs_devices->device_list_mutex);

	super_flags = btrfs_super_flags(disk_super) &
		      ~BTRFS_SUPER_FLAG_SEEDING;
	btrfs_set_super_flags(disk_super, super_flags);

	return 0;
}
2492
/*
 * Store the expected generation for seed devices in device items.
 */
2496static int btrfs_finish_sprout(struct btrfs_trans_handle *trans)
2497{
2498 struct btrfs_fs_info *fs_info = trans->fs_info;
2499 struct btrfs_root *root = fs_info->chunk_root;
2500 struct btrfs_path *path;
2501 struct extent_buffer *leaf;
2502 struct btrfs_dev_item *dev_item;
2503 struct btrfs_device *device;
2504 struct btrfs_key key;
2505 u8 fs_uuid[BTRFS_FSID_SIZE];
2506 u8 dev_uuid[BTRFS_UUID_SIZE];
2507 u64 devid;
2508 int ret;
2509
2510 path = btrfs_alloc_path();
2511 if (!path)
2512 return -ENOMEM;
2513
2514 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2515 key.offset = 0;
2516 key.type = BTRFS_DEV_ITEM_KEY;
2517
2518 while (1) {
2519 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2520 if (ret < 0)
2521 goto error;
2522
2523 leaf = path->nodes[0];
2524next_slot:
2525 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
2526 ret = btrfs_next_leaf(root, path);
2527 if (ret > 0)
2528 break;
2529 if (ret < 0)
2530 goto error;
2531 leaf = path->nodes[0];
2532 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2533 btrfs_release_path(path);
2534 continue;
2535 }
2536
2537 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2538 if (key.objectid != BTRFS_DEV_ITEMS_OBJECTID ||
2539 key.type != BTRFS_DEV_ITEM_KEY)
2540 break;
2541
2542 dev_item = btrfs_item_ptr(leaf, path->slots[0],
2543 struct btrfs_dev_item);
2544 devid = btrfs_device_id(leaf, dev_item);
2545 read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
2546 BTRFS_UUID_SIZE);
2547 read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
2548 BTRFS_FSID_SIZE);
2549 device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
2550 fs_uuid);
2551 BUG_ON(!device);
2552
2553 if (device->fs_devices->seeding) {
2554 btrfs_set_device_generation(leaf, dev_item,
2555 device->generation);
2556 btrfs_mark_buffer_dirty(leaf);
2557 }
2558
2559 path->slots[0]++;
2560 goto next_slot;
2561 }
2562 ret = 0;
2563error:
2564 btrfs_free_path(path);
2565 return ret;
2566}
2567
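/*
 * Add the device at @device_path to the filesystem. If the filesystem is a
 * seed, a new writable filesystem is sprouted on top of it first.
 */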
2568int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path)
2569{
2570 struct btrfs_root *root = fs_info->dev_root;
2571 struct request_queue *q;
2572 struct btrfs_trans_handle *trans;
2573 struct btrfs_device *device;
2574 struct block_device *bdev;
2575 struct super_block *sb = fs_info->sb;
2576 struct rcu_string *name;
2577 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
2578 u64 orig_super_total_bytes;
2579 u64 orig_super_num_devices;
2580 int seeding_dev = 0;
2581 int ret = 0;
2582 bool locked = false;
2583
2584 if (sb_rdonly(sb) && !fs_devices->seeding)
2585 return -EROFS;
2586
2587 bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
2588 fs_info->bdev_holder);
2589 if (IS_ERR(bdev))
2590 return PTR_ERR(bdev);
2591
2592 if (!btrfs_check_device_zone_type(fs_info, bdev)) {
2593 ret = -EINVAL;
2594 goto error;
2595 }
2596
2597 if (fs_devices->seeding) {
2598 seeding_dev = 1;
2599 down_write(&sb->s_umount);
2600 mutex_lock(&uuid_mutex);
2601 locked = true;
2602 }
2603
2604 sync_blockdev(bdev);
2605
2606 rcu_read_lock();
2607 list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
2608 if (device->bdev == bdev) {
2609 ret = -EEXIST;
2610 rcu_read_unlock();
2611 goto error;
2612 }
2613 }
2614 rcu_read_unlock();
2615
2616 device = btrfs_alloc_device(fs_info, NULL, NULL);
2617 if (IS_ERR(device)) {
		/* We can safely leave the fs_devices entry around */
2619 ret = PTR_ERR(device);
2620 goto error;
2621 }
2622
2623 name = rcu_string_strdup(device_path, GFP_KERNEL);
2624 if (!name) {
2625 ret = -ENOMEM;
2626 goto error_free_device;
2627 }
2628 rcu_assign_pointer(device->name, name);
2629
2630 device->fs_info = fs_info;
2631 device->bdev = bdev;
2632
2633 ret = btrfs_get_dev_zone_info(device);
2634 if (ret)
2635 goto error_free_device;
2636
2637 trans = btrfs_start_transaction(root, 0);
2638 if (IS_ERR(trans)) {
2639 ret = PTR_ERR(trans);
2640 goto error_free_zone;
2641 }
2642
2643 q = bdev_get_queue(bdev);
2644 set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
2645 device->generation = trans->transid;
2646 device->io_width = fs_info->sectorsize;
2647 device->io_align = fs_info->sectorsize;
2648 device->sector_size = fs_info->sectorsize;
2649 device->total_bytes = round_down(i_size_read(bdev->bd_inode),
2650 fs_info->sectorsize);
2651 device->disk_total_bytes = device->total_bytes;
2652 device->commit_total_bytes = device->total_bytes;
2653 set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
2654 clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
2655 device->mode = FMODE_EXCL;
2656 device->dev_stats_valid = 1;
2657 set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
2658
2659 if (seeding_dev) {
2660 btrfs_clear_sb_rdonly(sb);
2661 ret = btrfs_prepare_sprout(fs_info);
2662 if (ret) {
2663 btrfs_abort_transaction(trans, ret);
2664 goto error_trans;
2665 }
2666 }
2667
2668 device->fs_devices = fs_devices;
2669
2670 mutex_lock(&fs_devices->device_list_mutex);
2671 mutex_lock(&fs_info->chunk_mutex);
2672 list_add_rcu(&device->dev_list, &fs_devices->devices);
2673 list_add(&device->dev_alloc_list, &fs_devices->alloc_list);
2674 fs_devices->num_devices++;
2675 fs_devices->open_devices++;
2676 fs_devices->rw_devices++;
2677 fs_devices->total_devices++;
2678 fs_devices->total_rw_bytes += device->total_bytes;
2679
2680 atomic64_add(device->total_bytes, &fs_info->free_chunk_space);
2681
2682 if (!blk_queue_nonrot(q))
2683 fs_devices->rotating = true;
2684
2685 orig_super_total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
2686 btrfs_set_super_total_bytes(fs_info->super_copy,
2687 round_down(orig_super_total_bytes + device->total_bytes,
2688 fs_info->sectorsize));
2689
2690 orig_super_num_devices = btrfs_super_num_devices(fs_info->super_copy);
2691 btrfs_set_super_num_devices(fs_info->super_copy,
2692 orig_super_num_devices + 1);
2693
	/*
	 * We've got more storage, clear any full flags on the space
	 * infos.
	 */
2698 btrfs_clear_space_info_full(fs_info);
2699
2700 mutex_unlock(&fs_info->chunk_mutex);
2701
	/* Add sysfs device entry */
2703 btrfs_sysfs_add_device(device);
2704
2705 mutex_unlock(&fs_devices->device_list_mutex);
2706
2707 if (seeding_dev) {
2708 mutex_lock(&fs_info->chunk_mutex);
2709 ret = init_first_rw_device(trans);
2710 mutex_unlock(&fs_info->chunk_mutex);
2711 if (ret) {
2712 btrfs_abort_transaction(trans, ret);
2713 goto error_sysfs;
2714 }
2715 }
2716
2717 ret = btrfs_add_dev_item(trans, device);
2718 if (ret) {
2719 btrfs_abort_transaction(trans, ret);
2720 goto error_sysfs;
2721 }
2722
2723 if (seeding_dev) {
2724 ret = btrfs_finish_sprout(trans);
2725 if (ret) {
2726 btrfs_abort_transaction(trans, ret);
2727 goto error_sysfs;
2728 }
2729
		/*
		 * fs_devices now represents the newly sprouted filesystem and
		 * its fsid has been changed by btrfs_prepare_sprout
		 */
2734 btrfs_sysfs_update_sprout_fsid(fs_devices);
2735 }
2736
2737 ret = btrfs_commit_transaction(trans);
2738
2739 if (seeding_dev) {
2740 mutex_unlock(&uuid_mutex);
2741 up_write(&sb->s_umount);
2742 locked = false;
2743
2744 if (ret)
2745 return ret;
2746
2747 ret = btrfs_relocate_sys_chunks(fs_info);
2748 if (ret < 0)
2749 btrfs_handle_fs_error(fs_info, ret,
2750 "Failed to relocate sys chunks after device initialization. This can be fixed using the \"btrfs balance\" command.");
2751 trans = btrfs_attach_transaction(root);
2752 if (IS_ERR(trans)) {
2753 if (PTR_ERR(trans) == -ENOENT)
2754 return 0;
2755 ret = PTR_ERR(trans);
2756 trans = NULL;
2757 goto error_sysfs;
2758 }
2759 ret = btrfs_commit_transaction(trans);
2760 }
2761
	/*
	 * Now that we have written a new super block to this device, check all
	 * other fs_devices list if device_path alienates any other scanned
	 * device.
	 * We can ignore the return value as it typically returns -EINVAL and
	 * only succeeds if the device was an alien.
	 */
2769 btrfs_forget_devices(device_path);
2770
	/* Update ctime/mtime for blkid or udev */
2772 update_dev_time(device_path);
2773
2774 return ret;
2775
2776error_sysfs:
2777 btrfs_sysfs_remove_device(device);
2778 mutex_lock(&fs_info->fs_devices->device_list_mutex);
2779 mutex_lock(&fs_info->chunk_mutex);
2780 list_del_rcu(&device->dev_list);
2781 list_del(&device->dev_alloc_list);
2782 fs_info->fs_devices->num_devices--;
2783 fs_info->fs_devices->open_devices--;
2784 fs_info->fs_devices->rw_devices--;
2785 fs_info->fs_devices->total_devices--;
2786 fs_info->fs_devices->total_rw_bytes -= device->total_bytes;
2787 atomic64_sub(device->total_bytes, &fs_info->free_chunk_space);
2788 btrfs_set_super_total_bytes(fs_info->super_copy,
2789 orig_super_total_bytes);
2790 btrfs_set_super_num_devices(fs_info->super_copy,
2791 orig_super_num_devices);
2792 mutex_unlock(&fs_info->chunk_mutex);
2793 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2794error_trans:
2795 if (seeding_dev)
2796 btrfs_set_sb_rdonly(sb);
2797 if (trans)
2798 btrfs_end_transaction(trans);
2799error_free_zone:
2800 btrfs_destroy_dev_zone_info(device);
2801error_free_device:
2802 btrfs_free_device(device);
2803error:
2804 blkdev_put(bdev, FMODE_EXCL);
2805 if (locked) {
2806 mutex_unlock(&uuid_mutex);
2807 up_write(&sb->s_umount);
2808 }
2809 return ret;
2810}
2811
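/*
 * Write the in-memory device state (type, io_align, io_width, sector size,
 * total bytes and bytes used) back into the device item in the chunk tree.
 */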
2812static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
2813 struct btrfs_device *device)
2814{
2815 int ret;
2816 struct btrfs_path *path;
2817 struct btrfs_root *root = device->fs_info->chunk_root;
2818 struct btrfs_dev_item *dev_item;
2819 struct extent_buffer *leaf;
2820 struct btrfs_key key;
2821
2822 path = btrfs_alloc_path();
2823 if (!path)
2824 return -ENOMEM;
2825
2826 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2827 key.type = BTRFS_DEV_ITEM_KEY;
2828 key.offset = device->devid;
2829
2830 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2831 if (ret < 0)
2832 goto out;
2833
2834 if (ret > 0) {
2835 ret = -ENOENT;
2836 goto out;
2837 }
2838
2839 leaf = path->nodes[0];
2840 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
2841
2842 btrfs_set_device_id(leaf, dev_item, device->devid);
2843 btrfs_set_device_type(leaf, dev_item, device->type);
2844 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
2845 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
2846 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
2847 btrfs_set_device_total_bytes(leaf, dev_item,
2848 btrfs_device_get_disk_total_bytes(device));
2849 btrfs_set_device_bytes_used(leaf, dev_item,
2850 btrfs_device_get_bytes_used(device));
2851 btrfs_mark_buffer_dirty(leaf);
2852
2853out:
2854 btrfs_free_path(path);
2855 return ret;
2856}
2857
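/*
 * Grow @device to @new_size (rounded down to a sector boundary): update the
 * superblock total bytes, the fs_devices' total_rw_bytes and the on-disk
 * device item within the given transaction.
 */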
2858int btrfs_grow_device(struct btrfs_trans_handle *trans,
2859 struct btrfs_device *device, u64 new_size)
2860{
2861 struct btrfs_fs_info *fs_info = device->fs_info;
2862 struct btrfs_super_block *super_copy = fs_info->super_copy;
2863 u64 old_total;
2864 u64 diff;
2865
2866 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
2867 return -EACCES;
2868
2869 new_size = round_down(new_size, fs_info->sectorsize);
2870
2871 mutex_lock(&fs_info->chunk_mutex);
2872 old_total = btrfs_super_total_bytes(super_copy);
2873 diff = round_down(new_size - device->total_bytes, fs_info->sectorsize);
2874
2875 if (new_size <= device->total_bytes ||
2876 test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
2877 mutex_unlock(&fs_info->chunk_mutex);
2878 return -EINVAL;
2879 }
2880
2881 btrfs_set_super_total_bytes(super_copy,
2882 round_down(old_total + diff, fs_info->sectorsize));
2883 device->fs_devices->total_rw_bytes += diff;
2884
2885 btrfs_device_set_total_bytes(device, new_size);
2886 btrfs_device_set_disk_total_bytes(device, new_size);
2887 btrfs_clear_space_info_full(device->fs_info);
2888 if (list_empty(&device->post_commit_list))
2889 list_add_tail(&device->post_commit_list,
2890 &trans->transaction->dev_update_list);
2891 mutex_unlock(&fs_info->chunk_mutex);
2892
2893 return btrfs_update_device(trans, device);
2894}
2895
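/* Delete the chunk item for @chunk_offset from the chunk tree. */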
2896static int btrfs_free_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
2897{
2898 struct btrfs_fs_info *fs_info = trans->fs_info;
2899 struct btrfs_root *root = fs_info->chunk_root;
2900 int ret;
2901 struct btrfs_path *path;
2902 struct btrfs_key key;
2903
2904 path = btrfs_alloc_path();
2905 if (!path)
2906 return -ENOMEM;
2907
2908 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
2909 key.offset = chunk_offset;
2910 key.type = BTRFS_CHUNK_ITEM_KEY;
2911
2912 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2913 if (ret < 0)
2914 goto out;
2915 else if (ret > 0) {
2916 btrfs_handle_fs_error(fs_info, -ENOENT,
2917 "Failed lookup while freeing chunk.");
2918 ret = -ENOENT;
2919 goto out;
2920 }
2921
2922 ret = btrfs_del_item(trans, root, path);
2923 if (ret < 0)
2924 btrfs_handle_fs_error(fs_info, ret,
2925 "Failed to delete chunk item.");
2926out:
2927 btrfs_free_path(path);
2928 return ret;
2929}
2930
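/*
 * Remove the chunk at @chunk_offset from the superblock's sys_chunk_array,
 * compacting the remaining entries and shrinking the recorded array size.
 */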
2931static int btrfs_del_sys_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
2932{
2933 struct btrfs_super_block *super_copy = fs_info->super_copy;
2934 struct btrfs_disk_key *disk_key;
2935 struct btrfs_chunk *chunk;
2936 u8 *ptr;
2937 int ret = 0;
2938 u32 num_stripes;
2939 u32 array_size;
2940 u32 len = 0;
2941 u32 cur;
2942 struct btrfs_key key;
2943
2944 mutex_lock(&fs_info->chunk_mutex);
2945 array_size = btrfs_super_sys_array_size(super_copy);
2946
2947 ptr = super_copy->sys_chunk_array;
2948 cur = 0;
2949
2950 while (cur < array_size) {
2951 disk_key = (struct btrfs_disk_key *)ptr;
2952 btrfs_disk_key_to_cpu(&key, disk_key);
2953
2954 len = sizeof(*disk_key);
2955
2956 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
2957 chunk = (struct btrfs_chunk *)(ptr + len);
2958 num_stripes = btrfs_stack_chunk_num_stripes(chunk);
2959 len += btrfs_chunk_item_size(num_stripes);
2960 } else {
2961 ret = -EIO;
2962 break;
2963 }
2964 if (key.objectid == BTRFS_FIRST_CHUNK_TREE_OBJECTID &&
2965 key.offset == chunk_offset) {
2966 memmove(ptr, ptr + len, array_size - (cur + len));
2967 array_size -= len;
2968 btrfs_set_super_sys_array_size(super_copy, array_size);
2969 } else {
2970 ptr += len;
2971 cur += len;
2972 }
2973 }
2974 mutex_unlock(&fs_info->chunk_mutex);
2975 return ret;
2976}
2977
/*
 * btrfs_get_chunk_map() - Find the mapping containing the given logical extent.
 * @logical: Logical block offset in bytes.
 * @length: Length of extent in bytes.
 *
 * Return: Chunk mapping or ERR_PTR.
 */
2985struct extent_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info,
2986 u64 logical, u64 length)
2987{
2988 struct extent_map_tree *em_tree;
2989 struct extent_map *em;
2990
2991 em_tree = &fs_info->mapping_tree;
2992 read_lock(&em_tree->lock);
2993 em = lookup_extent_mapping(em_tree, logical, length);
2994 read_unlock(&em_tree->lock);
2995
2996 if (!em) {
2997 btrfs_crit(fs_info, "unable to find logical %llu length %llu",
2998 logical, length);
2999 return ERR_PTR(-EINVAL);
3000 }
3001
3002 if (em->start > logical || em->start + em->len < logical) {
3003 btrfs_crit(fs_info,
3004 "found a bad mapping, wanted %llu-%llu, found %llu-%llu",
3005 logical, length, em->start, em->start + em->len);
3006 free_extent_map(em);
3007 return ERR_PTR(-EINVAL);
3008 }
3009
	/* Callers are responsible for dropping em's ref. */
3011 return em;
3012}
3013
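/*
 * Remove a chunk: free its device extents, delete the chunk item (and the
 * sys_chunk_array entry for SYSTEM chunks) and remove the block group.
 */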
3014int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
3015{
3016 struct btrfs_fs_info *fs_info = trans->fs_info;
3017 struct extent_map *em;
3018 struct map_lookup *map;
3019 u64 dev_extent_len = 0;
3020 int i, ret = 0;
3021 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
3022
3023 em = btrfs_get_chunk_map(fs_info, chunk_offset, 1);
3024 if (IS_ERR(em)) {
		/*
		 * This is a logic error, but we don't want to just rely on the
		 * user having built with ASSERT enabled, so if ASSERT doesn't
		 * do anything we still error out.
		 */
3030 ASSERT(0);
3031 return PTR_ERR(em);
3032 }
3033 map = em->map_lookup;
3034 mutex_lock(&fs_info->chunk_mutex);
3035 check_system_chunk(trans, map->type);
3036 mutex_unlock(&fs_info->chunk_mutex);
3037
	/*
	 * Take the device list mutex to prevent races with the final phase of
	 * a device replace operation that replaces the device object associated
	 * with map stripes (dev-replace.c:btrfs_dev_replace_finishing()).
	 */
3043 mutex_lock(&fs_devices->device_list_mutex);
3044 for (i = 0; i < map->num_stripes; i++) {
3045 struct btrfs_device *device = map->stripes[i].dev;
3046 ret = btrfs_free_dev_extent(trans, device,
3047 map->stripes[i].physical,
3048 &dev_extent_len);
3049 if (ret) {
3050 mutex_unlock(&fs_devices->device_list_mutex);
3051 btrfs_abort_transaction(trans, ret);
3052 goto out;
3053 }
3054
3055 if (device->bytes_used > 0) {
3056 mutex_lock(&fs_info->chunk_mutex);
3057 btrfs_device_set_bytes_used(device,
3058 device->bytes_used - dev_extent_len);
3059 atomic64_add(dev_extent_len, &fs_info->free_chunk_space);
3060 btrfs_clear_space_info_full(fs_info);
3061 mutex_unlock(&fs_info->chunk_mutex);
3062 }
3063
3064 ret = btrfs_update_device(trans, device);
3065 if (ret) {
3066 mutex_unlock(&fs_devices->device_list_mutex);
3067 btrfs_abort_transaction(trans, ret);
3068 goto out;
3069 }
3070 }
3071 mutex_unlock(&fs_devices->device_list_mutex);
3072
3073 ret = btrfs_free_chunk(trans, chunk_offset);
3074 if (ret) {
3075 btrfs_abort_transaction(trans, ret);
3076 goto out;
3077 }
3078
3079 trace_btrfs_chunk_free(fs_info, map, chunk_offset, em->len);
3080
3081 if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
3082 ret = btrfs_del_sys_chunk(fs_info, chunk_offset);
3083 if (ret) {
3084 btrfs_abort_transaction(trans, ret);
3085 goto out;
3086 }
3087 }
3088
3089 ret = btrfs_remove_block_group(trans, chunk_offset, em);
3090 if (ret) {
3091 btrfs_abort_transaction(trans, ret);
3092 goto out;
3093 }
3094
3095out:
	/* Once for us */
3097 free_extent_map(em);
3098 return ret;
3099}
3100
3101static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
3102{
3103 struct btrfs_root *root = fs_info->chunk_root;
3104 struct btrfs_trans_handle *trans;
3105 struct btrfs_block_group *block_group;
3106 int ret;
3107
	/*
	 * Prevent races with automatic removal of unused block groups.
	 * After we relocate and before we remove the chunk with offset
	 * chunk_offset, automatic removal of the block group can kick in,
	 * resulting in a failure when calling btrfs_remove_chunk() below.
	 *
	 * Make sure to acquire this mutex before doing a tree search (dev
	 * or chunk trees) to find chunks. Otherwise the cleaner kthread might
	 * call btrfs_remove_chunk() (via btrfs_delete_unused_bgs()) after
	 * we release the path used to search the chunk/dev tree and before
	 * the current task acquires this mutex and calls us.
	 */
3120 lockdep_assert_held(&fs_info->delete_unused_bgs_mutex);
3121
	/* Step one, relocate all the extents inside this chunk */
3123 btrfs_scrub_pause(fs_info);
3124 ret = btrfs_relocate_block_group(fs_info, chunk_offset);
3125 btrfs_scrub_continue(fs_info);
3126 if (ret)
3127 return ret;
3128
3129 block_group = btrfs_lookup_block_group(fs_info, chunk_offset);
3130 if (!block_group)
3131 return -ENOENT;
3132 btrfs_discard_cancel_work(&fs_info->discard_ctl, block_group);
3133 btrfs_put_block_group(block_group);
3134
3135 trans = btrfs_start_trans_remove_block_group(root->fs_info,
3136 chunk_offset);
3137 if (IS_ERR(trans)) {
3138 ret = PTR_ERR(trans);
3139 btrfs_handle_fs_error(root->fs_info, ret, NULL);
3140 return ret;
3141 }
3142
	/*
	 * Step two, delete the device extents and the
	 * chunk tree entries.
	 */
3147 ret = btrfs_remove_chunk(trans, chunk_offset);
3148 btrfs_end_transaction(trans);
3149 return ret;
3150}
3151
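/*
 * Walk the chunk tree backwards and relocate every SYSTEM chunk. Chunks that
 * fail with -ENOSPC are retried once; if any still fail the second time
 * around, -ENOSPC is returned.
 */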
3152static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info)
3153{
3154 struct btrfs_root *chunk_root = fs_info->chunk_root;
3155 struct btrfs_path *path;
3156 struct extent_buffer *leaf;
3157 struct btrfs_chunk *chunk;
3158 struct btrfs_key key;
3159 struct btrfs_key found_key;
3160 u64 chunk_type;
3161 bool retried = false;
3162 int failed = 0;
3163 int ret;
3164
3165 path = btrfs_alloc_path();
3166 if (!path)
3167 return -ENOMEM;
3168
3169again:
3170 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
3171 key.offset = (u64)-1;
3172 key.type = BTRFS_CHUNK_ITEM_KEY;
3173
3174 while (1) {
3175 mutex_lock(&fs_info->delete_unused_bgs_mutex);
3176 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
3177 if (ret < 0) {
3178 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3179 goto error;
3180 }
		BUG_ON(ret == 0); /* Corruption */
3182
3183 ret = btrfs_previous_item(chunk_root, path, key.objectid,
3184 key.type);
3185 if (ret)
3186 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3187 if (ret < 0)
3188 goto error;
3189 if (ret > 0)
3190 break;
3191
3192 leaf = path->nodes[0];
3193 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3194
3195 chunk = btrfs_item_ptr(leaf, path->slots[0],
3196 struct btrfs_chunk);
3197 chunk_type = btrfs_chunk_type(leaf, chunk);
3198 btrfs_release_path(path);
3199
3200 if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) {
3201 ret = btrfs_relocate_chunk(fs_info, found_key.offset);
3202 if (ret == -ENOSPC)
3203 failed++;
3204 else
3205 BUG_ON(ret);
3206 }
3207 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3208
3209 if (found_key.offset == 0)
3210 break;
3211 key.offset = found_key.offset - 1;
3212 }
3213 ret = 0;
3214 if (failed && !retried) {
3215 failed = 0;
3216 retried = true;
3217 goto again;
3218 } else if (WARN_ON(failed && retried)) {
3219 ret = -ENOSPC;
3220 }
3221error:
3222 btrfs_free_path(path);
3223 return ret;
3224}
3225
/*
 * Return 1 : allocate a data chunk successfully,
 * return <0: errors during allocating a data chunk,
 * return 0 : no need to allocate a data chunk.
 */
3231static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info,
3232 u64 chunk_offset)
3233{
3234 struct btrfs_block_group *cache;
3235 u64 bytes_used;
3236 u64 chunk_type;
3237
3238 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
3239 ASSERT(cache);
3240 chunk_type = cache->flags;
3241 btrfs_put_block_group(cache);
3242
3243 if (!(chunk_type & BTRFS_BLOCK_GROUP_DATA))
3244 return 0;
3245
3246 spin_lock(&fs_info->data_sinfo->lock);
3247 bytes_used = fs_info->data_sinfo->bytes_used;
3248 spin_unlock(&fs_info->data_sinfo->lock);
3249
3250 if (!bytes_used) {
3251 struct btrfs_trans_handle *trans;
3252 int ret;
3253
3254 trans = btrfs_join_transaction(fs_info->tree_root);
3255 if (IS_ERR(trans))
3256 return PTR_ERR(trans);
3257
3258 ret = btrfs_force_chunk_alloc(trans, BTRFS_BLOCK_GROUP_DATA);
3259 btrfs_end_transaction(trans);
3260 if (ret < 0)
3261 return ret;
3262 return 1;
3263 }
3264
3265 return 0;
3266}
3267
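/*
 * Persist the balance item describing the data/metadata/system args in the
 * tree root, so an interrupted balance can be recovered on the next mount.
 */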
3268static int insert_balance_item(struct btrfs_fs_info *fs_info,
3269 struct btrfs_balance_control *bctl)
3270{
3271 struct btrfs_root *root = fs_info->tree_root;
3272 struct btrfs_trans_handle *trans;
3273 struct btrfs_balance_item *item;
3274 struct btrfs_disk_balance_args disk_bargs;
3275 struct btrfs_path *path;
3276 struct extent_buffer *leaf;
3277 struct btrfs_key key;
3278 int ret, err;
3279
3280 path = btrfs_alloc_path();
3281 if (!path)
3282 return -ENOMEM;
3283
3284 trans = btrfs_start_transaction(root, 0);
3285 if (IS_ERR(trans)) {
3286 btrfs_free_path(path);
3287 return PTR_ERR(trans);
3288 }
3289
3290 key.objectid = BTRFS_BALANCE_OBJECTID;
3291 key.type = BTRFS_TEMPORARY_ITEM_KEY;
3292 key.offset = 0;
3293
3294 ret = btrfs_insert_empty_item(trans, root, path, &key,
3295 sizeof(*item));
3296 if (ret)
3297 goto out;
3298
3299 leaf = path->nodes[0];
3300 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
3301
3302 memzero_extent_buffer(leaf, (unsigned long)item, sizeof(*item));
3303
3304 btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->data);
3305 btrfs_set_balance_data(leaf, item, &disk_bargs);
3306 btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->meta);
3307 btrfs_set_balance_meta(leaf, item, &disk_bargs);
3308 btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->sys);
3309 btrfs_set_balance_sys(leaf, item, &disk_bargs);
3310
3311 btrfs_set_balance_flags(leaf, item, bctl->flags);
3312
3313 btrfs_mark_buffer_dirty(leaf);
3314out:
3315 btrfs_free_path(path);
3316 err = btrfs_commit_transaction(trans);
3317 if (err && !ret)
3318 ret = err;
3319 return ret;
3320}
3321
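/* Delete the on-disk balance item once balance has finished or was canceled. */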
3322static int del_balance_item(struct btrfs_fs_info *fs_info)
3323{
3324 struct btrfs_root *root = fs_info->tree_root;
3325 struct btrfs_trans_handle *trans;
3326 struct btrfs_path *path;
3327 struct btrfs_key key;
3328 int ret, err;
3329
3330 path = btrfs_alloc_path();
3331 if (!path)
3332 return -ENOMEM;
3333
3334 trans = btrfs_start_transaction_fallback_global_rsv(root, 0);
3335 if (IS_ERR(trans)) {
3336 btrfs_free_path(path);
3337 return PTR_ERR(trans);
3338 }
3339
3340 key.objectid = BTRFS_BALANCE_OBJECTID;
3341 key.type = BTRFS_TEMPORARY_ITEM_KEY;
3342 key.offset = 0;
3343
3344 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3345 if (ret < 0)
3346 goto out;
3347 if (ret > 0) {
3348 ret = -ENOENT;
3349 goto out;
3350 }
3351
3352 ret = btrfs_del_item(trans, root, path);
3353out:
3354 btrfs_free_path(path);
3355 err = btrfs_commit_transaction(trans);
3356 if (err && !ret)
3357 ret = err;
3358 return ret;
3359}
3360
/*
 * This is a heuristic used to reduce the number of chunks balanced on
 * resume after balance was interrupted.
 */
3365static void update_balance_args(struct btrfs_balance_control *bctl)
3366{
	/*
	 * Turn on soft mode for chunk types that were being converted.
	 */
3370 if (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)
3371 bctl->data.flags |= BTRFS_BALANCE_ARGS_SOFT;
3372 if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)
3373 bctl->sys.flags |= BTRFS_BALANCE_ARGS_SOFT;
3374 if (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)
3375 bctl->meta.flags |= BTRFS_BALANCE_ARGS_SOFT;
3376
	/*
	 * Turn on usage filter if it is not already used.  The idea is
	 * that chunks that we have already balanced should be
	 * reasonably full.  Don't do it for chunks that are being
	 * converted - that will keep us from relocating unconverted
	 * (albeit full) chunks.
	 */
3384 if (!(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE) &&
3385 !(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
3386 !(bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
3387 bctl->data.flags |= BTRFS_BALANCE_ARGS_USAGE;
3388 bctl->data.usage = 90;
3389 }
3390 if (!(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE) &&
3391 !(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
3392 !(bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
3393 bctl->sys.flags |= BTRFS_BALANCE_ARGS_USAGE;
3394 bctl->sys.usage = 90;
3395 }
3396 if (!(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE) &&
3397 !(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
3398 !(bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
3399 bctl->meta.flags |= BTRFS_BALANCE_ARGS_USAGE;
3400 bctl->meta.usage = 90;
3401 }
3402}
3403
/*
 * Clear the balance status in fs_info and delete the balance item from disk.
 */
3407static void reset_balance_state(struct btrfs_fs_info *fs_info)
3408{
3409 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3410 int ret;
3411
3412 BUG_ON(!fs_info->balance_ctl);
3413
3414 spin_lock(&fs_info->balance_lock);
3415 fs_info->balance_ctl = NULL;
3416 spin_unlock(&fs_info->balance_lock);
3417
3418 kfree(bctl);
3419 ret = del_balance_item(fs_info);
3420 if (ret)
3421 btrfs_handle_fs_error(fs_info, ret, NULL);
3422}
3423
/*
 * Balance filters.  Return 1 if chunk should be filtered out
 * (should not be balanced).
 */
3428static int chunk_profiles_filter(u64 chunk_type,
3429 struct btrfs_balance_args *bargs)
3430{
3431 chunk_type = chunk_to_extended(chunk_type) &
3432 BTRFS_EXTENDED_PROFILE_MASK;
3433
3434 if (bargs->profiles & chunk_type)
3435 return 0;
3436
3437 return 1;
3438}
3439
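/*
 * Usage filter with a min..max range: return 0 (do balance) when the chunk's
 * used bytes fall within [usage_min%, usage_max%) of the block group length,
 * 1 (filter out) otherwise.
 */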
3440static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
3441 struct btrfs_balance_args *bargs)
3442{
3443 struct btrfs_block_group *cache;
3444 u64 chunk_used;
3445 u64 user_thresh_min;
3446 u64 user_thresh_max;
3447 int ret = 1;
3448
3449 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
3450 chunk_used = cache->used;
3451
3452 if (bargs->usage_min == 0)
3453 user_thresh_min = 0;
3454 else
3455 user_thresh_min = div_factor_fine(cache->length,
3456 bargs->usage_min);
3457
3458 if (bargs->usage_max == 0)
3459 user_thresh_max = 1;
3460 else if (bargs->usage_max > 100)
3461 user_thresh_max = cache->length;
3462 else
3463 user_thresh_max = div_factor_fine(cache->length,
3464 bargs->usage_max);
3465
3466 if (user_thresh_min <= chunk_used && chunk_used < user_thresh_max)
3467 ret = 0;
3468
3469 btrfs_put_block_group(cache);
3470 return ret;
3471}
3472
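/*
 * Single-value usage filter: return 0 (do balance) when the chunk's used
 * bytes are below usage% of the block group length, 1 (filter out) otherwise.
 */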
3473static int chunk_usage_filter(struct btrfs_fs_info *fs_info,
3474 u64 chunk_offset, struct btrfs_balance_args *bargs)
3475{
3476 struct btrfs_block_group *cache;
3477 u64 chunk_used, user_thresh;
3478 int ret = 1;
3479
3480 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
3481 chunk_used = cache->used;
3482
3483 if (bargs->usage_min == 0)
3484 user_thresh = 1;
3485 else if (bargs->usage > 100)
3486 user_thresh = cache->length;
3487 else
3488 user_thresh = div_factor_fine(cache->length, bargs->usage);
3489
3490 if (chunk_used < user_thresh)
3491 ret = 0;
3492
3493 btrfs_put_block_group(cache);
3494 return ret;
3495}
3496
3497static int chunk_devid_filter(struct extent_buffer *leaf,
3498 struct btrfs_chunk *chunk,
3499 struct btrfs_balance_args *bargs)
3500{
3501 struct btrfs_stripe *stripe;
3502 int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
3503 int i;
3504
3505 for (i = 0; i < num_stripes; i++) {
3506 stripe = btrfs_stripe_nr(chunk, i);
3507 if (btrfs_stripe_devid(leaf, stripe) == bargs->devid)
3508 return 0;
3509 }
3510
3511 return 1;
3512}
3513
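/*
 * Number of stripes that carry data for a given profile: total stripes minus
 * parity stripes for RAID5/6, or total divided by the copy count otherwise.
 */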
3514static u64 calc_data_stripes(u64 type, int num_stripes)
3515{
3516 const int index = btrfs_bg_flags_to_raid_index(type);
3517 const int ncopies = btrfs_raid_array[index].ncopies;
3518 const int nparity = btrfs_raid_array[index].nparity;
3519
3520 if (nparity)
3521 return num_stripes - nparity;
3522 else
3523 return num_stripes / ncopies;
3524}
3525
/* [pstart, pend) */
3527static int chunk_drange_filter(struct extent_buffer *leaf,
3528 struct btrfs_chunk *chunk,
3529 struct btrfs_balance_args *bargs)
3530{
3531 struct btrfs_stripe *stripe;
3532 int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
3533 u64 stripe_offset;
3534 u64 stripe_length;
3535 u64 type;
3536 int factor;
3537 int i;
3538
3539 if (!(bargs->flags & BTRFS_BALANCE_ARGS_DEVID))
3540 return 0;
3541
3542 type = btrfs_chunk_type(leaf, chunk);
3543 factor = calc_data_stripes(type, num_stripes);
3544
3545 for (i = 0; i < num_stripes; i++) {
3546 stripe = btrfs_stripe_nr(chunk, i);
3547 if (btrfs_stripe_devid(leaf, stripe) != bargs->devid)
3548 continue;
3549
3550 stripe_offset = btrfs_stripe_offset(leaf, stripe);
3551 stripe_length = btrfs_chunk_length(leaf, chunk);
3552 stripe_length = div_u64(stripe_length, factor);
3553
3554 if (stripe_offset < bargs->pend &&
3555 stripe_offset + stripe_length > bargs->pstart)
3556 return 0;
3557 }
3558
3559 return 1;
3560}
3561
/* [vstart, vend) */
3563static int chunk_vrange_filter(struct extent_buffer *leaf,
3564 struct btrfs_chunk *chunk,
3565 u64 chunk_offset,
3566 struct btrfs_balance_args *bargs)
3567{
3568 if (chunk_offset < bargs->vend &&
3569 chunk_offset + btrfs_chunk_length(leaf, chunk) > bargs->vstart)
		/* At least part of the chunk is inside this vrange */
3571 return 0;
3572
3573 return 1;
3574}
3575
3576static int chunk_stripes_range_filter(struct extent_buffer *leaf,
3577 struct btrfs_chunk *chunk,
3578 struct btrfs_balance_args *bargs)
3579{
3580 int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
3581
3582 if (bargs->stripes_min <= num_stripes
3583 && num_stripes <= bargs->stripes_max)
3584 return 0;
3585
3586 return 1;
3587}
3588
3589static int chunk_soft_convert_filter(u64 chunk_type,
3590 struct btrfs_balance_args *bargs)
3591{
3592 if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT))
3593 return 0;
3594
3595 chunk_type = chunk_to_extended(chunk_type) &
3596 BTRFS_EXTENDED_PROFILE_MASK;
3597
3598 if (bargs->target == chunk_type)
3599 return 1;
3600
3601 return 0;
3602}
3603
3604static int should_balance_chunk(struct extent_buffer *leaf,
3605 struct btrfs_chunk *chunk, u64 chunk_offset)
3606{
3607 struct btrfs_fs_info *fs_info = leaf->fs_info;
3608 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3609 struct btrfs_balance_args *bargs = NULL;
3610 u64 chunk_type = btrfs_chunk_type(leaf, chunk);
3611
	/* Type filter */
3613 if (!((chunk_type & BTRFS_BLOCK_GROUP_TYPE_MASK) &
3614 (bctl->flags & BTRFS_BALANCE_TYPE_MASK))) {
3615 return 0;
3616 }
3617
3618 if (chunk_type & BTRFS_BLOCK_GROUP_DATA)
3619 bargs = &bctl->data;
3620 else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
3621 bargs = &bctl->sys;
3622 else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA)
3623 bargs = &bctl->meta;
3624

	/* Profiles filter */
3626 if ((bargs->flags & BTRFS_BALANCE_ARGS_PROFILES) &&
3627 chunk_profiles_filter(chunk_type, bargs)) {
3628 return 0;
3629 }
3630
	/* Usage filter */
3632 if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE) &&
3633 chunk_usage_filter(fs_info, chunk_offset, bargs)) {
3634 return 0;
3635 } else if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
3636 chunk_usage_range_filter(fs_info, chunk_offset, bargs)) {
3637 return 0;
3638 }
3639
	/* Devid filter */
3641 if ((bargs->flags & BTRFS_BALANCE_ARGS_DEVID) &&
3642 chunk_devid_filter(leaf, chunk, bargs)) {
3643 return 0;
3644 }
3645
	/* Drange filter, makes sense only with devid filter */
3647 if ((bargs->flags & BTRFS_BALANCE_ARGS_DRANGE) &&
3648 chunk_drange_filter(leaf, chunk, bargs)) {
3649 return 0;
3650 }
3651
	/* Vrange filter */
3653 if ((bargs->flags & BTRFS_BALANCE_ARGS_VRANGE) &&
3654 chunk_vrange_filter(leaf, chunk, chunk_offset, bargs)) {
3655 return 0;
3656 }
3657
	/* Stripes filter */
3659 if ((bargs->flags & BTRFS_BALANCE_ARGS_STRIPES_RANGE) &&
3660 chunk_stripes_range_filter(leaf, chunk, bargs)) {
3661 return 0;
3662 }
3663
	/* Soft profile changing mode */
3665 if ((bargs->flags & BTRFS_BALANCE_ARGS_SOFT) &&
3666 chunk_soft_convert_filter(chunk_type, bargs)) {
3667 return 0;
3668 }
3669
	/*
	 * Limited by count, must be the last filter.
	 */
3673 if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT)) {
3674 if (bargs->limit == 0)
3675 return 0;
3676 else
3677 bargs->limit--;
3678 } else if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT_RANGE)) {
		/*
		 * Same logic as the 'limit' filter; the minimum cannot be
		 * determined here because we do not have the global information
		 * about the count of all chunks that satisfy the filters.
		 */
3684 if (bargs->limit_max == 0)
3685 return 0;
3686 else
3687 bargs->limit_max--;
3688 }
3689
3690 return 1;
3691}
3692
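/*
 * Main balance loop. The chunk tree is walked twice: a first, counting-only
 * pass computes how many chunks of each type are expected to be relocated,
 * then the second pass actually relocates every chunk that passes the
 * filters.
 */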
3693static int __btrfs_balance(struct btrfs_fs_info *fs_info)
3694{
3695 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3696 struct btrfs_root *chunk_root = fs_info->chunk_root;
3697 u64 chunk_type;
3698 struct btrfs_chunk *chunk;
3699 struct btrfs_path *path = NULL;
3700 struct btrfs_key key;
3701 struct btrfs_key found_key;
3702 struct extent_buffer *leaf;
3703 int slot;
3704 int ret;
3705 int enospc_errors = 0;
3706 bool counting = true;
	/* The single value limit and the min/max limits share the same bytes in the args */
3708 u64 limit_data = bctl->data.limit;
3709 u64 limit_meta = bctl->meta.limit;
3710 u64 limit_sys = bctl->sys.limit;
3711 u32 count_data = 0;
3712 u32 count_meta = 0;
3713 u32 count_sys = 0;
3714 int chunk_reserved = 0;
3715
3716 path = btrfs_alloc_path();
3717 if (!path) {
3718 ret = -ENOMEM;
3719 goto error;
3720 }
3721
	/* Zero out stat counters */
3723 spin_lock(&fs_info->balance_lock);
3724 memset(&bctl->stat, 0, sizeof(bctl->stat));
3725 spin_unlock(&fs_info->balance_lock);
3726again:
3727 if (!counting) {
		/*
		 * The limits were decremented during the counting pass,
		 * restore them before the real balance pass.
		 */
3732 bctl->data.limit = limit_data;
3733 bctl->meta.limit = limit_meta;
3734 bctl->sys.limit = limit_sys;
3735 }
3736 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
3737 key.offset = (u64)-1;
3738 key.type = BTRFS_CHUNK_ITEM_KEY;
3739
3740 while (1) {
3741 if ((!counting && atomic_read(&fs_info->balance_pause_req)) ||
3742 atomic_read(&fs_info->balance_cancel_req)) {
3743 ret = -ECANCELED;
3744 goto error;
3745 }
3746
3747 mutex_lock(&fs_info->delete_unused_bgs_mutex);
3748 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
3749 if (ret < 0) {
3750 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3751 goto error;
3752 }
3753
		/*
		 * This shouldn't happen, it has to be at least one
		 * chunk.
		 */
3758 if (ret == 0)
3759 BUG();
3760
3761 ret = btrfs_previous_item(chunk_root, path, 0,
3762 BTRFS_CHUNK_ITEM_KEY);
3763 if (ret) {
3764 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3765 ret = 0;
3766 break;
3767 }
3768
3769 leaf = path->nodes[0];
3770 slot = path->slots[0];
3771 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3772
3773 if (found_key.objectid != key.objectid) {
3774 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3775 break;
3776 }
3777
3778 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
3779 chunk_type = btrfs_chunk_type(leaf, chunk);
3780
3781 if (!counting) {
3782 spin_lock(&fs_info->balance_lock);
3783 bctl->stat.considered++;
3784 spin_unlock(&fs_info->balance_lock);
3785 }
3786
3787 ret = should_balance_chunk(leaf, chunk, found_key.offset);
3788
3789 btrfs_release_path(path);
3790 if (!ret) {
3791 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3792 goto loop;
3793 }
3794
3795 if (counting) {
3796 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3797 spin_lock(&fs_info->balance_lock);
3798 bctl->stat.expected++;
3799 spin_unlock(&fs_info->balance_lock);
3800
3801 if (chunk_type & BTRFS_BLOCK_GROUP_DATA)
3802 count_data++;
3803 else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
3804 count_sys++;
3805 else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA)
3806 count_meta++;
3807
3808 goto loop;
3809 }
3810
		/*
		 * Apply limit_min filter, no need to check if the LIMITS
		 * filter is used, limit_min is 0 by default.
		 */
3815 if (((chunk_type & BTRFS_BLOCK_GROUP_DATA) &&
3816 count_data < bctl->data.limit_min)
3817 || ((chunk_type & BTRFS_BLOCK_GROUP_METADATA) &&
3818 count_meta < bctl->meta.limit_min)
3819 || ((chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) &&
3820 count_sys < bctl->sys.limit_min)) {
3821 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3822 goto loop;
3823 }
3824
3825 if (!chunk_reserved) {
			/*
			 * We may be relocating the only data chunk we have,
			 * which could potentially end up with losing data's
			 * raid profile, so let's allocate an empty one in
			 * advance.
			 */
3832 ret = btrfs_may_alloc_data_chunk(fs_info,
3833 found_key.offset);
3834 if (ret < 0) {
3835 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3836 goto error;
3837 } else if (ret == 1) {
3838 chunk_reserved = 1;
3839 }
3840 }
3841
3842 ret = btrfs_relocate_chunk(fs_info, found_key.offset);
3843 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3844 if (ret == -ENOSPC) {
3845 enospc_errors++;
3846 } else if (ret == -ETXTBSY) {
3847 btrfs_info(fs_info,
3848 "skipping relocation of block group %llu due to active swapfile",
3849 found_key.offset);
3850 ret = 0;
3851 } else if (ret) {
3852 goto error;
3853 } else {
3854 spin_lock(&fs_info->balance_lock);
3855 bctl->stat.completed++;
3856 spin_unlock(&fs_info->balance_lock);
3857 }
3858loop:
3859 if (found_key.offset == 0)
3860 break;
3861 key.offset = found_key.offset - 1;
3862 }
3863
3864 if (counting) {
3865 btrfs_release_path(path);
3866 counting = false;
3867 goto again;
3868 }
3869error:
3870 btrfs_free_path(path);
3871 if (enospc_errors) {
3872 btrfs_info(fs_info, "%d enospc errors during balance",
3873 enospc_errors);
3874 if (!ret)
3875 ret = -ENOSPC;
3876 }
3877
3878 return ret;
3879}
3880
/*
 * alloc_profile_is_valid - see if a given profile is valid and reduced
 * @flags: profile to validate
 * @extended: if true @flags is treated as an extended profile
 */
3886static int alloc_profile_is_valid(u64 flags, int extended)
3887{
3888 u64 mask = (extended ? BTRFS_EXTENDED_PROFILE_MASK :
3889 BTRFS_BLOCK_GROUP_PROFILE_MASK);
3890
3891 flags &= ~BTRFS_BLOCK_GROUP_TYPE_MASK;
3892
	/* 1) Check that all other bits are zeroed */
3894 if (flags & ~mask)
3895 return 0;
3896
	/* 2) See if the profile is reduced */
3898 if (flags == 0)
		return !extended; /* "0" is valid for usual profiles */
3900
3901 return has_single_bit_set(flags);
3902}
3903
3904static inline int balance_need_close(struct btrfs_fs_info *fs_info)
3905{
	/* Cancel requested || normal exit path */
3907 return atomic_read(&fs_info->balance_cancel_req) ||
3908 (atomic_read(&fs_info->balance_pause_req) == 0 &&
3909 atomic_read(&fs_info->balance_cancel_req) == 0);
3910}
3911
/*
 * Validate target profile against allowed profiles and return true if it's OK.
 * Otherwise print the error message and return false.
 */
3916static inline int validate_convert_profile(struct btrfs_fs_info *fs_info,
3917 const struct btrfs_balance_args *bargs,
3918 u64 allowed, const char *type)
3919{
3920 if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT))
3921 return true;
3922
	/* Profile is valid and does not have bits outside of the allowed set */
3924 if (alloc_profile_is_valid(bargs->target, 1) &&
3925 (bargs->target & ~allowed) == 0)
3926 return true;
3927
3928 btrfs_err(fs_info, "balance: invalid convert %s profile %s",
3929 type, btrfs_bg_type_to_raid_name(bargs->target));
3930 return false;
3931}
3932
/*
 * Fill @buf with textual description of balance filter flags @bargs, up to
 * @size_buf including the terminating null. The output may be trimmed if it
 * does not fit.
 */
3938static void describe_balance_args(struct btrfs_balance_args *bargs, char *buf,
3939 u32 size_buf)
3940{
3941 int ret;
3942 u32 size_bp = size_buf;
3943 char *bp = buf;
3944 u64 flags = bargs->flags;
3945 char tmp_buf[128] = {'\0'};
3946
3947 if (!flags)
3948 return;
3949
3950#define CHECK_APPEND_NOARG(a) \
3951 do { \
3952 ret = snprintf(bp, size_bp, (a)); \
3953 if (ret < 0 || ret >= size_bp) \
3954 goto out_overflow; \
3955 size_bp -= ret; \
3956 bp += ret; \
3957 } while (0)
3958
3959#define CHECK_APPEND_1ARG(a, v1) \
3960 do { \
3961 ret = snprintf(bp, size_bp, (a), (v1)); \
3962 if (ret < 0 || ret >= size_bp) \
3963 goto out_overflow; \
3964 size_bp -= ret; \
3965 bp += ret; \
3966 } while (0)
3967
3968#define CHECK_APPEND_2ARG(a, v1, v2) \
3969 do { \
3970 ret = snprintf(bp, size_bp, (a), (v1), (v2)); \
3971 if (ret < 0 || ret >= size_bp) \
3972 goto out_overflow; \
3973 size_bp -= ret; \
3974 bp += ret; \
3975 } while (0)
3976
3977 if (flags & BTRFS_BALANCE_ARGS_CONVERT)
3978 CHECK_APPEND_1ARG("convert=%s,",
3979 btrfs_bg_type_to_raid_name(bargs->target));
3980
3981 if (flags & BTRFS_BALANCE_ARGS_SOFT)
3982 CHECK_APPEND_NOARG("soft,");
3983
3984 if (flags & BTRFS_BALANCE_ARGS_PROFILES) {
3985 btrfs_describe_block_groups(bargs->profiles, tmp_buf,
3986 sizeof(tmp_buf));
3987 CHECK_APPEND_1ARG("profiles=%s,", tmp_buf);
3988 }
3989
3990 if (flags & BTRFS_BALANCE_ARGS_USAGE)
3991 CHECK_APPEND_1ARG("usage=%llu,", bargs->usage);
3992
3993 if (flags & BTRFS_BALANCE_ARGS_USAGE_RANGE)
3994 CHECK_APPEND_2ARG("usage=%u..%u,",
3995 bargs->usage_min, bargs->usage_max);
3996
3997 if (flags & BTRFS_BALANCE_ARGS_DEVID)
3998 CHECK_APPEND_1ARG("devid=%llu,", bargs->devid);
3999
4000 if (flags & BTRFS_BALANCE_ARGS_DRANGE)
4001 CHECK_APPEND_2ARG("drange=%llu..%llu,",
4002 bargs->pstart, bargs->pend);
4003
4004 if (flags & BTRFS_BALANCE_ARGS_VRANGE)
4005 CHECK_APPEND_2ARG("vrange=%llu..%llu,",
4006 bargs->vstart, bargs->vend);
4007
4008 if (flags & BTRFS_BALANCE_ARGS_LIMIT)
4009 CHECK_APPEND_1ARG("limit=%llu,", bargs->limit);
4010
4011 if (flags & BTRFS_BALANCE_ARGS_LIMIT_RANGE)
4012 CHECK_APPEND_2ARG("limit=%u..%u,",
4013 bargs->limit_min, bargs->limit_max);
4014
4015 if (flags & BTRFS_BALANCE_ARGS_STRIPES_RANGE)
4016 CHECK_APPEND_2ARG("stripes=%u..%u,",
4017 bargs->stripes_min, bargs->stripes_max);
4018
4019#undef CHECK_APPEND_2ARG
4020#undef CHECK_APPEND_1ARG
4021#undef CHECK_APPEND_NOARG
4022
4023out_overflow:
4024
4025 if (size_bp < size_buf)
		buf[size_buf - size_bp - 1] = '\0'; /* remove last , */
4027 else
4028 buf[0] = '\0';
4029}
4030
4031static void describe_balance_start_or_resume(struct btrfs_fs_info *fs_info)
4032{
4033 u32 size_buf = 1024;
4034 char tmp_buf[192] = {'\0'};
4035 char *buf;
4036 char *bp;
4037 u32 size_bp = size_buf;
4038 int ret;
4039 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
4040
4041 buf = kzalloc(size_buf, GFP_KERNEL);
4042 if (!buf)
4043 return;
4044
4045 bp = buf;
4046
4047#define CHECK_APPEND_1ARG(a, v1) \
4048 do { \
4049 ret = snprintf(bp, size_bp, (a), (v1)); \
4050 if (ret < 0 || ret >= size_bp) \
4051 goto out_overflow; \
4052 size_bp -= ret; \
4053 bp += ret; \
4054 } while (0)
4055
4056 if (bctl->flags & BTRFS_BALANCE_FORCE)
4057 CHECK_APPEND_1ARG("%s", "-f ");
4058
4059 if (bctl->flags & BTRFS_BALANCE_DATA) {
4060 describe_balance_args(&bctl->data, tmp_buf, sizeof(tmp_buf));
4061 CHECK_APPEND_1ARG("-d%s ", tmp_buf);
4062 }
4063
4064 if (bctl->flags & BTRFS_BALANCE_METADATA) {
4065 describe_balance_args(&bctl->meta, tmp_buf, sizeof(tmp_buf));
4066 CHECK_APPEND_1ARG("-m%s ", tmp_buf);
4067 }
4068
4069 if (bctl->flags & BTRFS_BALANCE_SYSTEM) {
4070 describe_balance_args(&bctl->sys, tmp_buf, sizeof(tmp_buf));
4071 CHECK_APPEND_1ARG("-s%s ", tmp_buf);
4072 }
4073
4074#undef CHECK_APPEND_1ARG
4075
4076out_overflow:
4077
4078 if (size_bp < size_buf)
		buf[size_buf - size_bp - 1] = '\0'; /* remove last " " */
4080 btrfs_info(fs_info, "balance: %s %s",
4081 (bctl->flags & BTRFS_BALANCE_RESUME) ?
4082 "resume" : "start", buf);
4083
4084 kfree(buf);
4085}
4086
/*
 * Should be called with balance mutex held.
 */
4090int btrfs_balance(struct btrfs_fs_info *fs_info,
4091 struct btrfs_balance_control *bctl,
4092 struct btrfs_ioctl_balance_args *bargs)
4093{
4094 u64 meta_target, data_target;
4095 u64 allowed;
4096 int mixed = 0;
4097 int ret;
4098 u64 num_devices;
4099 unsigned seq;
4100 bool reducing_redundancy;
4101 int i;
4102
4103 if (btrfs_fs_closing(fs_info) ||
4104 atomic_read(&fs_info->balance_pause_req) ||
4105 btrfs_should_cancel_balance(fs_info)) {
4106 ret = -EINVAL;
4107 goto out;
4108 }
4109
4110 allowed = btrfs_super_incompat_flags(fs_info->super_copy);
4111 if (allowed & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
4112 mixed = 1;
4113
	/*
	 * In case of mixed groups both data and meta should be picked,
	 * and identical options should be given for both of them.
	 */
4118 allowed = BTRFS_BALANCE_DATA | BTRFS_BALANCE_METADATA;
4119 if (mixed && (bctl->flags & allowed)) {
4120 if (!(bctl->flags & BTRFS_BALANCE_DATA) ||
4121 !(bctl->flags & BTRFS_BALANCE_METADATA) ||
4122 memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) {
4123 btrfs_err(fs_info,
4124 "balance: mixed groups data and metadata options must be the same");
4125 ret = -EINVAL;
4126 goto out;
4127 }
4128 }
4129
	/*
	 * rw_devices will not change at the moment, device add/delete/replace
	 * are excluded by the exclusive op status.
	 */
4134 num_devices = fs_info->fs_devices->rw_devices;
4135
	/*
	 * SINGLE profile on-disk has no profile bit, but in-memory we have a
	 * special bit for it, to make it easier to distinguish.  Thus we need
	 * to set it manually, or balance would refuse the profile.
	 */
4141 allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
4142 for (i = 0; i < ARRAY_SIZE(btrfs_raid_array); i++)
4143 if (num_devices >= btrfs_raid_array[i].devs_min)
4144 allowed |= btrfs_raid_array[i].bg_flag;
4145
4146 if (!validate_convert_profile(fs_info, &bctl->data, allowed, "data") ||
4147 !validate_convert_profile(fs_info, &bctl->meta, allowed, "metadata") ||
4148 !validate_convert_profile(fs_info, &bctl->sys, allowed, "system")) {
4149 ret = -EINVAL;
4150 goto out;
4151 }
4152
	/*
	 * Allow to reduce metadata or system integrity only if force set for
	 * profiles with redundancy (copies, parity).
	 */
4157 allowed = 0;
4158 for (i = 0; i < ARRAY_SIZE(btrfs_raid_array); i++) {
4159 if (btrfs_raid_array[i].ncopies >= 2 ||
4160 btrfs_raid_array[i].tolerated_failures >= 1)
4161 allowed |= btrfs_raid_array[i].bg_flag;
4162 }
4163 do {
4164 seq = read_seqbegin(&fs_info->profiles_lock);
4165
4166 if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
4167 (fs_info->avail_system_alloc_bits & allowed) &&
4168 !(bctl->sys.target & allowed)) ||
4169 ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
4170 (fs_info->avail_metadata_alloc_bits & allowed) &&
4171 !(bctl->meta.target & allowed)))
4172 reducing_redundancy = true;
4173 else
4174 reducing_redundancy = false;
4175

		/* If we're not converting, the target field is uninitialized */
4177 meta_target = (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
4178 bctl->meta.target : fs_info->avail_metadata_alloc_bits;
4179 data_target = (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
4180 bctl->data.target : fs_info->avail_data_alloc_bits;
4181 } while (read_seqretry(&fs_info->profiles_lock, seq));
4182
4183 if (reducing_redundancy) {
4184 if (bctl->flags & BTRFS_BALANCE_FORCE) {
4185 btrfs_info(fs_info,
4186 "balance: force reducing metadata redundancy");
4187 } else {
4188 btrfs_err(fs_info,
4189 "balance: reduces metadata redundancy, use --force if you want this");
4190 ret = -EINVAL;
4191 goto out;
4192 }
4193 }
4194
4195 if (btrfs_get_num_tolerated_disk_barrier_failures(meta_target) <
4196 btrfs_get_num_tolerated_disk_barrier_failures(data_target)) {
4197 btrfs_warn(fs_info,
4198 "balance: metadata profile %s has lower redundancy than data profile %s",
4199 btrfs_bg_type_to_raid_name(meta_target),
4200 btrfs_bg_type_to_raid_name(data_target));
4201 }
4202
4203 if (fs_info->send_in_progress) {
4204 btrfs_warn_rl(fs_info,
4205"cannot run balance while send operations are in progress (%d in progress)",
4206 fs_info->send_in_progress);
4207 ret = -EAGAIN;
4208 goto out;
4209 }
4210
4211 ret = insert_balance_item(fs_info, bctl);
4212 if (ret && ret != -EEXIST)
4213 goto out;
4214
4215 if (!(bctl->flags & BTRFS_BALANCE_RESUME)) {
4216 BUG_ON(ret == -EEXIST);
4217 BUG_ON(fs_info->balance_ctl);
4218 spin_lock(&fs_info->balance_lock);
4219 fs_info->balance_ctl = bctl;
4220 spin_unlock(&fs_info->balance_lock);
4221 } else {
4222 BUG_ON(ret != -EEXIST);
4223 spin_lock(&fs_info->balance_lock);
4224 update_balance_args(bctl);
4225 spin_unlock(&fs_info->balance_lock);
4226 }
4227
4228 ASSERT(!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
4229 set_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);
4230 describe_balance_start_or_resume(fs_info);
4231 mutex_unlock(&fs_info->balance_mutex);
4232
4233 ret = __btrfs_balance(fs_info);
4234
4235 mutex_lock(&fs_info->balance_mutex);
4236 if (ret == -ECANCELED && atomic_read(&fs_info->balance_pause_req))
4237 btrfs_info(fs_info, "balance: paused");
4238
	/*
	 * Balance can be canceled by:
	 *
	 * - Regular cancel request
	 *   Then ret == -ECANCELED and balance_cancel_req > 0
	 *
	 * - Fatal signal to "btrfs" process
	 *   Either the signal caught by wait_reserve_ticket() and callers
	 *   got -EINTR, or caught by btrfs_should_cancel_balance() and
	 *   got -ECANCELED.
	 *   Either way, in this case balance_cancel_req = 0, and
	 *   ret == -EINTR or ret == -ECANCELED.
	 *
	 * So here we only check the return value to catch canceled balance.
	 */
4253 else if (ret == -ECANCELED || ret == -EINTR)
4254 btrfs_info(fs_info, "balance: canceled");
4255 else
4256 btrfs_info(fs_info, "balance: ended with status: %d", ret);
4257
4258 clear_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);
4259
4260 if (bargs) {
4261 memset(bargs, 0, sizeof(*bargs));
4262 btrfs_update_ioctl_balance_args(fs_info, bargs);
4263 }
4264
4265 if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
4266 balance_need_close(fs_info)) {
4267 reset_balance_state(fs_info);
4268 btrfs_exclop_finish(fs_info);
4269 }
4270
4271 wake_up(&fs_info->balance_wait_q);
4272
4273 return ret;
4274out:
4275 if (bctl->flags & BTRFS_BALANCE_RESUME)
4276 reset_balance_state(fs_info);
4277 else
4278 kfree(bctl);
4279 btrfs_exclop_finish(fs_info);
4280
4281 return ret;
4282}
4283
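/* Kthread entry point used to resume a balance that was interrupted. */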
4284static int balance_kthread(void *data)
4285{
4286 struct btrfs_fs_info *fs_info = data;
4287 int ret = 0;
4288
4289 mutex_lock(&fs_info->balance_mutex);
4290 if (fs_info->balance_ctl)
4291 ret = btrfs_balance(fs_info, fs_info->balance_ctl, NULL);
4292 mutex_unlock(&fs_info->balance_mutex);
4293
4294 return ret;
4295}
4296
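/*
 * Resume a paused balance in a background kthread, unless resuming was
 * disabled with the skip_balance mount option.
 */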
4297int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
4298{
4299 struct task_struct *tsk;
4300
4301 mutex_lock(&fs_info->balance_mutex);
4302 if (!fs_info->balance_ctl) {
4303 mutex_unlock(&fs_info->balance_mutex);
4304 return 0;
4305 }
4306 mutex_unlock(&fs_info->balance_mutex);
4307
4308 if (btrfs_test_opt(fs_info, SKIP_BALANCE)) {
4309 btrfs_info(fs_info, "balance: resume skipped");
4310 return 0;
4311 }
4312
	/*
	 * A ro->rw remount sequence should continue with the paused balance
	 * regardless of who pauses it, system or the user as of now, so set
	 * the resume flag.
	 */
4318 spin_lock(&fs_info->balance_lock);
4319 fs_info->balance_ctl->flags |= BTRFS_BALANCE_RESUME;
4320 spin_unlock(&fs_info->balance_lock);
4321
4322 tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
4323 return PTR_ERR_OR_ZERO(tsk);
4324}
4325
4326int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
4327{
4328 struct btrfs_balance_control *bctl;
4329 struct btrfs_balance_item *item;
4330 struct btrfs_disk_balance_args disk_bargs;
4331 struct btrfs_path *path;
4332 struct extent_buffer *leaf;
4333 struct btrfs_key key;
4334 int ret;
4335
4336 path = btrfs_alloc_path();
4337 if (!path)
4338 return -ENOMEM;
4339
4340 key.objectid = BTRFS_BALANCE_OBJECTID;
4341 key.type = BTRFS_TEMPORARY_ITEM_KEY;
4342 key.offset = 0;
4343
4344 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
4345 if (ret < 0)
4346 goto out;
4347 if (ret > 0) {
4348 ret = 0;
4349 goto out;
4350 }
4351
4352 bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
4353 if (!bctl) {
4354 ret = -ENOMEM;
4355 goto out;
4356 }
4357
4358 leaf = path->nodes[0];
4359 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
4360
4361 bctl->flags = btrfs_balance_flags(leaf, item);
4362 bctl->flags |= BTRFS_BALANCE_RESUME;
4363
4364 btrfs_balance_data(leaf, item, &disk_bargs);
4365 btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs);
4366 btrfs_balance_meta(leaf, item, &disk_bargs);
4367 btrfs_disk_balance_args_to_cpu(&bctl->meta, &disk_bargs);
4368 btrfs_balance_sys(leaf, item, &disk_bargs);
4369 btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
4370
	/*
	 * This should never happen, as the paused balance state is recovered
	 * during mount without any chance of other exclusive ops to collide.
	 *
	 * This gives the exclusive op status to balance and keeps it in the
	 * paused state until the user is ready to resume or cancel, either by
	 * an ioctl or by remounting read-write.
	 */
4381 if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE))
4382 btrfs_warn(fs_info,
4383 "balance: cannot set exclusive op status, resume manually");
4384
4385 btrfs_release_path(path);
4386
4387 mutex_lock(&fs_info->balance_mutex);
4388 BUG_ON(fs_info->balance_ctl);
4389 spin_lock(&fs_info->balance_lock);
4390 fs_info->balance_ctl = bctl;
4391 spin_unlock(&fs_info->balance_lock);
4392 mutex_unlock(&fs_info->balance_mutex);
4393out:
4394 btrfs_free_path(path);
4395 return ret;
4396}
4397
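/*
 * Request a pause and wait until the running balance acknowledges it and
 * clears BTRFS_FS_BALANCE_RUNNING. Returns -ENOTCONN if there is no balance
 * to pause.
 */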
4398int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
4399{
4400 int ret = 0;
4401
4402 mutex_lock(&fs_info->balance_mutex);
4403 if (!fs_info->balance_ctl) {
4404 mutex_unlock(&fs_info->balance_mutex);
4405 return -ENOTCONN;
4406 }
4407
4408 if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
4409 atomic_inc(&fs_info->balance_pause_req);
4410 mutex_unlock(&fs_info->balance_mutex);
4411
4412 wait_event(fs_info->balance_wait_q,
4413 !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
4414
4415 mutex_lock(&fs_info->balance_mutex);
4416
4417 BUG_ON(test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
4418 atomic_dec(&fs_info->balance_pause_req);
4419 } else {
4420 ret = -ENOTCONN;
4421 }
4422
4423 mutex_unlock(&fs_info->balance_mutex);
4424 return ret;
4425}
4426
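/*
 * Cancel a running or paused balance, deleting the balance item on disk.
 * Returns -EROFS on a read-only mount, since cancelling would delete the item
 * a later read-write mount could otherwise resume from.
 */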
4427int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
4428{
4429 mutex_lock(&fs_info->balance_mutex);
4430 if (!fs_info->balance_ctl) {
4431 mutex_unlock(&fs_info->balance_mutex);
4432 return -ENOTCONN;
4433 }
4434
	/*
	 * A paused balance with the item stored on disk can be resumed at
	 * mount time if the mount is read-write. Otherwise it's still paused
	 * and we must not allow cancelling as it deletes the item.
	 */
4440 if (sb_rdonly(fs_info->sb)) {
4441 mutex_unlock(&fs_info->balance_mutex);
4442 return -EROFS;
4443 }
4444
4445 atomic_inc(&fs_info->balance_cancel_req);
4446
	/*
	 * If balance is currently running, wait for it to notice the cancel
	 * request and finish.
	 */
4450 if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
4451 mutex_unlock(&fs_info->balance_mutex);
4452 wait_event(fs_info->balance_wait_q,
4453 !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
4454 mutex_lock(&fs_info->balance_mutex);
4455 } else {
4456 mutex_unlock(&fs_info->balance_mutex);
4457
		/*
		 * Lock released to allow other waiters to continue, we'll
		 * reexamine the status again.
		 */
4461 mutex_lock(&fs_info->balance_mutex);
4462
4463 if (fs_info->balance_ctl) {
4464 reset_balance_state(fs_info);
4465 btrfs_exclop_finish(fs_info);
4466 btrfs_info(fs_info, "balance: canceled");
4467 }
4468 }
4469
4470 BUG_ON(fs_info->balance_ctl ||
4471 test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
4472 atomic_dec(&fs_info->balance_cancel_req);
4473 mutex_unlock(&fs_info->balance_mutex);
4474 return 0;
4475}
4476
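/*
 * Scan all root items and insert the subvolume UUID and received UUID of
 * every subvolume into the UUID tree.
 */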
4477int btrfs_uuid_scan_kthread(void *data)
4478{
4479 struct btrfs_fs_info *fs_info = data;
4480 struct btrfs_root *root = fs_info->tree_root;
4481 struct btrfs_key key;
4482 struct btrfs_path *path = NULL;
4483 int ret = 0;
4484 struct extent_buffer *eb;
4485 int slot;
4486 struct btrfs_root_item root_item;
4487 u32 item_size;
4488 struct btrfs_trans_handle *trans = NULL;
4489 bool closing = false;
4490
4491 path = btrfs_alloc_path();
4492 if (!path) {
4493 ret = -ENOMEM;
4494 goto out;
4495 }
4496
4497 key.objectid = 0;
4498 key.type = BTRFS_ROOT_ITEM_KEY;
4499 key.offset = 0;
4500
4501 while (1) {
4502 if (btrfs_fs_closing(fs_info)) {
4503 closing = true;
4504 break;
4505 }
4506 ret = btrfs_search_forward(root, &key, path,
4507 BTRFS_OLDEST_GENERATION);
4508 if (ret) {
4509 if (ret > 0)
4510 ret = 0;
4511 break;
4512 }
4513
4514 if (key.type != BTRFS_ROOT_ITEM_KEY ||
4515 (key.objectid < BTRFS_FIRST_FREE_OBJECTID &&
4516 key.objectid != BTRFS_FS_TREE_OBJECTID) ||
4517 key.objectid > BTRFS_LAST_FREE_OBJECTID)
4518 goto skip;
4519
4520 eb = path->nodes[0];
4521 slot = path->slots[0];
4522 item_size = btrfs_item_size_nr(eb, slot);
4523 if (item_size < sizeof(root_item))
4524 goto skip;
4525
4526 read_extent_buffer(eb, &root_item,
4527 btrfs_item_ptr_offset(eb, slot),
4528 (int)sizeof(root_item));
4529 if (btrfs_root_refs(&root_item) == 0)
4530 goto skip;
4531
4532 if (!btrfs_is_empty_uuid(root_item.uuid) ||
4533 !btrfs_is_empty_uuid(root_item.received_uuid)) {
4534 if (trans)
4535 goto update_tree;
4536
4537 btrfs_release_path(path);
4538
			/*
			 * 1 - subvol uuid item
			 * 1 - received_subvol uuid item
			 */
4542 trans = btrfs_start_transaction(fs_info->uuid_root, 2);
4543 if (IS_ERR(trans)) {
4544 ret = PTR_ERR(trans);
4545 break;
4546 }
4547 continue;
4548 } else {
4549 goto skip;
4550 }
4551update_tree:
4552 btrfs_release_path(path);
4553 if (!btrfs_is_empty_uuid(root_item.uuid)) {
4554 ret = btrfs_uuid_tree_add(trans, root_item.uuid,
4555 BTRFS_UUID_KEY_SUBVOL,
4556 key.objectid);
4557 if (ret < 0) {
4558 btrfs_warn(fs_info, "uuid_tree_add failed %d",
4559 ret);
4560 break;
4561 }
4562 }
4563
4564 if (!btrfs_is_empty_uuid(root_item.received_uuid)) {
4565 ret = btrfs_uuid_tree_add(trans,
4566 root_item.received_uuid,
4567 BTRFS_UUID_KEY_RECEIVED_SUBVOL,
4568 key.objectid);
4569 if (ret < 0) {
4570 btrfs_warn(fs_info, "uuid_tree_add failed %d",
4571 ret);
4572 break;
4573 }
4574 }
4575
4576skip:
4577 btrfs_release_path(path);
4578 if (trans) {
4579 ret = btrfs_end_transaction(trans);
4580 trans = NULL;
4581 if (ret)
4582 break;
4583 }
4584
4585 if (key.offset < (u64)-1) {
4586 key.offset++;
4587 } else if (key.type < BTRFS_ROOT_ITEM_KEY) {
4588 key.offset = 0;
4589 key.type = BTRFS_ROOT_ITEM_KEY;
4590 } else if (key.objectid < (u64)-1) {
4591 key.offset = 0;
4592 key.type = BTRFS_ROOT_ITEM_KEY;
4593 key.objectid++;
4594 } else {
4595 break;
4596 }
4597 cond_resched();
4598 }
4599
4600out:
4601 btrfs_free_path(path);
4602 if (trans && !IS_ERR(trans))
4603 btrfs_end_transaction(trans);
4604 if (ret)
4605 btrfs_warn(fs_info, "btrfs_uuid_scan_kthread failed %d", ret);
4606 else if (!closing)
4607 set_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags);
4608 up(&fs_info->uuid_tree_rescan_sem);
4609 return 0;
4610}
4611
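/*
 * Create the UUID tree and start the scan kthread that populates it from the
 * existing subvolumes.
 */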
4612int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
4613{
4614 struct btrfs_trans_handle *trans;
4615 struct btrfs_root *tree_root = fs_info->tree_root;
4616 struct btrfs_root *uuid_root;
4617 struct task_struct *task;
4618 int ret;
4619
	/*
	 * 1 - root node
	 * 1 - root item
	 */
4624 trans = btrfs_start_transaction(tree_root, 2);
4625 if (IS_ERR(trans))
4626 return PTR_ERR(trans);
4627
4628 uuid_root = btrfs_create_tree(trans, BTRFS_UUID_TREE_OBJECTID);
4629 if (IS_ERR(uuid_root)) {
4630 ret = PTR_ERR(uuid_root);
4631 btrfs_abort_transaction(trans, ret);
4632 btrfs_end_transaction(trans);
4633 return ret;
4634 }
4635
4636 fs_info->uuid_root = uuid_root;
4637
4638 ret = btrfs_commit_transaction(trans);
4639 if (ret)
4640 return ret;
4641
4642 down(&fs_info->uuid_tree_rescan_sem);
4643 task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid");
4644 if (IS_ERR(task)) {
4645
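		/* FIXME: The cleanup in this case is not nice. */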
4646 btrfs_warn(fs_info, "failed to start uuid_scan task");
4647 up(&fs_info->uuid_tree_rescan_sem);
4648 return PTR_ERR(task);
4649 }
4650
4651 return 0;
4652}
4653
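/*
 * Shrinking a device means finding all of the device extents past the new
 * size, and then following the back refs to the chunks. The chunk relocation
 * code actually frees the device extent.
 */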
4659int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
4660{
4661 struct btrfs_fs_info *fs_info = device->fs_info;
4662 struct btrfs_root *root = fs_info->dev_root;
4663 struct btrfs_trans_handle *trans;
4664 struct btrfs_dev_extent *dev_extent = NULL;
4665 struct btrfs_path *path;
4666 u64 length;
4667 u64 chunk_offset;
4668 int ret;
4669 int slot;
4670 int failed = 0;
4671 bool retried = false;
4672 struct extent_buffer *l;
4673 struct btrfs_key key;
4674 struct btrfs_super_block *super_copy = fs_info->super_copy;
4675 u64 old_total = btrfs_super_total_bytes(super_copy);
4676 u64 old_size = btrfs_device_get_total_bytes(device);
4677 u64 diff;
4678 u64 start;
4679
4680 new_size = round_down(new_size, fs_info->sectorsize);
4681 start = new_size;
4682 diff = round_down(old_size - new_size, fs_info->sectorsize);
4683
4684 if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
4685 return -EINVAL;
4686
4687 path = btrfs_alloc_path();
4688 if (!path)
4689 return -ENOMEM;
4690
4691 path->reada = READA_BACK;
4692
4693 trans = btrfs_start_transaction(root, 0);
4694 if (IS_ERR(trans)) {
4695 btrfs_free_path(path);
4696 return PTR_ERR(trans);
4697 }
4698
4699 mutex_lock(&fs_info->chunk_mutex);
4700
4701 btrfs_device_set_total_bytes(device, new_size);
4702 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
4703 device->fs_devices->total_rw_bytes -= diff;
4704 atomic64_sub(diff, &fs_info->free_chunk_space);
4705 }
4706
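	/*
	 * Once the device's size has been set to the new size, ensure all
	 * in-memory chunks are synced to disk so that the loop below sees them
	 * and relocates them accordingly, because the loop searches the device
	 * tree.
	 */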
4712 if (contains_pending_extent(device, &start, diff)) {
4713 mutex_unlock(&fs_info->chunk_mutex);
4714 ret = btrfs_commit_transaction(trans);
4715 if (ret)
4716 goto done;
4717 } else {
4718 mutex_unlock(&fs_info->chunk_mutex);
4719 btrfs_end_transaction(trans);
4720 }
4721
4722again:
4723 key.objectid = device->devid;
4724 key.offset = (u64)-1;
4725 key.type = BTRFS_DEV_EXTENT_KEY;
4726
4727 do {
4728 mutex_lock(&fs_info->delete_unused_bgs_mutex);
4729 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4730 if (ret < 0) {
4731 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4732 goto done;
4733 }
4734
4735 ret = btrfs_previous_item(root, path, 0, key.type);
4736 if (ret) {
4737 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4738 if (ret < 0)
4739 goto done;
4740 ret = 0;
4741 btrfs_release_path(path);
4742 break;
4743 }
4744
4745 l = path->nodes[0];
4746 slot = path->slots[0];
4747 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
4748
4749 if (key.objectid != device->devid) {
4750 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4751 btrfs_release_path(path);
4752 break;
4753 }
4754
4755 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
4756 length = btrfs_dev_extent_length(l, dev_extent);
4757
4758 if (key.offset + length <= new_size) {
4759 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4760 btrfs_release_path(path);
4761 break;
4762 }
4763
4764 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
4765 btrfs_release_path(path);
4766
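		/*
		 * We may be relocating the only data chunk we have,
		 * which could potentially end up with losing data's
		 * raid profile, so lets allocate an empty one in
		 * advance.
		 */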
4773 ret = btrfs_may_alloc_data_chunk(fs_info, chunk_offset);
4774 if (ret < 0) {
4775 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4776 goto done;
4777 }
4778
4779 ret = btrfs_relocate_chunk(fs_info, chunk_offset);
4780 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4781 if (ret == -ENOSPC) {
4782 failed++;
4783 } else if (ret) {
4784 if (ret == -ETXTBSY) {
4785 btrfs_warn(fs_info,
4786 "could not shrink block group %llu due to active swapfile",
4787 chunk_offset);
4788 }
4789 goto done;
4790 }
4791 } while (key.offset-- > 0);
4792
4793 if (failed && !retried) {
4794 failed = 0;
4795 retried = true;
4796 goto again;
4797 } else if (failed && retried) {
4798 ret = -ENOSPC;
4799 goto done;
4800 }
4801
4802
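	/* Shrinking succeeded, else we would be at "done". */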
4803 trans = btrfs_start_transaction(root, 0);
4804 if (IS_ERR(trans)) {
4805 ret = PTR_ERR(trans);
4806 goto done;
4807 }
4808
4809 mutex_lock(&fs_info->chunk_mutex);
4810
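	/* Clear all state bits beyond the shrunk device size */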
4811 clear_extent_bits(&device->alloc_state, new_size, (u64)-1,
4812 CHUNK_STATE_MASK);
4813
4814 btrfs_device_set_disk_total_bytes(device, new_size);
4815 if (list_empty(&device->post_commit_list))
4816 list_add_tail(&device->post_commit_list,
4817 &trans->transaction->dev_update_list);
4818
4819 WARN_ON(diff > old_total);
4820 btrfs_set_super_total_bytes(super_copy,
4821 round_down(old_total - diff, fs_info->sectorsize));
4822 mutex_unlock(&fs_info->chunk_mutex);
4823
4824
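	/* Now btrfs_update_device() will change the on-disk size. */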
4825 ret = btrfs_update_device(trans, device);
4826 if (ret < 0) {
4827 btrfs_abort_transaction(trans, ret);
4828 btrfs_end_transaction(trans);
4829 } else {
4830 ret = btrfs_commit_transaction(trans);
4831 }
4832done:
4833 btrfs_free_path(path);
4834 if (ret) {
4835 mutex_lock(&fs_info->chunk_mutex);
4836 btrfs_device_set_total_bytes(device, old_size);
4837 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
4838 device->fs_devices->total_rw_bytes += diff;
4839 atomic64_add(diff, &fs_info->free_chunk_space);
4840 mutex_unlock(&fs_info->chunk_mutex);
4841 }
4842 return ret;
4843}
4844
4845static int btrfs_add_system_chunk(struct btrfs_fs_info *fs_info,
4846 struct btrfs_key *key,
4847 struct btrfs_chunk *chunk, int item_size)
4848{
4849 struct btrfs_super_block *super_copy = fs_info->super_copy;
4850 struct btrfs_disk_key disk_key;
4851 u32 array_size;
4852 u8 *ptr;
4853
4854 mutex_lock(&fs_info->chunk_mutex);
4855 array_size = btrfs_super_sys_array_size(super_copy);
4856 if (array_size + item_size + sizeof(disk_key)
4857 > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
4858 mutex_unlock(&fs_info->chunk_mutex);
4859 return -EFBIG;
4860 }
4861
4862 ptr = super_copy->sys_chunk_array + array_size;
4863 btrfs_cpu_key_to_disk(&disk_key, key);
4864 memcpy(ptr, &disk_key, sizeof(disk_key));
4865 ptr += sizeof(disk_key);
4866 memcpy(ptr, chunk, item_size);
4867 item_size += sizeof(disk_key);
4868 btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
4869 mutex_unlock(&fs_info->chunk_mutex);
4870
4871 return 0;
4872}
4873
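/*
 * Sort the devices in descending order by max_avail, total_avail.
 */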
4877static int btrfs_cmp_device_info(const void *a, const void *b)
4878{
4879 const struct btrfs_device_info *di_a = a;
4880 const struct btrfs_device_info *di_b = b;
4881
4882 if (di_a->max_avail > di_b->max_avail)
4883 return -1;
4884 if (di_a->max_avail < di_b->max_avail)
4885 return 1;
4886 if (di_a->total_avail > di_b->total_avail)
4887 return -1;
4888 if (di_a->total_avail < di_b->total_avail)
4889 return 1;
4890 return 0;
4891}
4892
4893static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
4894{
4895 if (!(type & BTRFS_BLOCK_GROUP_RAID56_MASK))
4896 return;
4897
4898 btrfs_set_fs_incompat(info, RAID56);
4899}
4900
4901static void check_raid1c34_incompat_flag(struct btrfs_fs_info *info, u64 type)
4902{
4903 if (!(type & (BTRFS_BLOCK_GROUP_RAID1C3 | BTRFS_BLOCK_GROUP_RAID1C4)))
4904 return;
4905
4906 btrfs_set_fs_incompat(info, RAID1C34);
4907}
4908
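/*
 * Structure used internally by the chunk allocation path. Wraps the
 * parameters needed by btrfs_alloc_chunk() and its helpers.
 */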
4913struct alloc_chunk_ctl {
4914 u64 start;
4915 u64 type;
4916
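	/* Total number of stripes to allocate */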
4917 int num_stripes;
4918
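	/* sub_stripes info for map */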
4919 int sub_stripes;
4920
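	/* Stripes per device */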
4921 int dev_stripes;
4922
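	/* Maximum number of devices to use */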
4923 int devs_max;
4924
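	/* Minimum number of devices to use */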
4925 int devs_min;
4926
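	/* ndevs has to be a multiple of this */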
4927 int devs_increment;
4928
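	/* Number of copies */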
4929 int ncopies;
4930
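	/* Number of stripes worth of bytes to store parity information */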
4931 int nparity;
4932 u64 max_stripe_size;
4933 u64 max_chunk_size;
4934 u64 dev_extent_min;
4935 u64 stripe_size;
4936 u64 chunk_size;
4937 int ndevs;
4938};
4939
4940static void init_alloc_chunk_ctl_policy_regular(
4941 struct btrfs_fs_devices *fs_devices,
4942 struct alloc_chunk_ctl *ctl)
4943{
4944 u64 type = ctl->type;
4945
4946 if (type & BTRFS_BLOCK_GROUP_DATA) {
4947 ctl->max_stripe_size = SZ_1G;
4948 ctl->max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE;
4949 } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
4950
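		/* For larger filesystems, use larger metadata chunks */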
4951 if (fs_devices->total_rw_bytes > 50ULL * SZ_1G)
4952 ctl->max_stripe_size = SZ_1G;
4953 else
4954 ctl->max_stripe_size = SZ_256M;
4955 ctl->max_chunk_size = ctl->max_stripe_size;
4956 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
4957 ctl->max_stripe_size = SZ_32M;
4958 ctl->max_chunk_size = 2 * ctl->max_stripe_size;
4959 ctl->devs_max = min_t(int, ctl->devs_max,
4960 BTRFS_MAX_DEVS_SYS_CHUNK);
4961 } else {
4962 BUG();
4963 }
4964
4965
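	/* We don't want a chunk larger than 10% of writable space */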
4966 ctl->max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
4967 ctl->max_chunk_size);
4968 ctl->dev_extent_min = BTRFS_STRIPE_LEN * ctl->dev_stripes;
4969}
4970
4971static void init_alloc_chunk_ctl_policy_zoned(
4972 struct btrfs_fs_devices *fs_devices,
4973 struct alloc_chunk_ctl *ctl)
4974{
4975 u64 zone_size = fs_devices->fs_info->zone_size;
4976 u64 limit;
4977 int min_num_stripes = ctl->devs_min * ctl->dev_stripes;
4978 int min_data_stripes = (min_num_stripes - ctl->nparity) / ctl->ncopies;
4979 u64 min_chunk_size = min_data_stripes * zone_size;
4980 u64 type = ctl->type;
4981
4982 ctl->max_stripe_size = zone_size;
4983 if (type & BTRFS_BLOCK_GROUP_DATA) {
4984 ctl->max_chunk_size = round_down(BTRFS_MAX_DATA_CHUNK_SIZE,
4985 zone_size);
4986 } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
4987 ctl->max_chunk_size = ctl->max_stripe_size;
4988 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
4989 ctl->max_chunk_size = 2 * ctl->max_stripe_size;
4990 ctl->devs_max = min_t(int, ctl->devs_max,
4991 BTRFS_MAX_DEVS_SYS_CHUNK);
4992 }
4993
4994
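	/* We don't want a chunk larger than 10% of writable space */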
4995 limit = max(round_down(div_factor(fs_devices->total_rw_bytes, 1),
4996 zone_size),
4997 min_chunk_size);
4998 ctl->max_chunk_size = min(limit, ctl->max_chunk_size);
4999 ctl->dev_extent_min = zone_size * ctl->dev_stripes;
5000}
5001
5002static void init_alloc_chunk_ctl(struct btrfs_fs_devices *fs_devices,
5003 struct alloc_chunk_ctl *ctl)
5004{
5005 int index = btrfs_bg_flags_to_raid_index(ctl->type);
5006
5007 ctl->sub_stripes = btrfs_raid_array[index].sub_stripes;
5008 ctl->dev_stripes = btrfs_raid_array[index].dev_stripes;
5009 ctl->devs_max = btrfs_raid_array[index].devs_max;
5010 if (!ctl->devs_max)
5011 ctl->devs_max = BTRFS_MAX_DEVS(fs_devices->fs_info);
5012 ctl->devs_min = btrfs_raid_array[index].devs_min;
5013 ctl->devs_increment = btrfs_raid_array[index].devs_increment;
5014 ctl->ncopies = btrfs_raid_array[index].ncopies;
5015 ctl->nparity = btrfs_raid_array[index].nparity;
5016 ctl->ndevs = 0;
5017
5018 switch (fs_devices->chunk_alloc_policy) {
5019 case BTRFS_CHUNK_ALLOC_REGULAR:
5020 init_alloc_chunk_ctl_policy_regular(fs_devices, ctl);
5021 break;
5022 case BTRFS_CHUNK_ALLOC_ZONED:
5023 init_alloc_chunk_ctl_policy_zoned(fs_devices, ctl);
5024 break;
5025 default:
5026 BUG();
5027 }
5028}
5029
5030static int gather_device_info(struct btrfs_fs_devices *fs_devices,
5031 struct alloc_chunk_ctl *ctl,
5032 struct btrfs_device_info *devices_info)
5033{
5034 struct btrfs_fs_info *info = fs_devices->fs_info;
5035 struct btrfs_device *device;
5036 u64 total_avail;
5037 u64 dev_extent_want = ctl->max_stripe_size * ctl->dev_stripes;
5038 int ret;
5039 int ndevs = 0;
5040 u64 max_avail;
5041 u64 dev_offset;
5042
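	/*
	 * In the first pass through the devices list, we gather information
	 * about the available holes on each device.
	 */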
5047 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
5048 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
5049 WARN(1, KERN_ERR
5050 "BTRFS: read-only device in alloc_list\n");
5051 continue;
5052 }
5053
5054 if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
5055 &device->dev_state) ||
5056 test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
5057 continue;
5058
5059 if (device->total_bytes > device->bytes_used)
5060 total_avail = device->total_bytes - device->bytes_used;
5061 else
5062 total_avail = 0;
5063
5064
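		/* If there is no space on this device, skip it. */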
5065 if (total_avail < ctl->dev_extent_min)
5066 continue;
5067
5068 ret = find_free_dev_extent(device, dev_extent_want, &dev_offset,
5069 &max_avail);
5070 if (ret && ret != -ENOSPC)
5071 return ret;
5072
5073 if (ret == 0)
5074 max_avail = dev_extent_want;
5075
5076 if (max_avail < ctl->dev_extent_min) {
5077 if (btrfs_test_opt(info, ENOSPC_DEBUG))
5078 btrfs_debug(info,
5079 "%s: devid %llu has no free space, have=%llu want=%llu",
5080 __func__, device->devid, max_avail,
5081 ctl->dev_extent_min);
5082 continue;
5083 }
5084
5085 if (ndevs == fs_devices->rw_devices) {
5086 WARN(1, "%s: found more than %llu devices\n",
5087 __func__, fs_devices->rw_devices);
5088 break;
5089 }
5090 devices_info[ndevs].dev_offset = dev_offset;
5091 devices_info[ndevs].max_avail = max_avail;
5092 devices_info[ndevs].total_avail = total_avail;
5093 devices_info[ndevs].dev = device;
5094 ++ndevs;
5095 }
5096 ctl->ndevs = ndevs;
5097
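	/* Now sort the devices by hole size / available space. */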
5101 sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
5102 btrfs_cmp_device_info, NULL);
5103
5104 return 0;
5105}
5106
5107static int decide_stripe_size_regular(struct alloc_chunk_ctl *ctl,
5108 struct btrfs_device_info *devices_info)
5109{
5110
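	/* Number of stripes that count for block group size */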
5111 int data_stripes;
5112
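	/*
	 * The primary goal is to maximize the number of stripes, so use as
	 * many devices as possible, even if the stripes are not maximum sized.
	 *
	 * The DUP profile stores more than one stripe per device, the
	 * max_avail is the total size so we have to adjust.
	 */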
5120 ctl->stripe_size = div_u64(devices_info[ctl->ndevs - 1].max_avail,
5121 ctl->dev_stripes);
5122 ctl->num_stripes = ctl->ndevs * ctl->dev_stripes;
5123
5124
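	/* This will have to be fixed for RAID1 and RAID10 over more drives */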
5125 data_stripes = (ctl->num_stripes - ctl->nparity) / ctl->ncopies;
5126
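	/*
	 * Use the number of data stripes to figure out how big this chunk is
	 * really going to be in terms of logical address space, and compare
	 * that answer with the max chunk size. If it's higher, we try to
	 * reduce stripe_size.
	 */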
5133 if (ctl->stripe_size * data_stripes > ctl->max_chunk_size) {
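		/*
		 * Reduce stripe_size, round it up to a 16MB boundary again and
		 * then use it, unless it ends up being even bigger than the
		 * previous value we had already.
		 */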
5139 ctl->stripe_size = min(round_up(div_u64(ctl->max_chunk_size,
5140 data_stripes), SZ_16M),
5141 ctl->stripe_size);
5142 }
5143
5144
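	/* Align to BTRFS_STRIPE_LEN */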
5145 ctl->stripe_size = round_down(ctl->stripe_size, BTRFS_STRIPE_LEN);
5146 ctl->chunk_size = ctl->stripe_size * data_stripes;
5147
5148 return 0;
5149}
5150
5151static int decide_stripe_size_zoned(struct alloc_chunk_ctl *ctl,
5152 struct btrfs_device_info *devices_info)
5153{
5154 u64 zone_size = devices_info[0].dev->zone_info->zone_size;
5155
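	/* Number of stripes that count for block group size */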
5156 int data_stripes;
5157
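	/*
	 * It should hold because:
	 *   dev_extent_min == dev_extent_want == zone_size * dev_stripes
	 */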
5162 ASSERT(devices_info[ctl->ndevs - 1].max_avail == ctl->dev_extent_min);
5163
5164 ctl->stripe_size = zone_size;
5165 ctl->num_stripes = ctl->ndevs * ctl->dev_stripes;
5166 data_stripes = (ctl->num_stripes - ctl->nparity) / ctl->ncopies;
5167
5168
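	/* stripe_size is fixed in zoned filesystems, reduce ndevs instead. */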
5169 if (ctl->stripe_size * data_stripes > ctl->max_chunk_size) {
5170 ctl->ndevs = div_u64(div_u64(ctl->max_chunk_size * ctl->ncopies,
5171 ctl->stripe_size) + ctl->nparity,
5172 ctl->dev_stripes);
5173 ctl->num_stripes = ctl->ndevs * ctl->dev_stripes;
5174 data_stripes = (ctl->num_stripes - ctl->nparity) / ctl->ncopies;
5175 ASSERT(ctl->stripe_size * data_stripes <= ctl->max_chunk_size);
5176 }
5177
5178 ctl->chunk_size = ctl->stripe_size * data_stripes;
5179
5180 return 0;
5181}
5182
5183static int decide_stripe_size(struct btrfs_fs_devices *fs_devices,
5184 struct alloc_chunk_ctl *ctl,
5185 struct btrfs_device_info *devices_info)
5186{
5187 struct btrfs_fs_info *info = fs_devices->fs_info;
5188
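	/*
	 * Round down to number of usable stripes, devs_increment can be any
	 * number so we can't use round_down() which requires a power of 2,
	 * while rounddown is safe.
	 */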
5194 ctl->ndevs = rounddown(ctl->ndevs, ctl->devs_increment);
5195
5196 if (ctl->ndevs < ctl->devs_min) {
5197 if (btrfs_test_opt(info, ENOSPC_DEBUG)) {
5198 btrfs_debug(info,
5199 "%s: not enough devices with free space: have=%d minimum required=%d",
5200 __func__, ctl->ndevs, ctl->devs_min);
5201 }
5202 return -ENOSPC;
5203 }
5204
5205 ctl->ndevs = min(ctl->ndevs, ctl->devs_max);
5206
5207 switch (fs_devices->chunk_alloc_policy) {
5208 case BTRFS_CHUNK_ALLOC_REGULAR:
5209 return decide_stripe_size_regular(ctl, devices_info);
5210 case BTRFS_CHUNK_ALLOC_ZONED:
5211 return decide_stripe_size_zoned(ctl, devices_info);
5212 default:
5213 BUG();
5214 }
5215}
5216
5217static int create_chunk(struct btrfs_trans_handle *trans,
5218 struct alloc_chunk_ctl *ctl,
5219 struct btrfs_device_info *devices_info)
5220{
5221 struct btrfs_fs_info *info = trans->fs_info;
5222 struct map_lookup *map = NULL;
5223 struct extent_map_tree *em_tree;
5224 struct extent_map *em;
5225 u64 start = ctl->start;
5226 u64 type = ctl->type;
5227 int ret;
5228 int i;
5229 int j;
5230
5231 map = kmalloc(map_lookup_size(ctl->num_stripes), GFP_NOFS);
5232 if (!map)
5233 return -ENOMEM;
5234 map->num_stripes = ctl->num_stripes;
5235
5236 for (i = 0; i < ctl->ndevs; ++i) {
5237 for (j = 0; j < ctl->dev_stripes; ++j) {
5238 int s = i * ctl->dev_stripes + j;
5239 map->stripes[s].dev = devices_info[i].dev;
5240 map->stripes[s].physical = devices_info[i].dev_offset +
5241 j * ctl->stripe_size;
5242 }
5243 }
5244 map->stripe_len = BTRFS_STRIPE_LEN;
5245 map->io_align = BTRFS_STRIPE_LEN;
5246 map->io_width = BTRFS_STRIPE_LEN;
5247 map->type = type;
5248 map->sub_stripes = ctl->sub_stripes;
5249
5250 trace_btrfs_chunk_alloc(info, map, start, ctl->chunk_size);
5251
5252 em = alloc_extent_map();
5253 if (!em) {
5254 kfree(map);
5255 return -ENOMEM;
5256 }
5257 set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
5258 em->map_lookup = map;
5259 em->start = start;
5260 em->len = ctl->chunk_size;
5261 em->block_start = 0;
5262 em->block_len = em->len;
5263 em->orig_block_len = ctl->stripe_size;
5264
5265 em_tree = &info->mapping_tree;
5266 write_lock(&em_tree->lock);
5267 ret = add_extent_mapping(em_tree, em, 0);
5268 if (ret) {
5269 write_unlock(&em_tree->lock);
5270 free_extent_map(em);
5271 return ret;
5272 }
5273 write_unlock(&em_tree->lock);
5274
5275 ret = btrfs_make_block_group(trans, 0, type, start, ctl->chunk_size);
5276 if (ret)
5277 goto error_del_extent;
5278
5279 for (i = 0; i < map->num_stripes; i++) {
5280 struct btrfs_device *dev = map->stripes[i].dev;
5281
5282 btrfs_device_set_bytes_used(dev,
5283 dev->bytes_used + ctl->stripe_size);
5284 if (list_empty(&dev->post_commit_list))
5285 list_add_tail(&dev->post_commit_list,
5286 &trans->transaction->dev_update_list);
5287 }
5288
5289 atomic64_sub(ctl->stripe_size * map->num_stripes,
5290 &info->free_chunk_space);
5291
5292 free_extent_map(em);
5293 check_raid56_incompat_flag(info, type);
5294 check_raid1c34_incompat_flag(info, type);
5295
5296 return 0;
5297
5298error_del_extent:
5299 write_lock(&em_tree->lock);
5300 remove_extent_mapping(em_tree, em);
5301 write_unlock(&em_tree->lock);
5302
5303
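	/* One for our allocation */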
5304 free_extent_map(em);
5305
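	/* One for the extent map tree */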
5306 free_extent_map(em);
5307
5308 return ret;
5309}
5310
5311int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type)
5312{
5313 struct btrfs_fs_info *info = trans->fs_info;
5314 struct btrfs_fs_devices *fs_devices = info->fs_devices;
5315 struct btrfs_device_info *devices_info = NULL;
5316 struct alloc_chunk_ctl ctl;
5317 int ret;
5318
5319 lockdep_assert_held(&info->chunk_mutex);
5320
5321 if (!alloc_profile_is_valid(type, 0)) {
5322 ASSERT(0);
5323 return -EINVAL;
5324 }
5325
5326 if (list_empty(&fs_devices->alloc_list)) {
5327 if (btrfs_test_opt(info, ENOSPC_DEBUG))
5328 btrfs_debug(info, "%s: no writable device", __func__);
5329 return -ENOSPC;
5330 }
5331
5332 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
5333 btrfs_err(info, "invalid chunk type 0x%llx requested", type);
5334 ASSERT(0);
5335 return -EINVAL;
5336 }
5337
5338 ctl.start = find_next_chunk(info);
5339 ctl.type = type;
5340 init_alloc_chunk_ctl(fs_devices, &ctl);
5341
5342 devices_info = kcalloc(fs_devices->rw_devices, sizeof(*devices_info),
5343 GFP_NOFS);
5344 if (!devices_info)
5345 return -ENOMEM;
5346
5347 ret = gather_device_info(fs_devices, &ctl, devices_info);
5348 if (ret < 0)
5349 goto out;
5350
5351 ret = decide_stripe_size(fs_devices, &ctl, devices_info);
5352 if (ret < 0)
5353 goto out;
5354
5355 ret = create_chunk(trans, &ctl, devices_info);
5356
5357out:
5358 kfree(devices_info);
5359 return ret;
5360}
5361
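/*
 * Chunk allocation falls into two parts. The first part does the work that
 * makes the new allocated chunk usable, but does not do any operation that
 * modifies the chunk tree. The second part does the work that requires
 * modifying the chunk tree. This division is important for the bootstrap
 * process of adding storage to a seed btrfs.
 */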
5369int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
5370 u64 chunk_offset, u64 chunk_size)
5371{
5372 struct btrfs_fs_info *fs_info = trans->fs_info;
5373 struct btrfs_root *extent_root = fs_info->extent_root;
5374 struct btrfs_root *chunk_root = fs_info->chunk_root;
5375 struct btrfs_key key;
5376 struct btrfs_device *device;
5377 struct btrfs_chunk *chunk;
5378 struct btrfs_stripe *stripe;
5379 struct extent_map *em;
5380 struct map_lookup *map;
5381 size_t item_size;
5382 u64 dev_offset;
5383 u64 stripe_size;
5384 int i = 0;
5385 int ret = 0;
5386
5387 em = btrfs_get_chunk_map(fs_info, chunk_offset, chunk_size);
5388 if (IS_ERR(em))
5389 return PTR_ERR(em);
5390
5391 map = em->map_lookup;
5392 item_size = btrfs_chunk_item_size(map->num_stripes);
5393 stripe_size = em->orig_block_len;
5394
5395 chunk = kzalloc(item_size, GFP_NOFS);
5396 if (!chunk) {
5397 ret = -ENOMEM;
5398 goto out;
5399 }
5400
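	/*
	 * Take the device list mutex to prevent races with the final phase of
	 * a device replace operation that replaces the device object associated
	 * with the map's stripes, because the device object's id can change
	 * at any time during that final phase of the device replace operation
	 * (dev-replace.c:btrfs_dev_replace_finishing()).
	 */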
5408 mutex_lock(&fs_info->fs_devices->device_list_mutex);
5409 for (i = 0; i < map->num_stripes; i++) {
5410 device = map->stripes[i].dev;
5411 dev_offset = map->stripes[i].physical;
5412
5413 ret = btrfs_update_device(trans, device);
5414 if (ret)
5415 break;
5416 ret = btrfs_alloc_dev_extent(trans, device, chunk_offset,
5417 dev_offset, stripe_size);
5418 if (ret)
5419 break;
5420 }
5421 if (ret) {
5422 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
5423 goto out;
5424 }
5425
5426 stripe = &chunk->stripe;
5427 for (i = 0; i < map->num_stripes; i++) {
5428 device = map->stripes[i].dev;
5429 dev_offset = map->stripes[i].physical;
5430
5431 btrfs_set_stack_stripe_devid(stripe, device->devid);
5432 btrfs_set_stack_stripe_offset(stripe, dev_offset);
5433 memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
5434 stripe++;
5435 }
5436 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
5437
5438 btrfs_set_stack_chunk_length(chunk, chunk_size);
5439 btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
5440 btrfs_set_stack_chunk_stripe_len(chunk, map->stripe_len);
5441 btrfs_set_stack_chunk_type(chunk, map->type);
5442 btrfs_set_stack_chunk_num_stripes(chunk, map->num_stripes);
5443 btrfs_set_stack_chunk_io_align(chunk, map->stripe_len);
5444 btrfs_set_stack_chunk_io_width(chunk, map->stripe_len);
5445 btrfs_set_stack_chunk_sector_size(chunk, fs_info->sectorsize);
5446 btrfs_set_stack_chunk_sub_stripes(chunk, map->sub_stripes);
5447
5448 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
5449 key.type = BTRFS_CHUNK_ITEM_KEY;
5450 key.offset = chunk_offset;
5451
5452 ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size);
5453 if (ret == 0 && map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
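		/*
		 * A system chunk is also duplicated in the superblock's
		 * sys_chunk_array, so that the chunk tree itself can be read
		 * at mount time before any other tree is available.
		 */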
5458 ret = btrfs_add_system_chunk(fs_info, &key, chunk, item_size);
5459 }
5460
5461out:
5462 kfree(chunk);
5463 free_extent_map(em);
5464 return ret;
5465}
5466
5467static noinline int init_first_rw_device(struct btrfs_trans_handle *trans)
5468{
5469 struct btrfs_fs_info *fs_info = trans->fs_info;
5470 u64 alloc_profile;
5471 int ret;
5472
5473 alloc_profile = btrfs_metadata_alloc_profile(fs_info);
5474 ret = btrfs_alloc_chunk(trans, alloc_profile);
5475 if (ret)
5476 return ret;
5477
5478 alloc_profile = btrfs_system_alloc_profile(fs_info);
5479 ret = btrfs_alloc_chunk(trans, alloc_profile);
5480 return ret;
5481}
5482
5483static inline int btrfs_chunk_max_errors(struct map_lookup *map)
5484{
5485 const int index = btrfs_bg_flags_to_raid_index(map->type);
5486
5487 return btrfs_raid_array[index].tolerated_failures;
5488}
5489
5490int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset)
5491{
5492 struct extent_map *em;
5493 struct map_lookup *map;
5494 int readonly = 0;
5495 int miss_ndevs = 0;
5496 int i;
5497
5498 em = btrfs_get_chunk_map(fs_info, chunk_offset, 1);
5499 if (IS_ERR(em))
5500 return 1;
5501
5502 map = em->map_lookup;
5503 for (i = 0; i < map->num_stripes; i++) {
5504 if (test_bit(BTRFS_DEV_STATE_MISSING,
5505 &map->stripes[i].dev->dev_state)) {
5506 miss_ndevs++;
5507 continue;
5508 }
5509 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE,
5510 &map->stripes[i].dev->dev_state)) {
5511 readonly = 1;
5512 goto end;
5513 }
5514 }
5515
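	/*
	 * If the number of missing devices is larger than max errors, we can
	 * not write the data into that chunk successfully, so set it readonly.
	 */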
5521 if (miss_ndevs > btrfs_chunk_max_errors(map))
5522 readonly = 1;
5523end:
5524 free_extent_map(em);
5525 return readonly;
5526}
5527
5528void btrfs_mapping_tree_free(struct extent_map_tree *tree)
5529{
5530 struct extent_map *em;
5531
5532 while (1) {
5533 write_lock(&tree->lock);
5534 em = lookup_extent_mapping(tree, 0, (u64)-1);
5535 if (em)
5536 remove_extent_mapping(tree, em);
5537 write_unlock(&tree->lock);
5538 if (!em)
5539 break;
5540
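		/* Once for us */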
5541 free_extent_map(em);
5542
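		/* Once for the tree */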
5543 free_extent_map(em);
5544 }
5545}
5546
5547int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
5548{
5549 struct extent_map *em;
5550 struct map_lookup *map;
5551 int ret;
5552
5553 em = btrfs_get_chunk_map(fs_info, logical, len);
5554 if (IS_ERR(em))
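		/*
		 * We could return errors for these cases, but that could get
		 * ugly and we'd probably do the same thing which is just not
		 * do anything else and exit, so return 1 so the callers don't
		 * try to use other copies.
		 */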
5561 return 1;
5562
5563 map = em->map_lookup;
5564 if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1_MASK))
5565 ret = map->num_stripes;
5566 else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
5567 ret = map->sub_stripes;
5568 else if (map->type & BTRFS_BLOCK_GROUP_RAID5)
5569 ret = 2;
5570 else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
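		/*
		 * There could be two corrupted data stripes, we need to loop
		 * retry in order to rebuild the correct data.
		 *
		 * Fail a stripe at a time on every retry except the stripe
		 * under reconstruction.
		 */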
5578 ret = map->num_stripes;
5579 else
5580 ret = 1;
5581 free_extent_map(em);
5582
5583 down_read(&fs_info->dev_replace.rwsem);
5584 if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace) &&
5585 fs_info->dev_replace.tgtdev)
5586 ret++;
5587 up_read(&fs_info->dev_replace.rwsem);
5588
5589 return ret;
5590}
5591
5592unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
5593 u64 logical)
5594{
5595 struct extent_map *em;
5596 struct map_lookup *map;
5597 unsigned long len = fs_info->sectorsize;
5598
5599 em = btrfs_get_chunk_map(fs_info, logical, len);
5600
5601 if (!WARN_ON(IS_ERR(em))) {
5602 map = em->map_lookup;
5603 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
5604 len = map->stripe_len * nr_data_stripes(map);
5605 free_extent_map(em);
5606 }
5607 return len;
5608}
5609
5610int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
5611{
5612 struct extent_map *em;
5613 struct map_lookup *map;
5614 int ret = 0;
5615
5616 em = btrfs_get_chunk_map(fs_info, logical, len);
5617
	if (!WARN_ON(IS_ERR(em))) {
5619 map = em->map_lookup;
5620 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
5621 ret = 1;
5622 free_extent_map(em);
5623 }
5624 return ret;
5625}
5626
5627static int find_live_mirror(struct btrfs_fs_info *fs_info,
5628 struct map_lookup *map, int first,
5629 int dev_replace_is_ongoing)
5630{
5631 int i;
5632 int num_stripes;
5633 int preferred_mirror;
5634 int tolerance;
5635 struct btrfs_device *srcdev;
5636
5637 ASSERT((map->type &
5638 (BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10)));
5639
5640 if (map->type & BTRFS_BLOCK_GROUP_RAID10)
5641 num_stripes = map->sub_stripes;
5642 else
5643 num_stripes = map->num_stripes;
5644
5645 switch (fs_info->fs_devices->read_policy) {
5646 default:
5647
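		/* Shouldn't happen, just warn and use pid instead of failing */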
5648 btrfs_warn_rl(fs_info,
5649 "unknown read_policy type %u, reset to pid",
5650 fs_info->fs_devices->read_policy);
5651 fs_info->fs_devices->read_policy = BTRFS_READ_POLICY_PID;
5652 fallthrough;
5653 case BTRFS_READ_POLICY_PID:
5654 preferred_mirror = first + (current->pid % num_stripes);
5655 break;
5656 }
5657
5658 if (dev_replace_is_ongoing &&
5659 fs_info->dev_replace.cont_reading_from_srcdev_mode ==
5660 BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID)
5661 srcdev = fs_info->dev_replace.srcdev;
5662 else
5663 srcdev = NULL;
5664
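	/*
	 * Try to avoid the drive that is the source drive for a dev-replace
	 * procedure, only choose it if no other non-missing mirror is
	 * available.
	 */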
5670 for (tolerance = 0; tolerance < 2; tolerance++) {
5671 if (map->stripes[preferred_mirror].dev->bdev &&
5672 (tolerance || map->stripes[preferred_mirror].dev != srcdev))
5673 return preferred_mirror;
5674 for (i = first; i < first + num_stripes; i++) {
5675 if (map->stripes[i].dev->bdev &&
5676 (tolerance || map->stripes[i].dev != srcdev))
5677 return i;
5678 }
5679 }
5680
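	/*
	 * We couldn't find one that doesn't fail. Just return something and
	 * the io error handling code will clean up eventually.
	 */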
5684 return preferred_mirror;
5685}
5686
5687
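/* Bubble-sort the stripe set to put the parity/syndrome stripes last */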
5688static void sort_parity_stripes(struct btrfs_bio *bbio, int num_stripes)
5689{
5690 int i;
5691 int again = 1;
5692
5693 while (again) {
5694 again = 0;
5695 for (i = 0; i < num_stripes - 1; i++) {
5696
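			/* Swap if parity is on a smaller index */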
5697 if (bbio->raid_map[i] > bbio->raid_map[i + 1]) {
5698 swap(bbio->stripes[i], bbio->stripes[i + 1]);
5699 swap(bbio->raid_map[i], bbio->raid_map[i + 1]);
5700 again = 1;
5701 }
5702 }
5703 }
5704}
5705
5706static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes)
5707{
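	/*
	 * The struct is followed by three variable length arrays: the stripes
	 * themselves (total_stripes entries), the tgtdev map (real_stripes
	 * ints) and the raid_map (total_stripes u64s, covering data as well
	 * as parity stripes), see the pointer setup below.
	 */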
	struct btrfs_bio *bbio = kzalloc(
		sizeof(struct btrfs_bio) +
		sizeof(struct btrfs_bio_stripe) * (total_stripes) +
		sizeof(int) * (real_stripes) +
		sizeof(u64) * (total_stripes),
		GFP_NOFS|__GFP_NOFAIL);
5721
5722 atomic_set(&bbio->error, 0);
5723 refcount_set(&bbio->refs, 1);
5724
5725 bbio->tgtdev_map = (int *)(bbio->stripes + total_stripes);
5726 bbio->raid_map = (u64 *)(bbio->tgtdev_map + real_stripes);
5727
5728 return bbio;
5729}
5730
5731void btrfs_get_bbio(struct btrfs_bio *bbio)
5732{
5733 WARN_ON(!refcount_read(&bbio->refs));
5734 refcount_inc(&bbio->refs);
5735}
5736
5737void btrfs_put_bbio(struct btrfs_bio *bbio)
5738{
5739 if (!bbio)
5740 return;
5741 if (refcount_dec_and_test(&bbio->refs))
5742 kfree(bbio);
5743}
5744
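/*
 * Please note that, discard won't be sent to target device of device
 * replace.
 */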
5750static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info,
5751 u64 logical, u64 *length_ret,
5752 struct btrfs_bio **bbio_ret)
5753{
5754 struct extent_map *em;
5755 struct map_lookup *map;
5756 struct btrfs_bio *bbio;
5757 u64 length = *length_ret;
5758 u64 offset;
5759 u64 stripe_nr;
5760 u64 stripe_nr_end;
5761 u64 stripe_end_offset;
5762 u64 stripe_cnt;
5763 u64 stripe_len;
5764 u64 stripe_offset;
5765 u64 num_stripes;
5766 u32 stripe_index;
5767 u32 factor = 0;
5768 u32 sub_stripes = 0;
5769 u64 stripes_per_dev = 0;
5770 u32 remaining_stripes = 0;
5771 u32 last_stripe = 0;
5772 int ret = 0;
5773 int i;
5774
5775
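	/* Discard always returns a bbio. */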
5776 ASSERT(bbio_ret);
5777
5778 em = btrfs_get_chunk_map(fs_info, logical, length);
5779 if (IS_ERR(em))
5780 return PTR_ERR(em);
5781
5782 map = em->map_lookup;
5783
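	/* We don't discard raid56 yet. */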
5784 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
5785 ret = -EOPNOTSUPP;
5786 goto out;
5787 }
5788
5789 offset = logical - em->start;
5790 length = min_t(u64, em->start + em->len - logical, length);
5791 *length_ret = length;
5792
5793 stripe_len = map->stripe_len;
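	/*
	 * stripe_nr counts the total number of stripes we have to stride
	 * to get to this block.
	 */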
5798 stripe_nr = div64_u64(offset, stripe_len);
5799
5800
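	/* stripe_offset is the offset of this block in its stripe */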
5801 stripe_offset = offset - stripe_nr * stripe_len;
5802
5803 stripe_nr_end = round_up(offset + length, map->stripe_len);
5804 stripe_nr_end = div64_u64(stripe_nr_end, map->stripe_len);
5805 stripe_cnt = stripe_nr_end - stripe_nr;
5806 stripe_end_offset = stripe_nr_end * map->stripe_len -
5807 (offset + length);
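	/*
	 * After this, stripe_nr is the number of stripes on this
	 * device we have to walk to find the data, and stripe_index is
	 * the number of our device in the stripe array.
	 */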
5813 num_stripes = 1;
5814 stripe_index = 0;
5815 if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
5816 BTRFS_BLOCK_GROUP_RAID10)) {
5817 if (map->type & BTRFS_BLOCK_GROUP_RAID0)
5818 sub_stripes = 1;
5819 else
5820 sub_stripes = map->sub_stripes;
5821
5822 factor = map->num_stripes / sub_stripes;
5823 num_stripes = min_t(u64, map->num_stripes,
5824 sub_stripes * stripe_cnt);
5825 stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
5826 stripe_index *= sub_stripes;
5827 stripes_per_dev = div_u64_rem(stripe_cnt, factor,
5828 &remaining_stripes);
5829 div_u64_rem(stripe_nr_end - 1, factor, &last_stripe);
5830 last_stripe *= sub_stripes;
5831 } else if (map->type & (BTRFS_BLOCK_GROUP_RAID1_MASK |
5832 BTRFS_BLOCK_GROUP_DUP)) {
5833 num_stripes = map->num_stripes;
5834 } else {
5835 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
5836 &stripe_index);
5837 }
5838
5839 bbio = alloc_btrfs_bio(num_stripes, 0);
5840 if (!bbio) {
5841 ret = -ENOMEM;
5842 goto out;
5843 }
5844
5845 for (i = 0; i < num_stripes; i++) {
5846 bbio->stripes[i].physical =
5847 map->stripes[stripe_index].physical +
5848 stripe_offset + stripe_nr * map->stripe_len;
5849 bbio->stripes[i].dev = map->stripes[stripe_index].dev;
5850
5851 if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
5852 BTRFS_BLOCK_GROUP_RAID10)) {
5853 bbio->stripes[i].length = stripes_per_dev *
5854 map->stripe_len;
5855
5856 if (i / sub_stripes < remaining_stripes)
5857 bbio->stripes[i].length +=
5858 map->stripe_len;
5859
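			/*
			 * Special for the first stripe and
			 * the last stripe:
			 *
			 * |-------|...|-------|
			 *     |----------|
			 *    off     end_off
			 */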
5868 if (i < sub_stripes)
5869 bbio->stripes[i].length -=
5870 stripe_offset;
5871
5872 if (stripe_index >= last_stripe &&
5873 stripe_index <= (last_stripe +
5874 sub_stripes - 1))
5875 bbio->stripes[i].length -=
5876 stripe_end_offset;
5877
5878 if (i == sub_stripes - 1)
5879 stripe_offset = 0;
5880 } else {
5881 bbio->stripes[i].length = length;
5882 }
5883
5884 stripe_index++;
5885 if (stripe_index == map->num_stripes) {
5886 stripe_index = 0;
5887 stripe_nr++;
5888 }
5889 }
5890
5891 *bbio_ret = bbio;
5892 bbio->map_type = map->type;
5893 bbio->num_stripes = num_stripes;
5894out:
5895 free_extent_map(em);
5896 return ret;
5897}
5898
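/*
 * In dev-replace case, for repair case (that's the only case where the mirror
 * is selected explicitly when calling btrfs_map_block), blocks left of the
 * left cursor can also be read from the target drive.
 *
 * For BTRFS_MAP_GET_READ_MIRRORS, the target drive is added as the last one
 * to the array of stripes.
 * For READ, it also needs to be supported using the same mirror number.
 *
 * If the requested block is not left of the left cursor, EIO is returned.
 * This can happen because btrfs_num_copies() returns one more in the
 * dev-replace case.
 */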
5912static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info,
5913 u64 logical, u64 length,
5914 u64 srcdev_devid, int *mirror_num,
5915 u64 *physical)
5916{
5917 struct btrfs_bio *bbio = NULL;
5918 int num_stripes;
5919 int index_srcdev = 0;
5920 int found = 0;
5921 u64 physical_of_found = 0;
5922 int i;
5923 int ret = 0;
5924
5925 ret = __btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
5926 logical, &length, &bbio, 0, 0);
5927 if (ret) {
5928 ASSERT(bbio == NULL);
5929 return ret;
5930 }
5931
5932 num_stripes = bbio->num_stripes;
5933 if (*mirror_num > num_stripes) {
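		/*
		 * BTRFS_MAP_GET_READ_MIRRORS does not contain this mirror,
		 * that means that the requested area is not left of the left
		 * cursor.
		 */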
5939 btrfs_put_bbio(bbio);
5940 return -EIO;
5941 }
5942
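	/*
	 * Process the rest of the function using the mirror_num of the source
	 * drive. Therefore look it up first. At the end, patch the device
	 * pointer to the one of the target drive.
	 */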
5948 for (i = 0; i < num_stripes; i++) {
5949 if (bbio->stripes[i].dev->devid != srcdev_devid)
5950 continue;
5951
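		/*
		 * In case of DUP, in order to keep it simple, only add the
		 * mirror with the lowest physical address.
		 */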
5956 if (found &&
5957 physical_of_found <= bbio->stripes[i].physical)
5958 continue;
5959
5960 index_srcdev = i;
5961 found = 1;
5962 physical_of_found = bbio->stripes[i].physical;
5963 }
5964
5965 btrfs_put_bbio(bbio);
5966
5967 ASSERT(found);
5968 if (!found)
5969 return -EIO;
5970
5971 *mirror_num = index_srcdev + 1;
5972 *physical = physical_of_found;
5973 return ret;
5974}
5975
5976static bool is_block_group_to_copy(struct btrfs_fs_info *fs_info, u64 logical)
5977{
5978 struct btrfs_block_group *cache;
5979 bool ret;
5980
5981
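	/* Non-zoned filesystems do not use the "to_copy" flag. */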
5982 if (!btrfs_is_zoned(fs_info))
5983 return false;
5984
5985 cache = btrfs_lookup_block_group(fs_info, logical);
5986
5987 spin_lock(&cache->lock);
5988 ret = cache->to_copy;
5989 spin_unlock(&cache->lock);
5990
5991 btrfs_put_block_group(cache);
5992 return ret;
5993}
5994
5995static void handle_ops_on_dev_replace(enum btrfs_map_op op,
5996 struct btrfs_bio **bbio_ret,
5997 struct btrfs_dev_replace *dev_replace,
5998 u64 logical,
5999 int *num_stripes_ret, int *max_errors_ret)
6000{
6001 struct btrfs_bio *bbio = *bbio_ret;
6002 u64 srcdev_devid = dev_replace->srcdev->devid;
6003 int tgtdev_indexes = 0;
6004 int num_stripes = *num_stripes_ret;
6005 int max_errors = *max_errors_ret;
6006 int i;
6007
6008 if (op == BTRFS_MAP_WRITE) {
6009 int index_where_to_add;
6010
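		/*
		 * A block group which has "to_copy" set will eventually be
		 * copied by the dev-replace process. We can avoid cloning IO
		 * here.
		 */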
6015 if (is_block_group_to_copy(dev_replace->srcdev->fs_info, logical))
6016 return;
6017
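		/*
		 * Duplicate the write operations while the dev-replace
		 * procedure is running. Since the copying of the old disk to
		 * the new disk takes place at run time while the filesystem is
		 * mounted writable, the regular write operations to the old
		 * disk have to be duplicated to go to the new disk as well.
		 *
		 * Note that device->missing is handled by the caller, and that
		 * the write to the old disk is already set up in the stripes
		 * array.
		 */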
6029 index_where_to_add = num_stripes;
6030 for (i = 0; i < num_stripes; i++) {
6031 if (bbio->stripes[i].dev->devid == srcdev_devid) {
6032
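				/* Write to the new disk, too. */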
6033 struct btrfs_bio_stripe *new =
6034 bbio->stripes + index_where_to_add;
6035 struct btrfs_bio_stripe *old =
6036 bbio->stripes + i;
6037
6038 new->physical = old->physical;
6039 new->length = old->length;
6040 new->dev = dev_replace->tgtdev;
6041 bbio->tgtdev_map[i] = index_where_to_add;
6042 index_where_to_add++;
6043 max_errors++;
6044 tgtdev_indexes++;
6045 }
6046 }
6047 num_stripes = index_where_to_add;
6048 } else if (op == BTRFS_MAP_GET_READ_MIRRORS) {
6049 int index_srcdev = 0;
6050 int found = 0;
6051 u64 physical_of_found = 0;
6052
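		/*
		 * During the dev-replace procedure, the target drive can also
		 * be used to read data in case it is needed to repair a corrupt
		 * block elsewhere. This is possible if the requested area is
		 * left of the left cursor. In this area, the target drive is a
		 * full copy of the source drive.
		 */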
6060 for (i = 0; i < num_stripes; i++) {
6061 if (bbio->stripes[i].dev->devid == srcdev_devid) {
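				/*
				 * In case of DUP, in order to keep it simple,
				 * only add the mirror with the lowest physical
				 * address.
				 */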
6067 if (found &&
6068 physical_of_found <=
6069 bbio->stripes[i].physical)
6070 continue;
6071 index_srcdev = i;
6072 found = 1;
6073 physical_of_found = bbio->stripes[i].physical;
6074 }
6075 }
6076 if (found) {
6077 struct btrfs_bio_stripe *tgtdev_stripe =
6078 bbio->stripes + num_stripes;
6079
6080 tgtdev_stripe->physical = physical_of_found;
6081 tgtdev_stripe->length =
6082 bbio->stripes[index_srcdev].length;
6083 tgtdev_stripe->dev = dev_replace->tgtdev;
6084 bbio->tgtdev_map[index_srcdev] = num_stripes;
6085
6086 tgtdev_indexes++;
6087 num_stripes++;
6088 }
6089 }
6090
6091 *num_stripes_ret = num_stripes;
6092 *max_errors_ret = max_errors;
6093 bbio->num_tgtdevs = tgtdev_indexes;
6094 *bbio_ret = bbio;
6095}
6096
6097static bool need_full_stripe(enum btrfs_map_op op)
6098{
6099 return (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS);
6100}
6101
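/*
 * Calculate the geometry of a particular (address, len) tuple. This
 * information is used to calculate how big a particular bio can get before it
 * straddles a stripe.
 *
 * @fs_info: the filesystem
 * @em:      mapping containing the logical extent
 * @op:      type of operation - write or read
 * @logical: address that we want to figure out the geometry of
 * @len:     the length of IO we are going to perform, starting at @logical
 * @io_geom: pointer used to return values
 *
 * Returns < 0 in case a chunk for the given logical address cannot be found,
 * usually shouldn't happen unless @logical is corrupted, 0 otherwise.
 */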
6117int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *em,
6118 enum btrfs_map_op op, u64 logical, u64 len,
6119 struct btrfs_io_geometry *io_geom)
6120{
6121 struct map_lookup *map;
6122 u64 offset;
6123 u64 stripe_offset;
6124 u64 stripe_nr;
6125 u64 stripe_len;
6126 u64 raid56_full_stripe_start = (u64)-1;
6127 int data_stripes;
6128
6129 ASSERT(op != BTRFS_MAP_DISCARD);
6130
6131 map = em->map_lookup;
6132
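	/* Offset of this logical address in the chunk */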
6133 offset = logical - em->start;
6134
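	/* Len of a stripe in a chunk */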
6135 stripe_len = map->stripe_len;
6136
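	/* Stripe where this block falls in */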
6137 stripe_nr = div64_u64(offset, stripe_len);
6138
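	/* Offset of stripe in the chunk */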
6139 stripe_offset = stripe_nr * stripe_len;
6140 if (offset < stripe_offset) {
6141 btrfs_crit(fs_info,
6142"stripe math has gone wrong, stripe_offset=%llu offset=%llu start=%llu logical=%llu stripe_len=%llu",
6143 stripe_offset, offset, em->start, logical, stripe_len);
6144 return -EINVAL;
6145 }
6146
6147
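	/* stripe_offset is the offset of this block in its stripe */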
6148 stripe_offset = offset - stripe_offset;
6149 data_stripes = nr_data_stripes(map);
6150
6151 if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
6152 u64 max_len = stripe_len - stripe_offset;
6153
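		/*
		 * In case of raid56, we need to know the stripe aligned start
		 */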
6157 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
6158 unsigned long full_stripe_len = stripe_len * data_stripes;
6159 raid56_full_stripe_start = offset;
6160
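			/*
			 * Allow a write of a full stripe, but make sure we
			 * don't allow straddling of stripes.
			 */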
6165 raid56_full_stripe_start = div64_u64(raid56_full_stripe_start,
6166 full_stripe_len);
6167 raid56_full_stripe_start *= full_stripe_len;
6168
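			/*
			 * For writes to RAID[56], allow a full stripeset across
			 * all disks. For other RAID types and for RAID[56]
			 * reads, just allow a single stripe (on a single disk).
			 */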
6174 if (op == BTRFS_MAP_WRITE) {
6175 max_len = stripe_len * data_stripes -
6176 (offset - raid56_full_stripe_start);
6177 }
6178 }
6179 len = min_t(u64, em->len - offset, max_len);
6180 } else {
6181 len = em->len - offset;
6182 }
6183
6184 io_geom->len = len;
6185 io_geom->offset = offset;
6186 io_geom->stripe_len = stripe_len;
6187 io_geom->stripe_nr = stripe_nr;
6188 io_geom->stripe_offset = stripe_offset;
6189 io_geom->raid56_stripe_offset = raid56_full_stripe_start;
6190
6191 return 0;
6192}
6193
6194static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
6195 enum btrfs_map_op op,
6196 u64 logical, u64 *length,
6197 struct btrfs_bio **bbio_ret,
6198 int mirror_num, int need_raid_map)
6199{
6200 struct extent_map *em;
6201 struct map_lookup *map;
6202 u64 stripe_offset;
6203 u64 stripe_nr;
6204 u64 stripe_len;
6205 u32 stripe_index;
6206 int data_stripes;
6207 int i;
6208 int ret = 0;
6209 int num_stripes;
6210 int max_errors = 0;
6211 int tgtdev_indexes = 0;
6212 struct btrfs_bio *bbio = NULL;
6213 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
6214 int dev_replace_is_ongoing = 0;
6215 int num_alloc_stripes;
6216 int patch_the_first_stripe_for_dev_replace = 0;
6217 u64 physical_to_patch_in_first_stripe = 0;
6218 u64 raid56_full_stripe_start = (u64)-1;
6219 struct btrfs_io_geometry geom;
6220
6221 ASSERT(bbio_ret);
6222 ASSERT(op != BTRFS_MAP_DISCARD);
6223
6224 em = btrfs_get_chunk_map(fs_info, logical, *length);
6225 ASSERT(!IS_ERR(em));
6226
6227 ret = btrfs_get_io_geometry(fs_info, em, op, logical, *length, &geom);
6228 if (ret < 0)
6229 return ret;
6230
6231 map = em->map_lookup;
6232
6233 *length = geom.len;
6234 stripe_len = geom.stripe_len;
6235 stripe_nr = geom.stripe_nr;
6236 stripe_offset = geom.stripe_offset;
6237 raid56_full_stripe_start = geom.raid56_stripe_offset;
6238 data_stripes = nr_data_stripes(map);
6239
6240 down_read(&dev_replace->rwsem);
6241 dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
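	/*
	 * Hold the semaphore for read during the whole operation, write is
	 * requested at commit time but must wait.
	 */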
6246 if (!dev_replace_is_ongoing)
6247 up_read(&dev_replace->rwsem);
6248
6249 if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
6250 !need_full_stripe(op) && dev_replace->tgtdev != NULL) {
6251 ret = get_extra_mirror_from_replace(fs_info, logical, *length,
6252 dev_replace->srcdev->devid,
6253 &mirror_num,
6254 &physical_to_patch_in_first_stripe);
6255 if (ret)
6256 goto out;
6257 else
6258 patch_the_first_stripe_for_dev_replace = 1;
6259 } else if (mirror_num > map->num_stripes) {
6260 mirror_num = 0;
6261 }
6262
6263 num_stripes = 1;
6264 stripe_index = 0;
6265 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
6266 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
6267 &stripe_index);
6268 if (!need_full_stripe(op))
6269 mirror_num = 1;
6270 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
6271 if (need_full_stripe(op))
6272 num_stripes = map->num_stripes;
6273 else if (mirror_num)
6274 stripe_index = mirror_num - 1;
6275 else {
6276 stripe_index = find_live_mirror(fs_info, map, 0,
6277 dev_replace_is_ongoing);
6278 mirror_num = stripe_index + 1;
6279 }
6280
6281 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
6282 if (need_full_stripe(op)) {
6283 num_stripes = map->num_stripes;
6284 } else if (mirror_num) {
6285 stripe_index = mirror_num - 1;
6286 } else {
6287 mirror_num = 1;
6288 }
6289
6290 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
6291 u32 factor = map->num_stripes / map->sub_stripes;
6292
6293 stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
6294 stripe_index *= map->sub_stripes;
6295
6296 if (need_full_stripe(op))
6297 num_stripes = map->sub_stripes;
6298 else if (mirror_num)
6299 stripe_index += mirror_num - 1;
6300 else {
6301 int old_stripe_index = stripe_index;
6302 stripe_index = find_live_mirror(fs_info, map,
6303 stripe_index,
6304 dev_replace_is_ongoing);
6305 mirror_num = stripe_index - old_stripe_index + 1;
6306 }
6307
6308 } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
6309 if (need_raid_map && (need_full_stripe(op) || mirror_num > 1)) {
6310
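			/* Push stripe_nr back to the start of the full stripe */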
6311 stripe_nr = div64_u64(raid56_full_stripe_start,
6312 stripe_len * data_stripes);
6313
6314
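			/* RAID[56] write or recovery. Return all stripes */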
6315 num_stripes = map->num_stripes;
6316 max_errors = nr_parity_stripes(map);
6317
6318 *length = map->stripe_len;
6319 stripe_index = 0;
6320 stripe_offset = 0;
6321 } else {
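			/*
			 * Mirror #0 or #1 means the original data block.
			 * Mirror #2 is RAID5 parity block.
			 * Mirror #3 is RAID6 Q block.
			 */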
6327 stripe_nr = div_u64_rem(stripe_nr,
6328 data_stripes, &stripe_index);
6329 if (mirror_num > 1)
6330 stripe_index = data_stripes + mirror_num - 2;
6331
6332
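			/* We distribute the parity blocks across stripes */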
6333 div_u64_rem(stripe_nr + stripe_index, map->num_stripes,
6334 &stripe_index);
6335 if (!need_full_stripe(op) && mirror_num <= 1)
6336 mirror_num = 1;
6337 }
6338 } else {
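		/*
		 * After this, stripe_nr is the number of stripes on this
		 * device we have to walk to find the data, and stripe_index is
		 * the number of our device in the stripe array.
		 */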
6344 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
6345 &stripe_index);
6346 mirror_num = stripe_index + 1;
6347 }
6348 if (stripe_index >= map->num_stripes) {
6349 btrfs_crit(fs_info,
6350 "stripe index math went horribly wrong, got stripe_index=%u, num_stripes=%u",
6351 stripe_index, map->num_stripes);
6352 ret = -EINVAL;
6353 goto out;
6354 }
6355
6356 num_alloc_stripes = num_stripes;
6357 if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) {
6358 if (op == BTRFS_MAP_WRITE)
6359 num_alloc_stripes <<= 1;
6360 if (op == BTRFS_MAP_GET_READ_MIRRORS)
6361 num_alloc_stripes++;
6362 tgtdev_indexes = num_stripes;
6363 }
6364
6365 bbio = alloc_btrfs_bio(num_alloc_stripes, tgtdev_indexes);
6366 if (!bbio) {
6367 ret = -ENOMEM;
6368 goto out;
6369 }
6370
6371 for (i = 0; i < num_stripes; i++) {
6372 bbio->stripes[i].physical = map->stripes[stripe_index].physical +
6373 stripe_offset + stripe_nr * map->stripe_len;
6374 bbio->stripes[i].dev = map->stripes[stripe_index].dev;
6375 stripe_index++;
6376 }
6377
6378
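	/* Build raid_map */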
6379 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map &&
6380 (need_full_stripe(op) || mirror_num > 1)) {
6381 u64 tmp;
6382 unsigned rot;
6383
6384
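		/* Work out the disk rotation on this stripe-set */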
6385 div_u64_rem(stripe_nr, num_stripes, &rot);
6386
6387
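		/* Fill in the logical address of each stripe */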
6388 tmp = stripe_nr * data_stripes;
6389 for (i = 0; i < data_stripes; i++)
6390 bbio->raid_map[(i+rot) % num_stripes] =
6391 em->start + (tmp + i) * map->stripe_len;
6392
6393 bbio->raid_map[(i+rot) % map->num_stripes] = RAID5_P_STRIPE;
6394 if (map->type & BTRFS_BLOCK_GROUP_RAID6)
6395 bbio->raid_map[(i+rot+1) % num_stripes] =
6396 RAID6_Q_STRIPE;
6397
6398 sort_parity_stripes(bbio, num_stripes);
6399 }
6400
6401 if (need_full_stripe(op))
6402 max_errors = btrfs_chunk_max_errors(map);
6403
6404 if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL &&
6405 need_full_stripe(op)) {
6406 handle_ops_on_dev_replace(op, &bbio, dev_replace, logical,
6407 &num_stripes, &max_errors);
6408 }
6409
6410 *bbio_ret = bbio;
6411 bbio->map_type = map->type;
6412 bbio->num_stripes = num_stripes;
6413 bbio->max_errors = max_errors;
6414 bbio->mirror_num = mirror_num;
6415
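	/*
	 * This is the case that READ && dev_replace_is_ongoing &&
	 * mirror_num == num_stripes + 1 && dev_replace target drive is
	 * available as a mirror.
	 */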
6421 if (patch_the_first_stripe_for_dev_replace && num_stripes > 0) {
6422 WARN_ON(num_stripes > 1);
6423 bbio->stripes[0].dev = dev_replace->tgtdev;
6424 bbio->stripes[0].physical = physical_to_patch_in_first_stripe;
6425 bbio->mirror_num = map->num_stripes + 1;
6426 }
6427out:
6428 if (dev_replace_is_ongoing) {
6429 lockdep_assert_held(&dev_replace->rwsem);
6430
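		/* Unlock and let waiting writers proceed */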
6431 up_read(&dev_replace->rwsem);
6432 }
6433 free_extent_map(em);
6434 return ret;
6435}
6436
6437int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
6438 u64 logical, u64 *length,
6439 struct btrfs_bio **bbio_ret, int mirror_num)
6440{
6441 if (op == BTRFS_MAP_DISCARD)
6442 return __btrfs_map_block_for_discard(fs_info, logical,
6443 length, bbio_ret);
6444
6445 return __btrfs_map_block(fs_info, op, logical, length, bbio_ret,
6446 mirror_num, 0);
6447}
6448
6449
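/* For Scrub/replace */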
6450int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
6451 u64 logical, u64 *length,
6452 struct btrfs_bio **bbio_ret)
6453{
6454 return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, 0, 1);
6455}
6456
6457static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio)
6458{
6459 bio->bi_private = bbio->private;
6460 bio->bi_end_io = bbio->end_io;
6461 bio_endio(bio);
6462
6463 btrfs_put_bbio(bbio);
6464}
6465
6466static void btrfs_end_bio(struct bio *bio)
6467{
6468 struct btrfs_bio *bbio = bio->bi_private;
6469 int is_orig_bio = 0;
6470
6471 if (bio->bi_status) {
6472 atomic_inc(&bbio->error);
6473 if (bio->bi_status == BLK_STS_IOERR ||
6474 bio->bi_status == BLK_STS_TARGET) {
6475 struct btrfs_device *dev = btrfs_io_bio(bio)->device;
6476
6477 ASSERT(dev->bdev);
6478 if (btrfs_op(bio) == BTRFS_MAP_WRITE)
6479 btrfs_dev_stat_inc_and_print(dev,
6480 BTRFS_DEV_STAT_WRITE_ERRS);
6481 else if (!(bio->bi_opf & REQ_RAHEAD))
6482 btrfs_dev_stat_inc_and_print(dev,
6483 BTRFS_DEV_STAT_READ_ERRS);
6484 if (bio->bi_opf & REQ_PREFLUSH)
6485 btrfs_dev_stat_inc_and_print(dev,
6486 BTRFS_DEV_STAT_FLUSH_ERRS);
6487 }
6488 }
6489
6490 if (bio == bbio->orig_bio)
6491 is_orig_bio = 1;
6492
6493 btrfs_bio_counter_dec(bbio->fs_info);
6494
6495 if (atomic_dec_and_test(&bbio->stripes_pending)) {
6496 if (!is_orig_bio) {
6497 bio_put(bio);
6498 bio = bbio->orig_bio;
6499 }
6500
6501 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
6502
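		/*
		 * Only send an error to the higher layers if it is beyond the
		 * tolerance of the btrfs bio.
		 */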
6505 if (atomic_read(&bbio->error) > bbio->max_errors) {
6506 bio->bi_status = BLK_STS_IOERR;
6507 } else {
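			/*
			 * This bio is actually up to date, we didn't
			 * go over the max number of errors.
			 */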
6512 bio->bi_status = BLK_STS_OK;
6513 }
6514
6515 btrfs_end_bbio(bbio, bio);
6516 } else if (!is_orig_bio) {
6517 bio_put(bio);
6518 }
6519}
6520
6521static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio,
6522 u64 physical, struct btrfs_device *dev)
6523{
6524 struct btrfs_fs_info *fs_info = bbio->fs_info;
6525
6526 bio->bi_private = bbio;
6527 btrfs_io_bio(bio)->device = dev;
6528 bio->bi_end_io = btrfs_end_bio;
6529 bio->bi_iter.bi_sector = physical >> 9;
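	/*
	 * For zone append writing, bi_sector must point to the beginning of
	 * the zone.
	 */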
6534 if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
6535 if (btrfs_dev_is_sequential(dev, physical)) {
6536 u64 zone_start = round_down(physical, fs_info->zone_size);
6537
6538 bio->bi_iter.bi_sector = zone_start >> SECTOR_SHIFT;
6539 } else {
6540 bio->bi_opf &= ~REQ_OP_ZONE_APPEND;
6541 bio->bi_opf |= REQ_OP_WRITE;
6542 }
6543 }
6544 btrfs_debug_in_rcu(fs_info,
6545 "btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
6546 bio_op(bio), bio->bi_opf, bio->bi_iter.bi_sector,
6547 (unsigned long)dev->bdev->bd_dev, rcu_str_deref(dev->name),
6548 dev->devid, bio->bi_iter.bi_size);
6549 bio_set_dev(bio, dev->bdev);
6550
6551 btrfs_bio_counter_inc_noblocked(fs_info);
6552
6553 btrfsic_submit_bio(bio);
6554}
6555
6556static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
6557{
6558 atomic_inc(&bbio->error);
6559 if (atomic_dec_and_test(&bbio->stripes_pending)) {
6560
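		/* Should be the original bio. */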
6561 WARN_ON(bio != bbio->orig_bio);
6562
6563 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
6564 bio->bi_iter.bi_sector = logical >> 9;
6565 if (atomic_read(&bbio->error) > bbio->max_errors)
6566 bio->bi_status = BLK_STS_IOERR;
6567 else
6568 bio->bi_status = BLK_STS_OK;
6569 btrfs_end_bbio(bbio, bio);
6570 }
6571}
6572
6573blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
6574 int mirror_num)
6575{
6576 struct btrfs_device *dev;
6577 struct bio *first_bio = bio;
6578 u64 logical = bio->bi_iter.bi_sector << 9;
6579 u64 length = 0;
6580 u64 map_length;
6581 int ret;
6582 int dev_nr;
6583 int total_devs;
6584 struct btrfs_bio *bbio = NULL;
6585
6586 length = bio->bi_iter.bi_size;
6587 map_length = length;
6588
6589 btrfs_bio_counter_inc_blocked(fs_info);
6590 ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical,
6591 &map_length, &bbio, mirror_num, 1);
6592 if (ret) {
6593 btrfs_bio_counter_dec(fs_info);
6594 return errno_to_blk_status(ret);
6595 }
6596
6597 total_devs = bbio->num_stripes;
6598 bbio->orig_bio = first_bio;
6599 bbio->private = first_bio->bi_private;
6600 bbio->end_io = first_bio->bi_end_io;
6601 bbio->fs_info = fs_info;
6602 atomic_set(&bbio->stripes_pending, bbio->num_stripes);
6603
6604 if ((bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
6605 ((btrfs_op(bio) == BTRFS_MAP_WRITE) || (mirror_num > 1))) {
6606
6607
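		/*
		 * In this case, map_length has been set to the length of a
		 * single stripe; not the whole write.
		 */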
6608 if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
6609 ret = raid56_parity_write(fs_info, bio, bbio,
6610 map_length);
6611 } else {
6612 ret = raid56_parity_recover(fs_info, bio, bbio,
6613 map_length, mirror_num, 1);
6614 }
6615
6616 btrfs_bio_counter_dec(fs_info);
6617 return errno_to_blk_status(ret);
6618 }
6619
6620 if (map_length < length) {
6621 btrfs_crit(fs_info,
6622 "mapping failed logical %llu bio len %llu len %llu",
6623 logical, length, map_length);
6624 BUG();
6625 }
6626
6627 for (dev_nr = 0; dev_nr < total_devs; dev_nr++) {
6628 dev = bbio->stripes[dev_nr].dev;
6629 if (!dev || !dev->bdev || test_bit(BTRFS_DEV_STATE_MISSING,
6630 &dev->dev_state) ||
6631 (btrfs_op(first_bio) == BTRFS_MAP_WRITE &&
6632 !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
6633 bbio_error(bbio, first_bio, logical);
6634 continue;
6635 }
6636
6637 if (dev_nr < total_devs - 1)
6638 bio = btrfs_bio_clone(first_bio);
6639 else
6640 bio = first_bio;
6641
6642 submit_stripe_bio(bbio, bio, bbio->stripes[dev_nr].physical, dev);
6643 }
6644 btrfs_bio_counter_dec(fs_info);
6645 return BLK_STS_OK;
6646}
6647
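/*
 * Find a device specified by @devid or @uuid in the list of @fs_devices, or
 * return NULL.
 *
 * If devid and uuid are both specified, the match must be exact, otherwise
 * only devid is used.
 */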
6657struct btrfs_device *btrfs_find_device(struct btrfs_fs_devices *fs_devices,
6658 u64 devid, u8 *uuid, u8 *fsid)
6659{
6660 struct btrfs_device *device;
6661 struct btrfs_fs_devices *seed_devs;
6662
6663 if (!fsid || !memcmp(fs_devices->metadata_uuid, fsid, BTRFS_FSID_SIZE)) {
6664 list_for_each_entry(device, &fs_devices->devices, dev_list) {
6665 if (device->devid == devid &&
6666 (!uuid || memcmp(device->uuid, uuid,
6667 BTRFS_UUID_SIZE) == 0))
6668 return device;
6669 }
6670 }
6671
6672 list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list) {
6673 if (!fsid ||
6674 !memcmp(seed_devs->metadata_uuid, fsid, BTRFS_FSID_SIZE)) {
6675 list_for_each_entry(device, &seed_devs->devices,
6676 dev_list) {
6677 if (device->devid == devid &&
6678 (!uuid || memcmp(device->uuid, uuid,
6679 BTRFS_UUID_SIZE) == 0))
6680 return device;
6681 }
6682 }
6683 }
6684
6685 return NULL;
6686}
6687
6688static struct btrfs_device *add_missing_dev(struct btrfs_fs_devices *fs_devices,
6689 u64 devid, u8 *dev_uuid)
6690{
6691 struct btrfs_device *device;
6692 unsigned int nofs_flag;
6693
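	/*
	 * We call this under the chunk_mutex, so we want to use NOFS for this
	 * allocation, however we don't want to change btrfs_alloc_device() to
	 * always do NOFS because we use it in a lot of other GFP_KERNEL safe
	 * places.
	 */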
6700 nofs_flag = memalloc_nofs_save();
6701 device = btrfs_alloc_device(NULL, &devid, dev_uuid);
6702 memalloc_nofs_restore(nofs_flag);
6703 if (IS_ERR(device))
6704 return device;
6705
6706 list_add(&device->dev_list, &fs_devices->devices);
6707 device->fs_devices = fs_devices;
6708 fs_devices->num_devices++;
6709
6710 set_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
6711 fs_devices->missing_devices++;
6712
6713 return device;
6714}
6715
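/**
 * btrfs_alloc_device - allocate struct btrfs_device
 * @fs_info:	used only for generating a new devid, can be NULL if
 *		devid is provided (i.e. @devid != NULL).
 * @devid:	a pointer to devid for this device.  If NULL a new devid
 *		is generated.
 * @uuid:	a pointer to UUID for this device.  If NULL a new UUID
 *		is generated.
 *
 * Return: a pointer to a new &struct btrfs_device on success; ERR_PTR()
 * on error.  Returned struct is not linked onto any lists and must be
 * destroyed with btrfs_free_device.
 */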
6729struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
6730 const u64 *devid,
6731 const u8 *uuid)
6732{
6733 struct btrfs_device *dev;
6734 u64 tmp;
6735
6736 if (WARN_ON(!devid && !fs_info))
6737 return ERR_PTR(-EINVAL);
6738
6739 dev = __alloc_device(fs_info);
6740 if (IS_ERR(dev))
6741 return dev;
6742
6743 if (devid)
6744 tmp = *devid;
6745 else {
6746 int ret;
6747
6748 ret = find_next_devid(fs_info, &tmp);
6749 if (ret) {
6750 btrfs_free_device(dev);
6751 return ERR_PTR(ret);
6752 }
6753 }
6754 dev->devid = tmp;
6755
6756 if (uuid)
6757 memcpy(dev->uuid, uuid, BTRFS_UUID_SIZE);
6758 else
6759 generate_random_uuid(dev->uuid);
6760
6761 return dev;
6762}
6763
6764static void btrfs_report_missing_device(struct btrfs_fs_info *fs_info,
6765 u64 devid, u8 *uuid, bool error)
6766{
6767 if (error)
6768 btrfs_err_rl(fs_info, "devid %llu uuid %pU is missing",
6769 devid, uuid);
6770 else
6771 btrfs_warn_rl(fs_info, "devid %llu uuid %pU is missing",
6772 devid, uuid);
6773}
6774
6775static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes)
6776{
6777 int index = btrfs_bg_flags_to_raid_index(type);
6778 int ncopies = btrfs_raid_array[index].ncopies;
6779 const int nparity = btrfs_raid_array[index].nparity;
6780 int data_stripes;
6781
6782 if (nparity)
6783 data_stripes = num_stripes - nparity;
6784 else
6785 data_stripes = num_stripes / ncopies;
6786
6787 return div_u64(chunk_len, data_stripes);
6788}
6789
6790static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
6791 struct btrfs_chunk *chunk)
6792{
6793 struct btrfs_fs_info *fs_info = leaf->fs_info;
6794 struct extent_map_tree *map_tree = &fs_info->mapping_tree;
6795 struct map_lookup *map;
6796 struct extent_map *em;
6797 u64 logical;
6798 u64 length;
6799 u64 devid;
6800 u8 uuid[BTRFS_UUID_SIZE];
6801 int num_stripes;
6802 int ret;
6803 int i;
6804
6805 logical = key->offset;
6806 length = btrfs_chunk_length(leaf, chunk);
6807 num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
6808
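	/*
	 * Only need to verify chunk item if we're reading from sys chunk array,
	 * as chunk item in tree block is already verified by tree-checker.
	 */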
6813 if (leaf->start == BTRFS_SUPER_INFO_OFFSET) {
6814 ret = btrfs_check_chunk_valid(leaf, chunk, logical);
6815 if (ret)
6816 return ret;
6817 }
6818
6819 read_lock(&map_tree->lock);
6820 em = lookup_extent_mapping(map_tree, logical, 1);
6821 read_unlock(&map_tree->lock);
6822
6823
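	/* Already mapped? */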
6824 if (em && em->start <= logical && em->start + em->len > logical) {
6825 free_extent_map(em);
6826 return 0;
6827 } else if (em) {
6828 free_extent_map(em);
6829 }
6830
6831 em = alloc_extent_map();
6832 if (!em)
6833 return -ENOMEM;
6834 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
6835 if (!map) {
6836 free_extent_map(em);
6837 return -ENOMEM;
6838 }
6839
6840 set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
6841 em->map_lookup = map;
6842 em->start = logical;
6843 em->len = length;
6844 em->orig_start = 0;
6845 em->block_start = 0;
6846 em->block_len = em->len;
6847
6848 map->num_stripes = num_stripes;
6849 map->io_width = btrfs_chunk_io_width(leaf, chunk);
6850 map->io_align = btrfs_chunk_io_align(leaf, chunk);
6851 map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
6852 map->type = btrfs_chunk_type(leaf, chunk);
6853 map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
6854 map->verified_stripes = 0;
6855 em->orig_block_len = calc_stripe_length(map->type, em->len,
6856 map->num_stripes);
	for (i = 0; i < num_stripes; i++) {
		map->stripes[i].physical =
			btrfs_stripe_offset_nr(leaf, chunk, i);
		devid = btrfs_stripe_devid_nr(leaf, chunk, i);
		read_extent_buffer(leaf, uuid, (unsigned long)
				   btrfs_stripe_dev_uuid_nr(chunk, i),
				   BTRFS_UUID_SIZE);
		map->stripes[i].dev = btrfs_find_device(fs_info->fs_devices,
							devid, uuid, NULL);
		if (!map->stripes[i].dev &&
		    !btrfs_test_opt(fs_info, DEGRADED)) {
			free_extent_map(em);
			btrfs_report_missing_device(fs_info, devid, uuid, true);
			return -ENOENT;
		}
		if (!map->stripes[i].dev) {
			map->stripes[i].dev =
				add_missing_dev(fs_info->fs_devices, devid,
						uuid);
			if (IS_ERR(map->stripes[i].dev)) {
				free_extent_map(em);
				btrfs_err(fs_info,
					"failed to init missing dev %llu: %ld",
					devid, PTR_ERR(map->stripes[i].dev));
				return PTR_ERR(map->stripes[i].dev);
			}
			btrfs_report_missing_device(fs_info, devid, uuid, false);
		}
		set_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
			&(map->stripes[i].dev->dev_state));
	}

	write_lock(&map_tree->lock);
	ret = add_extent_mapping(map_tree, em, 0);
	write_unlock(&map_tree->lock);
	if (ret < 0) {
		btrfs_err(fs_info,
			  "failed to add chunk map, start=%llu len=%llu: %d",
			  em->start, em->len, ret);
	}
	free_extent_map(em);

	return ret;
}
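/* Initialize an in-memory btrfs_device from an on-disk dev item. */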
static void fill_device_from_item(struct extent_buffer *leaf,
				  struct btrfs_dev_item *dev_item,
				  struct btrfs_device *device)
{
	unsigned long ptr;

	device->devid = btrfs_device_id(leaf, dev_item);
	device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item);
	device->total_bytes = device->disk_total_bytes;
	device->commit_total_bytes = device->disk_total_bytes;
	device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
	device->commit_bytes_used = device->bytes_used;
	device->type = btrfs_device_type(leaf, dev_item);
	device->io_align = btrfs_device_io_align(leaf, dev_item);
	device->io_width = btrfs_device_io_width(leaf, dev_item);
	device->sector_size = btrfs_device_sector_size(leaf, dev_item);
	WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID);
	clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);

	ptr = btrfs_device_uuid(dev_item);
	read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
}

static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info,
						  u8 *fsid)
{
	struct btrfs_fs_devices *fs_devices;
	int ret;

	lockdep_assert_held(&uuid_mutex);
	ASSERT(fsid);

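	/* This will match only for a multi-device seed fs. */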
	list_for_each_entry(fs_devices, &fs_info->fs_devices->seed_list, seed_list)
		if (!memcmp(fs_devices->fsid, fsid, BTRFS_FSID_SIZE))
			return fs_devices;

	fs_devices = find_fsid(fsid, NULL);
	if (!fs_devices) {
		if (!btrfs_test_opt(fs_info, DEGRADED))
			return ERR_PTR(-ENOENT);

		fs_devices = alloc_fs_devices(fsid, NULL);
		if (IS_ERR(fs_devices))
			return fs_devices;

		fs_devices->seeding = true;
		fs_devices->opened = 1;
		return fs_devices;
	}

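	/*
	 * Upon first call for a seed fs fsid, just create a private copy of
	 * the respective fs_devices and anchor it at
	 * fs_info->fs_devices->seed_list.
	 */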
	fs_devices = clone_fs_devices(fs_devices);
	if (IS_ERR(fs_devices))
		return fs_devices;

	ret = open_fs_devices(fs_devices, FMODE_READ, fs_info->bdev_holder);
	if (ret) {
		free_fs_devices(fs_devices);
		return ERR_PTR(ret);
	}

	if (!fs_devices->seeding) {
		close_fs_devices(fs_devices);
		free_fs_devices(fs_devices);
		return ERR_PTR(-EINVAL);
	}

	list_add(&fs_devices->seed_list, &fs_info->fs_devices->seed_list);

	return fs_devices;
}

static int read_one_dev(struct extent_buffer *leaf,
			struct btrfs_dev_item *dev_item)
{
	struct btrfs_fs_info *fs_info = leaf->fs_info;
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	struct btrfs_device *device;
	u64 devid;
	u8 fs_uuid[BTRFS_FSID_SIZE];
	u8 dev_uuid[BTRFS_UUID_SIZE];

	devid = btrfs_device_id(leaf, dev_item);
	read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
			   BTRFS_UUID_SIZE);
	read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
			   BTRFS_FSID_SIZE);

	if (memcmp(fs_uuid, fs_devices->metadata_uuid, BTRFS_FSID_SIZE)) {
		fs_devices = open_seed_devices(fs_info, fs_uuid);
		if (IS_ERR(fs_devices))
			return PTR_ERR(fs_devices);
	}

	device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
				   fs_uuid);
	if (!device) {
		if (!btrfs_test_opt(fs_info, DEGRADED)) {
			btrfs_report_missing_device(fs_info, devid,
						    dev_uuid, true);
			return -ENOENT;
		}

		device = add_missing_dev(fs_devices, devid, dev_uuid);
		if (IS_ERR(device)) {
			btrfs_err(fs_info,
				  "failed to add missing dev %llu: %ld",
				  devid, PTR_ERR(device));
			return PTR_ERR(device);
		}
		btrfs_report_missing_device(fs_info, devid, dev_uuid, false);
	} else {
		if (!device->bdev) {
			if (!btrfs_test_opt(fs_info, DEGRADED)) {
				btrfs_report_missing_device(fs_info,
						devid, dev_uuid, true);
				return -ENOENT;
			}
			btrfs_report_missing_device(fs_info, devid,
						    dev_uuid, false);
		}

		if (!device->bdev &&
		    !test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) {
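			/*
			 * This happens when a device that was properly set up
			 * in the device info lists suddenly went bad: keep
			 * the missing_devices count consistent for degraded
			 * mounts.
			 */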
			device->fs_devices->missing_devices++;
			set_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
		}

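		/* Move the device to its own fs_devices */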
		if (device->fs_devices != fs_devices) {
			ASSERT(test_bit(BTRFS_DEV_STATE_MISSING,
					&device->dev_state));

			list_move(&device->dev_list, &fs_devices->devices);
			device->fs_devices->num_devices--;
			fs_devices->num_devices++;

			device->fs_devices->missing_devices--;
			fs_devices->missing_devices++;

			device->fs_devices = fs_devices;
		}
	}

	if (device->fs_devices != fs_info->fs_devices) {
		BUG_ON(test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state));
		if (device->generation !=
		    btrfs_device_generation(leaf, dev_item))
			return -EINVAL;
	}

	fill_device_from_item(leaf, dev_item, device);
	if (device->bdev) {
		u64 max_total_bytes = i_size_read(device->bdev->bd_inode);

		if (device->total_bytes > max_total_bytes) {
			btrfs_err(fs_info,
			"device total_bytes should be at most %llu but found %llu",
				  max_total_bytes, device->total_bytes);
			return -EINVAL;
		}
	}
	set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
	    !test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
		device->fs_devices->total_rw_bytes += device->total_bytes;
		atomic64_add(device->total_bytes - device->bytes_used,
			     &fs_info->free_chunk_space);
	}
	return 0;
}

int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_super_block *super_copy = fs_info->super_copy;
	struct extent_buffer *sb;
	struct btrfs_disk_key *disk_key;
	struct btrfs_chunk *chunk;
	u8 *array_ptr;
	unsigned long sb_array_offset;
	int ret = 0;
	u32 num_stripes;
	u32 array_size;
	u32 len = 0;
	u32 cur_offset;
	u64 type;
	struct btrfs_key key;

	ASSERT(BTRFS_SUPER_INFO_SIZE <= fs_info->nodesize);

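	/*
	 * This creates an extent buffer of nodesize while the superblock is
	 * fixed at BTRFS_SUPER_INFO_SIZE.  If nodesize is larger this
	 * overallocates, which is fine: only the beginning, holding the
	 * sys_chunk_array, is ever read.
	 */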
	sb = btrfs_find_create_tree_block(fs_info, BTRFS_SUPER_INFO_OFFSET,
					  root->root_key.objectid, 0);
	if (IS_ERR(sb))
		return PTR_ERR(sb);
	set_extent_buffer_uptodate(sb);

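	/*
	 * The sb extent buffer is artificial and is only used to read the
	 * system array.  set_extent_buffer_uptodate() does not mark a page
	 * uptodate when the page is larger than the extent buffer's length,
	 * so when PAGE_SIZE exceeds the superblock size mark the first page
	 * uptodate by hand before writing into it.
	 */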
	if (PAGE_SIZE > BTRFS_SUPER_INFO_SIZE)
		SetPageUptodate(sb->pages[0]);

	write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
	array_size = btrfs_super_sys_array_size(super_copy);

	array_ptr = super_copy->sys_chunk_array;
	sb_array_offset = offsetof(struct btrfs_super_block, sys_chunk_array);
	cur_offset = 0;

	while (cur_offset < array_size) {
		disk_key = (struct btrfs_disk_key *)array_ptr;
		len = sizeof(*disk_key);
		if (cur_offset + len > array_size)
			goto out_short_read;

		btrfs_disk_key_to_cpu(&key, disk_key);

		array_ptr += len;
		sb_array_offset += len;
		cur_offset += len;

		if (key.type != BTRFS_CHUNK_ITEM_KEY) {
			btrfs_err(fs_info,
			    "unexpected item type %u in sys_array at offset %u",
				  (u32)key.type, cur_offset);
			ret = -EIO;
			break;
		}
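		/*
		 * The extent buffer accessors treat this pointer value as an
		 * offset into the sb extent buffer, so the cast of
		 * sb_array_offset is intentional.
		 */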
		chunk = (struct btrfs_chunk *)sb_array_offset;
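
		/*
		 * At least one btrfs_chunk with one stripe must be present,
		 * exact stripe count check comes afterwards.
		 */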
		len = btrfs_chunk_item_size(1);
		if (cur_offset + len > array_size)
			goto out_short_read;

		num_stripes = btrfs_chunk_num_stripes(sb, chunk);
		if (!num_stripes) {
			btrfs_err(fs_info,
			"invalid number of stripes %u in sys_array at offset %u",
				  num_stripes, cur_offset);
			ret = -EIO;
			break;
		}

		type = btrfs_chunk_type(sb, chunk);
		if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) {
			btrfs_err(fs_info,
			"invalid chunk type %llu in sys_array at offset %u",
				  type, cur_offset);
			ret = -EIO;
			break;
		}

		len = btrfs_chunk_item_size(num_stripes);
		if (cur_offset + len > array_size)
			goto out_short_read;

		ret = read_one_chunk(&key, sb, chunk);
		if (ret)
			break;

		array_ptr += len;
		sb_array_offset += len;
		cur_offset += len;
	}
	clear_extent_buffer_uptodate(sb);
	free_extent_buffer_stale(sb);
	return ret;

out_short_read:
	btrfs_err(fs_info, "sys_array too short to read %u bytes at offset %u",
		  len, cur_offset);
	clear_extent_buffer_uptodate(sb);
	free_extent_buffer_stale(sb);
	return -EIO;
}
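/*
 * Check if all chunks in the fs are OK for read-write degraded mount.
 *
 * If the @failing_dev is specified, it's accounted as missing.
 *
 * Return true if all chunks meet the minimal RW mount requirements.
 * Return false if any chunk doesn't meet minimal RW mount requirements.
 */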
bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
			       struct btrfs_device *failing_dev)
{
	struct extent_map_tree *map_tree = &fs_info->mapping_tree;
	struct extent_map *em;
	u64 next_start = 0;
	bool ret = true;

	read_lock(&map_tree->lock);
	em = lookup_extent_mapping(map_tree, 0, (u64)-1);
	read_unlock(&map_tree->lock);

	if (!em) {
		ret = false;
		goto out;
	}
	while (em) {
		struct map_lookup *map;
		int missing = 0;
		int max_tolerated;
		int i;

		map = em->map_lookup;
		max_tolerated =
			btrfs_get_num_tolerated_disk_barrier_failures(
					map->type);
		for (i = 0; i < map->num_stripes; i++) {
			struct btrfs_device *dev = map->stripes[i].dev;

			if (!dev || !dev->bdev ||
			    test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
			    dev->last_flush_error)
				missing++;
			else if (failing_dev && failing_dev == dev)
				missing++;
		}
		if (missing > max_tolerated) {
			if (!failing_dev)
				btrfs_warn(fs_info,
	"chunk %llu missing %d devices, max tolerance is %d for writable mount",
					   em->start, missing, max_tolerated);
			free_extent_map(em);
			ret = false;
			goto out;
		}
		next_start = extent_map_end(em);
		free_extent_map(em);

		read_lock(&map_tree->lock);
		em = lookup_extent_mapping(map_tree, next_start,
					   (u64)(-1) - next_start);
		read_unlock(&map_tree->lock);
	}
out:
	return ret;
}

static void readahead_tree_node_children(struct extent_buffer *node)
{
	int i;
	const int nr_items = btrfs_header_nritems(node);

	for (i = 0; i < nr_items; i++)
		btrfs_readahead_node_child(node, i);
}

int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *root = fs_info->chunk_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	int ret;
	int slot;
	u64 total_dev = 0;
	u64 last_ra_node = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

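	/*
	 * uuid_mutex is needed only if we are mounting a sprout FS,
	 * otherwise we don't need it.
	 */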
	mutex_lock(&uuid_mutex);

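	/*
	 * It is possible for mount and umount to race in such a way that
	 * we execute this code path, but open_fs_devices failed to clear
	 * total_rw_bytes. We certainly want it cleared before reading the
	 * device items, so clear it here.
	 */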
	fs_info->fs_devices->total_rw_bytes = 0;

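	/*
	 * Read all device items, and then all the chunk items. All
	 * device items are found before any chunk item (their object id
	 * is smaller than the lowest possible object id for a chunk
	 * item - BTRFS_FIRST_CHUNK_TREE_OBJECTID).
	 */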
	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.offset = 0;
	key.type = 0;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto error;
	while (1) {
		struct extent_buffer *node;

		leaf = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret == 0)
				continue;
			if (ret < 0)
				goto error;
			break;
		}

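		/*
		 * The nodes on level 1 are not locked but we don't need to do
		 * that during mount time as nothing else can access the tree.
		 */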
		node = path->nodes[1];
		if (node) {
			if (last_ra_node != node->start) {
				readahead_tree_node_children(node);
				last_ra_node = node->start;
			}
		}
		btrfs_item_key_to_cpu(leaf, &found_key, slot);
		if (found_key.type == BTRFS_DEV_ITEM_KEY) {
			struct btrfs_dev_item *dev_item;

			dev_item = btrfs_item_ptr(leaf, slot,
						  struct btrfs_dev_item);
			ret = read_one_dev(leaf, dev_item);
			if (ret)
				goto error;
			total_dev++;
		} else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
			struct btrfs_chunk *chunk;

			chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
			mutex_lock(&fs_info->chunk_mutex);
			ret = read_one_chunk(&found_key, leaf, chunk);
			mutex_unlock(&fs_info->chunk_mutex);
			if (ret)
				goto error;
		}
		path->slots[0]++;
	}
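	/*
	 * After loading chunk tree, we've got all device information,
	 * do another round of validation checks.
	 */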
	if (total_dev != fs_info->fs_devices->total_devices) {
		btrfs_err(fs_info,
	   "super_num_devices %llu mismatch with num_devices %llu found here",
			  btrfs_super_num_devices(fs_info->super_copy),
			  total_dev);
		ret = -EINVAL;
		goto error;
	}
	if (btrfs_super_total_bytes(fs_info->super_copy) <
	    fs_info->fs_devices->total_rw_bytes) {
		btrfs_err(fs_info,
	"super_total_bytes %llu mismatch with fs_devices total_rw_bytes %llu",
			  btrfs_super_total_bytes(fs_info->super_copy),
			  fs_info->fs_devices->total_rw_bytes);
		ret = -EINVAL;
		goto error;
	}
	ret = 0;
error:
	mutex_unlock(&uuid_mutex);

	btrfs_free_path(path);
	return ret;
}

void btrfs_init_devices_late(struct btrfs_fs_info *fs_info)
{
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices, *seed_devs;
	struct btrfs_device *device;

	fs_devices->fs_info = fs_info;

	mutex_lock(&fs_devices->device_list_mutex);
	list_for_each_entry(device, &fs_devices->devices, dev_list)
		device->fs_info = fs_info;

	list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list) {
		list_for_each_entry(device, &seed_devs->devices, dev_list)
			device->fs_info = fs_info;

		seed_devs->fs_info = fs_info;
	}
	mutex_unlock(&fs_devices->device_list_mutex);
}
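/*
 * Read and write one of the u64 counters stored in an on-disk
 * btrfs_dev_stats_item.
 */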
static u64 btrfs_dev_stats_value(const struct extent_buffer *eb,
				 const struct btrfs_dev_stats_item *ptr,
				 int index)
{
	u64 val;

	read_extent_buffer(eb, &val,
			   offsetof(struct btrfs_dev_stats_item, values) +
			    ((unsigned long)ptr) + (index * sizeof(u64)),
			   sizeof(val));
	return val;
}

static void btrfs_set_dev_stats_value(struct extent_buffer *eb,
				      struct btrfs_dev_stats_item *ptr,
				      int index, u64 val)
{
	write_extent_buffer(eb, &val,
			    offsetof(struct btrfs_dev_stats_item, values) +
			     ((unsigned long)ptr) + (index * sizeof(u64)),
			    sizeof(val));
}

static int btrfs_device_init_dev_stats(struct btrfs_device *device,
				       struct btrfs_path *path)
{
	struct btrfs_dev_stats_item *ptr;
	struct extent_buffer *eb;
	struct btrfs_key key;
	int item_size;
	int i, ret, slot;

	if (!device->fs_info->dev_root)
		return 0;

	key.objectid = BTRFS_DEV_STATS_OBJECTID;
	key.type = BTRFS_PERSISTENT_ITEM_KEY;
	key.offset = device->devid;
	ret = btrfs_search_slot(NULL, device->fs_info->dev_root, &key, path, 0, 0);
	if (ret) {
		for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
			btrfs_dev_stat_set(device, i, 0);
		device->dev_stats_valid = 1;
		btrfs_release_path(path);
		return ret < 0 ? ret : 0;
	}
	slot = path->slots[0];
	eb = path->nodes[0];
	item_size = btrfs_item_size_nr(eb, slot);

	ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_stats_item);

	for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
		if (item_size >= (1 + i) * sizeof(__le64))
			btrfs_dev_stat_set(device, i,
					   btrfs_dev_stats_value(eb, ptr, i));
		else
			btrfs_dev_stat_set(device, i, 0);
	}

	device->dev_stats_valid = 1;
	btrfs_dev_stat_print_on_load(device);
	btrfs_release_path(path);

	return 0;
}

int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info)
{
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices, *seed_devs;
	struct btrfs_device *device;
	struct btrfs_path *path = NULL;
	int ret = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	mutex_lock(&fs_devices->device_list_mutex);
	list_for_each_entry(device, &fs_devices->devices, dev_list) {
		ret = btrfs_device_init_dev_stats(device, path);
		if (ret)
			goto out;
	}
	list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list) {
		list_for_each_entry(device, &seed_devs->devices, dev_list) {
			ret = btrfs_device_init_dev_stats(device, path);
			if (ret)
				goto out;
		}
	}
out:
	mutex_unlock(&fs_devices->device_list_mutex);

	btrfs_free_path(path);
	return ret;
}

static int update_dev_stat_item(struct btrfs_trans_handle *trans,
				struct btrfs_device *device)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_root *dev_root = fs_info->dev_root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *eb;
	struct btrfs_dev_stats_item *ptr;
	int ret;
	int i;

	key.objectid = BTRFS_DEV_STATS_OBJECTID;
	key.type = BTRFS_PERSISTENT_ITEM_KEY;
	key.offset = device->devid;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
	if (ret < 0) {
		btrfs_warn_in_rcu(fs_info,
			"error %d while searching for dev_stats item for device %s",
			ret, rcu_str_deref(device->name));
		goto out;
	}

	if (ret == 0 &&
	    btrfs_item_size_nr(path->nodes[0], path->slots[0]) < sizeof(*ptr)) {
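		/* need to delete old one and insert a new one */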
		ret = btrfs_del_item(trans, dev_root, path);
		if (ret != 0) {
			btrfs_warn_in_rcu(fs_info,
				"delete too small dev_stats item for device %s failed %d",
				rcu_str_deref(device->name), ret);
			goto out;
		}
		ret = 1;
	}

	if (ret == 1) {
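		/* need to insert a new item */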
		btrfs_release_path(path);
		ret = btrfs_insert_empty_item(trans, dev_root, path,
					      &key, sizeof(*ptr));
		if (ret < 0) {
			btrfs_warn_in_rcu(fs_info,
				"insert dev_stats item for device %s failed %d",
				rcu_str_deref(device->name), ret);
			goto out;
		}
	}

	eb = path->nodes[0];
	ptr = btrfs_item_ptr(eb, path->slots[0], struct btrfs_dev_stats_item);
	for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
		btrfs_set_dev_stats_value(eb, ptr, i,
					  btrfs_dev_stat_read(device, i));
	btrfs_mark_buffer_dirty(eb);

out:
	btrfs_free_path(path);
	return ret;
}
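/*
 * Called from commit_transaction.  Writes all changed device stats to disk.
 */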
int btrfs_run_dev_stats(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	struct btrfs_device *device;
	int stats_cnt;
	int ret = 0;

	mutex_lock(&fs_devices->device_list_mutex);
	list_for_each_entry(device, &fs_devices->devices, dev_list) {
		stats_cnt = atomic_read(&device->dev_stats_ccnt);
		if (!device->dev_stats_valid || stats_cnt == 0)
			continue;

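		/*
		 * There is a LOAD-LOAD control dependency between the value of
		 * dev_stats_ccnt and updating the on-disk values which requires
		 * reading the in-memory counters. Such control dependencies
		 * require explicit read memory barriers.
		 *
		 * This memory barrier pairs with smp_mb__before_atomic in
		 * btrfs_dev_stat_inc/btrfs_dev_stat_set and with the full
		 * barrier implied by atomic_xchg in
		 * btrfs_dev_stats_read_and_reset.
		 */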
		smp_rmb();

		ret = update_dev_stat_item(trans, device);
		if (!ret)
			atomic_sub(stats_cnt, &device->dev_stats_ccnt);
	}
	mutex_unlock(&fs_devices->device_list_mutex);

	return ret;
}

void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index)
{
	btrfs_dev_stat_inc(dev, index);
	btrfs_dev_stat_print_on_error(dev);
}

static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
{
	if (!dev->dev_stats_valid)
		return;
	btrfs_err_rl_in_rcu(dev->fs_info,
		"bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u",
			   rcu_str_deref(dev->name),
			   btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
			   btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
			   btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
			   btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS),
			   btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS));
}

static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
{
	int i;

	for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
		if (btrfs_dev_stat_read(dev, i) != 0)
			break;
	if (i == BTRFS_DEV_STAT_VALUES_MAX)
		return;

	btrfs_info_in_rcu(dev->fs_info,
		"bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u",
		rcu_str_deref(dev->name),
		btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
		btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
		btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
		btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS),
		btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS));
}

int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
			struct btrfs_ioctl_get_dev_stats *stats)
{
	struct btrfs_device *dev;
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	int i;

	mutex_lock(&fs_devices->device_list_mutex);
	dev = btrfs_find_device(fs_info->fs_devices, stats->devid, NULL, NULL);
	mutex_unlock(&fs_devices->device_list_mutex);

	if (!dev) {
		btrfs_warn(fs_info, "get dev_stats failed, device not found");
		return -ENODEV;
	} else if (!dev->dev_stats_valid) {
		btrfs_warn(fs_info, "get dev_stats failed, not yet valid");
		return -ENODEV;
	} else if (stats->flags & BTRFS_DEV_STATS_RESET) {
		for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
			if (stats->nr_items > i)
				stats->values[i] =
					btrfs_dev_stat_read_and_reset(dev, i);
			else
				btrfs_dev_stat_set(dev, i, 0);
		}
		btrfs_info(fs_info, "device stats zeroed by %s (%d)",
			   current->comm, task_pid_nr(current));
	} else {
		for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
			if (stats->nr_items > i)
				stats->values[i] = btrfs_dev_stat_read(dev, i);
	}
	if (stats->nr_items > BTRFS_DEV_STAT_VALUES_MAX)
		stats->nr_items = BTRFS_DEV_STAT_VALUES_MAX;
	return 0;
}
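/*
 * Update the size and bytes used for each device where it changed.  This is
 * delayed since we would otherwise get errors while writing out the
 * superblocks.
 *
 * Must be invoked during transaction commit.
 */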
void btrfs_commit_device_sizes(struct btrfs_transaction *trans)
{
	struct btrfs_device *curr, *next;

	ASSERT(trans->state == TRANS_STATE_COMMIT_DOING);

	if (list_empty(&trans->dev_update_list))
		return;

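	/*
	 * We don't need the device_list_mutex here.  This list is owned by the
	 * transaction and the transaction must complete before the device is
	 * released.
	 */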
	mutex_lock(&trans->fs_info->chunk_mutex);
	list_for_each_entry_safe(curr, next, &trans->dev_update_list,
				 post_commit_list) {
		list_del_init(&curr->post_commit_list);
		curr->commit_total_bytes = curr->disk_total_bytes;
		curr->commit_bytes_used = curr->bytes_used;
	}
	mutex_unlock(&trans->fs_info->chunk_mutex);
}
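/*
 * Multiplicity factor for simple profiles: DUP, RAID1-like and RAID10.
 */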
int btrfs_bg_type_to_factor(u64 flags)
{
	const int index = btrfs_bg_flags_to_raid_index(flags);

	return btrfs_raid_array[index].ncopies;
}

static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
				 u64 chunk_offset, u64 devid,
				 u64 physical_offset, u64 physical_len)
{
	struct extent_map_tree *em_tree = &fs_info->mapping_tree;
	struct extent_map *em;
	struct map_lookup *map;
	struct btrfs_device *dev;
	u64 stripe_len;
	bool found = false;
	int ret = 0;
	int i;

	read_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, chunk_offset, 1);
	read_unlock(&em_tree->lock);

	if (!em) {
		btrfs_err(fs_info,
"dev extent physical offset %llu on devid %llu doesn't have corresponding chunk",
			  physical_offset, devid);
		ret = -EUCLEAN;
		goto out;
	}

	map = em->map_lookup;
	stripe_len = calc_stripe_length(map->type, em->len, map->num_stripes);
	if (physical_len != stripe_len) {
		btrfs_err(fs_info,
"dev extent physical offset %llu on devid %llu length doesn't match chunk %llu, have %llu expect %llu",
			  physical_offset, devid, em->start, physical_len,
			  stripe_len);
		ret = -EUCLEAN;
		goto out;
	}

	for (i = 0; i < map->num_stripes; i++) {
		if (map->stripes[i].dev->devid == devid &&
		    map->stripes[i].physical == physical_offset) {
			found = true;
			if (map->verified_stripes >= map->num_stripes) {
				btrfs_err(fs_info,
					"too many dev extents for chunk %llu found",
					  em->start);
				ret = -EUCLEAN;
				goto out;
			}
			map->verified_stripes++;
			break;
		}
	}
	if (!found) {
		btrfs_err(fs_info,
	"dev extent physical offset %llu devid %llu has no corresponding chunk",
			physical_offset, devid);
		ret = -EUCLEAN;
	}

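	/* Make sure no dev extent is beyond device boundary */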
	dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL);
	if (!dev) {
		btrfs_err(fs_info, "failed to find devid %llu", devid);
		ret = -EUCLEAN;
		goto out;
	}

	if (physical_offset + physical_len > dev->disk_total_bytes) {
		btrfs_err(fs_info,
"dev extent devid %llu physical offset %llu len %llu is beyond device boundary %llu",
			  devid, physical_offset, physical_len,
			  dev->disk_total_bytes);
		ret = -EUCLEAN;
		goto out;
	}

	if (dev->zone_info) {
		u64 zone_size = dev->zone_info->zone_size;

		if (!IS_ALIGNED(physical_offset, zone_size) ||
		    !IS_ALIGNED(physical_len, zone_size)) {
			btrfs_err(fs_info,
"zoned: dev extent devid %llu physical offset %llu len %llu is not aligned to device zone",
				  devid, physical_offset, physical_len);
			ret = -EUCLEAN;
			goto out;
		}
	}

out:
	free_extent_map(em);
	return ret;
}

static int verify_chunk_dev_extent_mapping(struct btrfs_fs_info *fs_info)
{
	struct extent_map_tree *em_tree = &fs_info->mapping_tree;
	struct extent_map *em;
	struct rb_node *node;
	int ret = 0;

	read_lock(&em_tree->lock);
	for (node = rb_first_cached(&em_tree->map); node; node = rb_next(node)) {
		em = rb_entry(node, struct extent_map, rb_node);
		if (em->map_lookup->num_stripes !=
		    em->map_lookup->verified_stripes) {
			btrfs_err(fs_info,
			"chunk %llu has missing dev extent, have %d expect %d",
				  em->start, em->map_lookup->verified_stripes,
				  em->map_lookup->num_stripes);
			ret = -EUCLEAN;
			goto out;
		}
	}
out:
	read_unlock(&em_tree->lock);
	return ret;
}
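/*
 * Ensure that all dev extents are mapped to correct chunk, otherwise
 * later chunk allocation/free would cause unexpected behavior.
 *
 * NOTE: This will iterate through the whole device tree, which should be of
 * the same size level as the chunk tree.  This slightly increases mount time.
 */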
int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
{
	struct btrfs_path *path;
	struct btrfs_root *root = fs_info->dev_root;
	struct btrfs_key key;
	u64 prev_devid = 0;
	u64 prev_dev_ext_end = 0;
	int ret = 0;

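	/*
	 * We don't have a dev_root because we mounted with ignorebadroots and
	 * failed to load the root, so we want to skip the verification in
	 * this case for sure.
	 *
	 * However if the dev root is fine, but the tree itself is corrupted,
	 * we'd still fail to mount.  The skip here only covers the
	 * ignorebadroots case.
	 */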
	if (btrfs_test_opt(fs_info, IGNOREBADROOTS))
		return 0;

	key.objectid = 1;
	key.type = BTRFS_DEV_EXTENT_KEY;
	key.offset = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = READA_FORWARD;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
		ret = btrfs_next_item(root, path);
		if (ret < 0)
			goto out;

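		/* No dev extents at all? Not good */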
		if (ret > 0) {
			ret = -EUCLEAN;
			goto out;
		}
	}
	while (1) {
		struct extent_buffer *leaf = path->nodes[0];
		struct btrfs_dev_extent *dext;
		int slot = path->slots[0];
		u64 chunk_offset;
		u64 physical_offset;
		u64 physical_len;
		u64 devid;

		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.type != BTRFS_DEV_EXTENT_KEY)
			break;
		devid = key.objectid;
		physical_offset = key.offset;

		dext = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
		chunk_offset = btrfs_dev_extent_chunk_offset(leaf, dext);
		physical_len = btrfs_dev_extent_length(leaf, dext);

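		/* Check if this dev extent overlaps with the previous one */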
		if (devid == prev_devid && physical_offset < prev_dev_ext_end) {
			btrfs_err(fs_info,
"dev extent devid %llu physical offset %llu overlap with previous dev extent end %llu",
				  devid, physical_offset, prev_dev_ext_end);
			ret = -EUCLEAN;
			goto out;
		}

		ret = verify_one_dev_extent(fs_info, chunk_offset, devid,
					    physical_offset, physical_len);
		if (ret < 0)
			goto out;
		prev_devid = devid;
		prev_dev_ext_end = physical_offset + physical_len;

		ret = btrfs_next_item(root, path);
		if (ret < 0)
			goto out;
		if (ret > 0) {
			ret = 0;
			break;
		}
	}
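	/* Ensure all chunks have corresponding dev extents */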
	ret = verify_chunk_dev_extent_mapping(fs_info);
out:
	btrfs_free_path(path);
	return ret;
}
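/*
 * Check whether the given block group or device is pinned by any inode being
 * used as a swapfile.
 */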
bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr)
{
	struct btrfs_swapfile_pin *sp;
	struct rb_node *node;

	spin_lock(&fs_info->swapfile_pins_lock);
	node = fs_info->swapfile_pins.rb_node;
	while (node) {
		sp = rb_entry(node, struct btrfs_swapfile_pin, node);
		if (ptr < sp->ptr)
			node = node->rb_left;
		else if (ptr > sp->ptr)
			node = node->rb_right;
		else
			break;
	}
	spin_unlock(&fs_info->swapfile_pins_lock);
	return node != NULL;
}

static int relocating_repair_kthread(void *data)
{
	struct btrfs_block_group *cache = (struct btrfs_block_group *)data;
	struct btrfs_fs_info *fs_info = cache->fs_info;
	u64 target;
	int ret = 0;

	target = cache->start;
	btrfs_put_block_group(cache);

	if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
		btrfs_info(fs_info,
			   "zoned: skip relocating block group %llu to repair: EBUSY",
			   target);
		return -EBUSY;
	}

	mutex_lock(&fs_info->delete_unused_bgs_mutex);

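	/* Ensure the block group still exists */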
	cache = btrfs_lookup_block_group(fs_info, target);
	if (!cache)
		goto out;

	if (!cache->relocating_repair)
		goto out;

	ret = btrfs_may_alloc_data_chunk(fs_info, target);
	if (ret < 0)
		goto out;

	btrfs_info(fs_info,
		   "zoned: relocating block group %llu to repair IO failure",
		   target);
	ret = btrfs_relocate_chunk(fs_info, target);

out:
	if (cache)
		btrfs_put_block_group(cache);
	mutex_unlock(&fs_info->delete_unused_bgs_mutex);
	btrfs_exclop_finish(fs_info);

	return ret;
}

int btrfs_repair_one_zone(struct btrfs_fs_info *fs_info, u64 logical)
{
	struct btrfs_block_group *cache;

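	/* Do not attempt to repair in degraded state */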
	if (btrfs_test_opt(fs_info, DEGRADED))
		return 0;

	cache = btrfs_lookup_block_group(fs_info, logical);
	if (!cache)
		return 0;

	spin_lock(&cache->lock);
	if (cache->relocating_repair) {
		spin_unlock(&cache->lock);
		btrfs_put_block_group(cache);
		return 0;
	}
	cache->relocating_repair = 1;
	spin_unlock(&cache->lock);

	kthread_run(relocating_repair_kthread, cache,
		    "btrfs-relocating-repair");

	return 0;
}