1
2
3
4
5
6#include <linux/sched.h>
7#include <linux/sched/mm.h>
8#include <linux/bio.h>
9#include <linux/slab.h>
10#include <linux/blkdev.h>
11#include <linux/ratelimit.h>
12#include <linux/kthread.h>
13#include <linux/raid/pq.h>
14#include <linux/semaphore.h>
15#include <linux/uuid.h>
16#include <linux/list_sort.h>
17#include "misc.h"
18#include "ctree.h"
19#include "extent_map.h"
20#include "disk-io.h"
21#include "transaction.h"
22#include "print-tree.h"
23#include "volumes.h"
24#include "raid56.h"
25#include "async-thread.h"
26#include "check-integrity.h"
27#include "rcu-string.h"
28#include "dev-replace.h"
29#include "sysfs.h"
30#include "tree-checker.h"
31#include "space-info.h"
32#include "block-group.h"
33#include "discard.h"
34#include "zoned.h"
35
36const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
37 [BTRFS_RAID_RAID10] = {
38 .sub_stripes = 2,
39 .dev_stripes = 1,
40 .devs_max = 0,
41 .devs_min = 4,
42 .tolerated_failures = 1,
43 .devs_increment = 2,
44 .ncopies = 2,
45 .nparity = 0,
46 .raid_name = "raid10",
47 .bg_flag = BTRFS_BLOCK_GROUP_RAID10,
48 .mindev_error = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET,
49 },
50 [BTRFS_RAID_RAID1] = {
51 .sub_stripes = 1,
52 .dev_stripes = 1,
53 .devs_max = 2,
54 .devs_min = 2,
55 .tolerated_failures = 1,
56 .devs_increment = 2,
57 .ncopies = 2,
58 .nparity = 0,
59 .raid_name = "raid1",
60 .bg_flag = BTRFS_BLOCK_GROUP_RAID1,
61 .mindev_error = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET,
62 },
63 [BTRFS_RAID_RAID1C3] = {
64 .sub_stripes = 1,
65 .dev_stripes = 1,
66 .devs_max = 3,
67 .devs_min = 3,
68 .tolerated_failures = 2,
69 .devs_increment = 3,
70 .ncopies = 3,
71 .nparity = 0,
72 .raid_name = "raid1c3",
73 .bg_flag = BTRFS_BLOCK_GROUP_RAID1C3,
74 .mindev_error = BTRFS_ERROR_DEV_RAID1C3_MIN_NOT_MET,
75 },
76 [BTRFS_RAID_RAID1C4] = {
77 .sub_stripes = 1,
78 .dev_stripes = 1,
79 .devs_max = 4,
80 .devs_min = 4,
81 .tolerated_failures = 3,
82 .devs_increment = 4,
83 .ncopies = 4,
84 .nparity = 0,
85 .raid_name = "raid1c4",
86 .bg_flag = BTRFS_BLOCK_GROUP_RAID1C4,
87 .mindev_error = BTRFS_ERROR_DEV_RAID1C4_MIN_NOT_MET,
88 },
89 [BTRFS_RAID_DUP] = {
90 .sub_stripes = 1,
91 .dev_stripes = 2,
92 .devs_max = 1,
93 .devs_min = 1,
94 .tolerated_failures = 0,
95 .devs_increment = 1,
96 .ncopies = 2,
97 .nparity = 0,
98 .raid_name = "dup",
99 .bg_flag = BTRFS_BLOCK_GROUP_DUP,
100 .mindev_error = 0,
101 },
102 [BTRFS_RAID_RAID0] = {
103 .sub_stripes = 1,
104 .dev_stripes = 1,
105 .devs_max = 0,
106 .devs_min = 2,
107 .tolerated_failures = 0,
108 .devs_increment = 1,
109 .ncopies = 1,
110 .nparity = 0,
111 .raid_name = "raid0",
112 .bg_flag = BTRFS_BLOCK_GROUP_RAID0,
113 .mindev_error = 0,
114 },
115 [BTRFS_RAID_SINGLE] = {
116 .sub_stripes = 1,
117 .dev_stripes = 1,
118 .devs_max = 1,
119 .devs_min = 1,
120 .tolerated_failures = 0,
121 .devs_increment = 1,
122 .ncopies = 1,
123 .nparity = 0,
124 .raid_name = "single",
125 .bg_flag = 0,
126 .mindev_error = 0,
127 },
128 [BTRFS_RAID_RAID5] = {
129 .sub_stripes = 1,
130 .dev_stripes = 1,
131 .devs_max = 0,
132 .devs_min = 2,
133 .tolerated_failures = 1,
134 .devs_increment = 1,
135 .ncopies = 1,
136 .nparity = 1,
137 .raid_name = "raid5",
138 .bg_flag = BTRFS_BLOCK_GROUP_RAID5,
139 .mindev_error = BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET,
140 },
141 [BTRFS_RAID_RAID6] = {
142 .sub_stripes = 1,
143 .dev_stripes = 1,
144 .devs_max = 0,
145 .devs_min = 3,
146 .tolerated_failures = 2,
147 .devs_increment = 1,
148 .ncopies = 1,
149 .nparity = 2,
150 .raid_name = "raid6",
151 .bg_flag = BTRFS_BLOCK_GROUP_RAID6,
152 .mindev_error = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,
153 },
154};
155
156const char *btrfs_bg_type_to_raid_name(u64 flags)
157{
158 const int index = btrfs_bg_flags_to_raid_index(flags);
159
160 if (index >= BTRFS_NR_RAID_TYPES)
161 return NULL;
162
163 return btrfs_raid_array[index].raid_name;
164}
165
166
167
168
169
170void btrfs_describe_block_groups(u64 bg_flags, char *buf, u32 size_buf)
171{
172 int i;
173 int ret;
174 char *bp = buf;
175 u64 flags = bg_flags;
176 u32 size_bp = size_buf;
177
178 if (!flags) {
179 strcpy(bp, "NONE");
180 return;
181 }
182
183#define DESCRIBE_FLAG(flag, desc) \
184 do { \
185 if (flags & (flag)) { \
186 ret = snprintf(bp, size_bp, "%s|", (desc)); \
187 if (ret < 0 || ret >= size_bp) \
188 goto out_overflow; \
189 size_bp -= ret; \
190 bp += ret; \
191 flags &= ~(flag); \
192 } \
193 } while (0)
194
195 DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_DATA, "data");
196 DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_SYSTEM, "system");
197 DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_METADATA, "metadata");
198
199 DESCRIBE_FLAG(BTRFS_AVAIL_ALLOC_BIT_SINGLE, "single");
200 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
201 DESCRIBE_FLAG(btrfs_raid_array[i].bg_flag,
202 btrfs_raid_array[i].raid_name);
203#undef DESCRIBE_FLAG
204
205 if (flags) {
206 ret = snprintf(bp, size_bp, "0x%llx|", flags);
207 size_bp -= ret;
208 }
209
210 if (size_bp < size_buf)
211 buf[size_buf - size_bp - 1] = '\0';
212
213
214
215
216
217out_overflow:;
218}
219
220static int init_first_rw_device(struct btrfs_trans_handle *trans);
221static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info);
222static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
223static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
224static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
225 enum btrfs_map_op op,
226 u64 logical, u64 *length,
227 struct btrfs_bio **bbio_ret,
228 int mirror_num, int need_raid_map);
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330DEFINE_MUTEX(uuid_mutex);
331static LIST_HEAD(fs_uuids);
332struct list_head * __attribute_const__ btrfs_get_fs_uuids(void)
333{
334 return &fs_uuids;
335}
336
337
338
339
340
341
342
343
344
345
346static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid,
347 const u8 *metadata_fsid)
348{
349 struct btrfs_fs_devices *fs_devs;
350
351 fs_devs = kzalloc(sizeof(*fs_devs), GFP_KERNEL);
352 if (!fs_devs)
353 return ERR_PTR(-ENOMEM);
354
355 mutex_init(&fs_devs->device_list_mutex);
356
357 INIT_LIST_HEAD(&fs_devs->devices);
358 INIT_LIST_HEAD(&fs_devs->alloc_list);
359 INIT_LIST_HEAD(&fs_devs->fs_list);
360 INIT_LIST_HEAD(&fs_devs->seed_list);
361 if (fsid)
362 memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE);
363
364 if (metadata_fsid)
365 memcpy(fs_devs->metadata_uuid, metadata_fsid, BTRFS_FSID_SIZE);
366 else if (fsid)
367 memcpy(fs_devs->metadata_uuid, fsid, BTRFS_FSID_SIZE);
368
369 return fs_devs;
370}
371
372void btrfs_free_device(struct btrfs_device *device)
373{
374 WARN_ON(!list_empty(&device->post_commit_list));
375 rcu_string_free(device->name);
376 extent_io_tree_release(&device->alloc_state);
377 bio_put(device->flush_bio);
378 btrfs_destroy_dev_zone_info(device);
379 kfree(device);
380}
381
382static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
383{
384 struct btrfs_device *device;
385 WARN_ON(fs_devices->opened);
386 while (!list_empty(&fs_devices->devices)) {
387 device = list_entry(fs_devices->devices.next,
388 struct btrfs_device, dev_list);
389 list_del(&device->dev_list);
390 btrfs_free_device(device);
391 }
392 kfree(fs_devices);
393}
394
395void __exit btrfs_cleanup_fs_uuids(void)
396{
397 struct btrfs_fs_devices *fs_devices;
398
399 while (!list_empty(&fs_uuids)) {
400 fs_devices = list_entry(fs_uuids.next,
401 struct btrfs_fs_devices, fs_list);
402 list_del(&fs_devices->fs_list);
403 free_fs_devices(fs_devices);
404 }
405}
406
407
408
409
410
411
412static struct btrfs_device *__alloc_device(struct btrfs_fs_info *fs_info)
413{
414 struct btrfs_device *dev;
415
416 dev = kzalloc(sizeof(*dev), GFP_KERNEL);
417 if (!dev)
418 return ERR_PTR(-ENOMEM);
419
420
421
422
423
424 dev->flush_bio = bio_kmalloc(GFP_KERNEL, 0);
425 if (!dev->flush_bio) {
426 kfree(dev);
427 return ERR_PTR(-ENOMEM);
428 }
429
430 INIT_LIST_HEAD(&dev->dev_list);
431 INIT_LIST_HEAD(&dev->dev_alloc_list);
432 INIT_LIST_HEAD(&dev->post_commit_list);
433
434 atomic_set(&dev->reada_in_flight, 0);
435 atomic_set(&dev->dev_stats_ccnt, 0);
436 btrfs_device_data_ordered_init(dev);
437 INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
438 INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
439 extent_io_tree_init(fs_info, &dev->alloc_state,
440 IO_TREE_DEVICE_ALLOC_STATE, NULL);
441
442 return dev;
443}
444
445static noinline struct btrfs_fs_devices *find_fsid(
446 const u8 *fsid, const u8 *metadata_fsid)
447{
448 struct btrfs_fs_devices *fs_devices;
449
450 ASSERT(fsid);
451
452
453 list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
454 if (metadata_fsid) {
455 if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0
456 && memcmp(metadata_fsid, fs_devices->metadata_uuid,
457 BTRFS_FSID_SIZE) == 0)
458 return fs_devices;
459 } else {
460 if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
461 return fs_devices;
462 }
463 }
464 return NULL;
465}
466
467static struct btrfs_fs_devices *find_fsid_with_metadata_uuid(
468 struct btrfs_super_block *disk_super)
469{
470
471 struct btrfs_fs_devices *fs_devices;
472
473
474
475
476
477
478
479 list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
480 if (fs_devices->fsid_change &&
481 memcmp(disk_super->metadata_uuid, fs_devices->fsid,
482 BTRFS_FSID_SIZE) == 0 &&
483 memcmp(fs_devices->fsid, fs_devices->metadata_uuid,
484 BTRFS_FSID_SIZE) == 0) {
485 return fs_devices;
486 }
487 }
488
489
490
491
492
493
494 list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
495 if (fs_devices->fsid_change &&
496 memcmp(fs_devices->metadata_uuid,
497 fs_devices->fsid, BTRFS_FSID_SIZE) != 0 &&
498 memcmp(disk_super->metadata_uuid, fs_devices->metadata_uuid,
499 BTRFS_FSID_SIZE) == 0) {
500 return fs_devices;
501 }
502 }
503
504 return find_fsid(disk_super->fsid, disk_super->metadata_uuid);
505}
506
507
508static int
509btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
510 int flush, struct block_device **bdev,
511 struct btrfs_super_block **disk_super)
512{
513 int ret;
514
515 *bdev = blkdev_get_by_path(device_path, flags, holder);
516
517 if (IS_ERR(*bdev)) {
518 ret = PTR_ERR(*bdev);
519 goto error;
520 }
521
522 if (flush)
523 filemap_write_and_wait((*bdev)->bd_inode->i_mapping);
524 ret = set_blocksize(*bdev, BTRFS_BDEV_BLOCKSIZE);
525 if (ret) {
526 blkdev_put(*bdev, flags);
527 goto error;
528 }
529 invalidate_bdev(*bdev);
530 *disk_super = btrfs_read_dev_super(*bdev);
531 if (IS_ERR(*disk_super)) {
532 ret = PTR_ERR(*disk_super);
533 blkdev_put(*bdev, flags);
534 goto error;
535 }
536
537 return 0;
538
539error:
540 *bdev = NULL;
541 return ret;
542}
543
544static bool device_path_matched(const char *path, struct btrfs_device *device)
545{
546 int found;
547
548 rcu_read_lock();
549 found = strcmp(rcu_str_deref(device->name), path);
550 rcu_read_unlock();
551
552 return found == 0;
553}
554
555
556
557
558
559
560
561
562
563
564
565
566static int btrfs_free_stale_devices(const char *path,
567 struct btrfs_device *skip_device)
568{
569 struct btrfs_fs_devices *fs_devices, *tmp_fs_devices;
570 struct btrfs_device *device, *tmp_device;
571 int ret = 0;
572
573 if (path)
574 ret = -ENOENT;
575
576 list_for_each_entry_safe(fs_devices, tmp_fs_devices, &fs_uuids, fs_list) {
577
578 mutex_lock(&fs_devices->device_list_mutex);
579 list_for_each_entry_safe(device, tmp_device,
580 &fs_devices->devices, dev_list) {
581 if (skip_device && skip_device == device)
582 continue;
583 if (path && !device->name)
584 continue;
585 if (path && !device_path_matched(path, device))
586 continue;
587 if (fs_devices->opened) {
588
589 if (path && ret != 0)
590 ret = -EBUSY;
591 break;
592 }
593
594
595 fs_devices->num_devices--;
596 list_del(&device->dev_list);
597 btrfs_free_device(device);
598
599 ret = 0;
600 }
601 mutex_unlock(&fs_devices->device_list_mutex);
602
603 if (fs_devices->num_devices == 0) {
604 btrfs_sysfs_remove_fsid(fs_devices);
605 list_del(&fs_devices->fs_list);
606 free_fs_devices(fs_devices);
607 }
608 }
609
610 return ret;
611}
612
613
614
615
616
617
618static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
619 struct btrfs_device *device, fmode_t flags,
620 void *holder)
621{
622 struct request_queue *q;
623 struct block_device *bdev;
624 struct btrfs_super_block *disk_super;
625 u64 devid;
626 int ret;
627
628 if (device->bdev)
629 return -EINVAL;
630 if (!device->name)
631 return -EINVAL;
632
633 ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1,
634 &bdev, &disk_super);
635 if (ret)
636 return ret;
637
638 devid = btrfs_stack_device_id(&disk_super->dev_item);
639 if (devid != device->devid)
640 goto error_free_page;
641
642 if (memcmp(device->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE))
643 goto error_free_page;
644
645 device->generation = btrfs_super_generation(disk_super);
646
647 if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) {
648 if (btrfs_super_incompat_flags(disk_super) &
649 BTRFS_FEATURE_INCOMPAT_METADATA_UUID) {
650 pr_err(
651 "BTRFS: Invalid seeding and uuid-changed device detected\n");
652 goto error_free_page;
653 }
654
655 clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
656 fs_devices->seeding = true;
657 } else {
658 if (bdev_read_only(bdev))
659 clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
660 else
661 set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
662 }
663
664 q = bdev_get_queue(bdev);
665 if (!blk_queue_nonrot(q))
666 fs_devices->rotating = true;
667
668 device->bdev = bdev;
669 clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
670 device->mode = flags;
671
672 fs_devices->open_devices++;
673 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
674 device->devid != BTRFS_DEV_REPLACE_DEVID) {
675 fs_devices->rw_devices++;
676 list_add_tail(&device->dev_alloc_list, &fs_devices->alloc_list);
677 }
678 btrfs_release_disk_super(disk_super);
679
680 return 0;
681
682error_free_page:
683 btrfs_release_disk_super(disk_super);
684 blkdev_put(bdev, flags);
685
686 return -EINVAL;
687}
688
689
690
691
692
693
694
695static struct btrfs_fs_devices *find_fsid_inprogress(
696 struct btrfs_super_block *disk_super)
697{
698 struct btrfs_fs_devices *fs_devices;
699
700 list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
701 if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
702 BTRFS_FSID_SIZE) != 0 &&
703 memcmp(fs_devices->metadata_uuid, disk_super->fsid,
704 BTRFS_FSID_SIZE) == 0 && !fs_devices->fsid_change) {
705 return fs_devices;
706 }
707 }
708
709 return find_fsid(disk_super->fsid, NULL);
710}
711
712
713static struct btrfs_fs_devices *find_fsid_changed(
714 struct btrfs_super_block *disk_super)
715{
716 struct btrfs_fs_devices *fs_devices;
717
718
719
720
721
722
723
724
725
726
727 list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
728
729 if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
730 BTRFS_FSID_SIZE) != 0 &&
731 memcmp(fs_devices->metadata_uuid, disk_super->metadata_uuid,
732 BTRFS_FSID_SIZE) == 0 &&
733 memcmp(fs_devices->fsid, disk_super->fsid,
734 BTRFS_FSID_SIZE) != 0)
735 return fs_devices;
736
737
738 if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
739 BTRFS_FSID_SIZE) == 0 &&
740 memcmp(fs_devices->fsid, disk_super->metadata_uuid,
741 BTRFS_FSID_SIZE) == 0)
742 return fs_devices;
743 }
744
745 return NULL;
746}
747
748static struct btrfs_fs_devices *find_fsid_reverted_metadata(
749 struct btrfs_super_block *disk_super)
750{
751 struct btrfs_fs_devices *fs_devices;
752
753
754
755
756
757
758
759
760
761
762 list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
763 if (memcmp(fs_devices->fsid, fs_devices->metadata_uuid,
764 BTRFS_FSID_SIZE) != 0 &&
765 memcmp(fs_devices->metadata_uuid, disk_super->fsid,
766 BTRFS_FSID_SIZE) == 0 &&
767 fs_devices->fsid_change)
768 return fs_devices;
769 }
770
771 return NULL;
772}
773
774
775
776
777
778
779
780static noinline struct btrfs_device *device_list_add(const char *path,
781 struct btrfs_super_block *disk_super,
782 bool *new_device_added)
783{
784 struct btrfs_device *device;
785 struct btrfs_fs_devices *fs_devices = NULL;
786 struct rcu_string *name;
787 u64 found_transid = btrfs_super_generation(disk_super);
788 u64 devid = btrfs_stack_device_id(&disk_super->dev_item);
789 bool has_metadata_uuid = (btrfs_super_incompat_flags(disk_super) &
790 BTRFS_FEATURE_INCOMPAT_METADATA_UUID);
791 bool fsid_change_in_progress = (btrfs_super_flags(disk_super) &
792 BTRFS_SUPER_FLAG_CHANGING_FSID_V2);
793
794 if (fsid_change_in_progress) {
795 if (!has_metadata_uuid)
796 fs_devices = find_fsid_inprogress(disk_super);
797 else
798 fs_devices = find_fsid_changed(disk_super);
799 } else if (has_metadata_uuid) {
800 fs_devices = find_fsid_with_metadata_uuid(disk_super);
801 } else {
802 fs_devices = find_fsid_reverted_metadata(disk_super);
803 if (!fs_devices)
804 fs_devices = find_fsid(disk_super->fsid, NULL);
805 }
806
807
808 if (!fs_devices) {
809 if (has_metadata_uuid)
810 fs_devices = alloc_fs_devices(disk_super->fsid,
811 disk_super->metadata_uuid);
812 else
813 fs_devices = alloc_fs_devices(disk_super->fsid, NULL);
814
815 if (IS_ERR(fs_devices))
816 return ERR_CAST(fs_devices);
817
818 fs_devices->fsid_change = fsid_change_in_progress;
819
820 mutex_lock(&fs_devices->device_list_mutex);
821 list_add(&fs_devices->fs_list, &fs_uuids);
822
823 device = NULL;
824 } else {
825 mutex_lock(&fs_devices->device_list_mutex);
826 device = btrfs_find_device(fs_devices, devid,
827 disk_super->dev_item.uuid, NULL);
828
829
830
831
832
833
834 if (fs_devices->fsid_change &&
835 found_transid > fs_devices->latest_generation) {
836 memcpy(fs_devices->fsid, disk_super->fsid,
837 BTRFS_FSID_SIZE);
838
839 if (has_metadata_uuid)
840 memcpy(fs_devices->metadata_uuid,
841 disk_super->metadata_uuid,
842 BTRFS_FSID_SIZE);
843 else
844 memcpy(fs_devices->metadata_uuid,
845 disk_super->fsid, BTRFS_FSID_SIZE);
846
847 fs_devices->fsid_change = false;
848 }
849 }
850
851 if (!device) {
852 if (fs_devices->opened) {
853 mutex_unlock(&fs_devices->device_list_mutex);
854 return ERR_PTR(-EBUSY);
855 }
856
857 device = btrfs_alloc_device(NULL, &devid,
858 disk_super->dev_item.uuid);
859 if (IS_ERR(device)) {
860 mutex_unlock(&fs_devices->device_list_mutex);
861
862 return device;
863 }
864
865 name = rcu_string_strdup(path, GFP_NOFS);
866 if (!name) {
867 btrfs_free_device(device);
868 mutex_unlock(&fs_devices->device_list_mutex);
869 return ERR_PTR(-ENOMEM);
870 }
871 rcu_assign_pointer(device->name, name);
872
873 list_add_rcu(&device->dev_list, &fs_devices->devices);
874 fs_devices->num_devices++;
875
876 device->fs_devices = fs_devices;
877 *new_device_added = true;
878
879 if (disk_super->label[0])
880 pr_info(
881 "BTRFS: device label %s devid %llu transid %llu %s scanned by %s (%d)\n",
882 disk_super->label, devid, found_transid, path,
883 current->comm, task_pid_nr(current));
884 else
885 pr_info(
886 "BTRFS: device fsid %pU devid %llu transid %llu %s scanned by %s (%d)\n",
887 disk_super->fsid, devid, found_transid, path,
888 current->comm, task_pid_nr(current));
889
890 } else if (!device->name || strcmp(device->name->str, path)) {
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917 if (!fs_devices->opened && found_transid < device->generation) {
918
919
920
921
922
923
924
925 mutex_unlock(&fs_devices->device_list_mutex);
926 return ERR_PTR(-EEXIST);
927 }
928
929
930
931
932
933 if (device->bdev) {
934 int error;
935 dev_t path_dev;
936
937 error = lookup_bdev(path, &path_dev);
938 if (error) {
939 mutex_unlock(&fs_devices->device_list_mutex);
940 return ERR_PTR(error);
941 }
942
943 if (device->bdev->bd_dev != path_dev) {
944 mutex_unlock(&fs_devices->device_list_mutex);
945
946
947
948
949
950
951 btrfs_warn_in_rcu(NULL,
952 "duplicate device %s devid %llu generation %llu scanned by %s (%d)",
953 path, devid, found_transid,
954 current->comm,
955 task_pid_nr(current));
956 return ERR_PTR(-EEXIST);
957 }
958 btrfs_info_in_rcu(device->fs_info,
959 "devid %llu device path %s changed to %s scanned by %s (%d)",
960 devid, rcu_str_deref(device->name),
961 path, current->comm,
962 task_pid_nr(current));
963 }
964
965 name = rcu_string_strdup(path, GFP_NOFS);
966 if (!name) {
967 mutex_unlock(&fs_devices->device_list_mutex);
968 return ERR_PTR(-ENOMEM);
969 }
970 rcu_string_free(device->name);
971 rcu_assign_pointer(device->name, name);
972 if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) {
973 fs_devices->missing_devices--;
974 clear_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
975 }
976 }
977
978
979
980
981
982
983
984 if (!fs_devices->opened) {
985 device->generation = found_transid;
986 fs_devices->latest_generation = max_t(u64, found_transid,
987 fs_devices->latest_generation);
988 }
989
990 fs_devices->total_devices = btrfs_super_num_devices(disk_super);
991
992 mutex_unlock(&fs_devices->device_list_mutex);
993 return device;
994}
995
996static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
997{
998 struct btrfs_fs_devices *fs_devices;
999 struct btrfs_device *device;
1000 struct btrfs_device *orig_dev;
1001 int ret = 0;
1002
1003 fs_devices = alloc_fs_devices(orig->fsid, NULL);
1004 if (IS_ERR(fs_devices))
1005 return fs_devices;
1006
1007 mutex_lock(&orig->device_list_mutex);
1008 fs_devices->total_devices = orig->total_devices;
1009
1010 list_for_each_entry(orig_dev, &orig->devices, dev_list) {
1011 struct rcu_string *name;
1012
1013 device = btrfs_alloc_device(NULL, &orig_dev->devid,
1014 orig_dev->uuid);
1015 if (IS_ERR(device)) {
1016 ret = PTR_ERR(device);
1017 goto error;
1018 }
1019
1020
1021
1022
1023
1024 if (orig_dev->name) {
1025 name = rcu_string_strdup(orig_dev->name->str,
1026 GFP_KERNEL);
1027 if (!name) {
1028 btrfs_free_device(device);
1029 ret = -ENOMEM;
1030 goto error;
1031 }
1032 rcu_assign_pointer(device->name, name);
1033 }
1034
1035 list_add(&device->dev_list, &fs_devices->devices);
1036 device->fs_devices = fs_devices;
1037 fs_devices->num_devices++;
1038 }
1039 mutex_unlock(&orig->device_list_mutex);
1040 return fs_devices;
1041error:
1042 mutex_unlock(&orig->device_list_mutex);
1043 free_fs_devices(fs_devices);
1044 return ERR_PTR(ret);
1045}
1046
1047static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices,
1048 struct btrfs_device **latest_dev)
1049{
1050 struct btrfs_device *device, *next;
1051
1052
1053 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
1054 if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state)) {
1055 if (!test_bit(BTRFS_DEV_STATE_REPLACE_TGT,
1056 &device->dev_state) &&
1057 !test_bit(BTRFS_DEV_STATE_MISSING,
1058 &device->dev_state) &&
1059 (!*latest_dev ||
1060 device->generation > (*latest_dev)->generation)) {
1061 *latest_dev = device;
1062 }
1063 continue;
1064 }
1065
1066
1067
1068
1069
1070 if (device->devid == BTRFS_DEV_REPLACE_DEVID)
1071 continue;
1072
1073 if (device->bdev) {
1074 blkdev_put(device->bdev, device->mode);
1075 device->bdev = NULL;
1076 fs_devices->open_devices--;
1077 }
1078 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
1079 list_del_init(&device->dev_alloc_list);
1080 clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
1081 fs_devices->rw_devices--;
1082 }
1083 list_del_init(&device->dev_list);
1084 fs_devices->num_devices--;
1085 btrfs_free_device(device);
1086 }
1087
1088}
1089
1090
1091
1092
1093
1094void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices)
1095{
1096 struct btrfs_device *latest_dev = NULL;
1097 struct btrfs_fs_devices *seed_dev;
1098
1099 mutex_lock(&uuid_mutex);
1100 __btrfs_free_extra_devids(fs_devices, &latest_dev);
1101
1102 list_for_each_entry(seed_dev, &fs_devices->seed_list, seed_list)
1103 __btrfs_free_extra_devids(seed_dev, &latest_dev);
1104
1105 fs_devices->latest_bdev = latest_dev->bdev;
1106
1107 mutex_unlock(&uuid_mutex);
1108}
1109
1110static void btrfs_close_bdev(struct btrfs_device *device)
1111{
1112 if (!device->bdev)
1113 return;
1114
1115 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
1116 sync_blockdev(device->bdev);
1117 invalidate_bdev(device->bdev);
1118 }
1119
1120 blkdev_put(device->bdev, device->mode);
1121}
1122
1123static void btrfs_close_one_device(struct btrfs_device *device)
1124{
1125 struct btrfs_fs_devices *fs_devices = device->fs_devices;
1126
1127 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
1128 device->devid != BTRFS_DEV_REPLACE_DEVID) {
1129 list_del_init(&device->dev_alloc_list);
1130 fs_devices->rw_devices--;
1131 }
1132
1133 if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
1134 fs_devices->missing_devices--;
1135
1136 btrfs_close_bdev(device);
1137 if (device->bdev) {
1138 fs_devices->open_devices--;
1139 device->bdev = NULL;
1140 }
1141 clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
1142 btrfs_destroy_dev_zone_info(device);
1143
1144 device->fs_info = NULL;
1145 atomic_set(&device->dev_stats_ccnt, 0);
1146 extent_io_tree_release(&device->alloc_state);
1147
1148
1149 ASSERT(!test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state));
1150 ASSERT(!test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state));
1151 ASSERT(list_empty(&device->dev_alloc_list));
1152 ASSERT(list_empty(&device->post_commit_list));
1153 ASSERT(atomic_read(&device->reada_in_flight) == 0);
1154}
1155
1156static void close_fs_devices(struct btrfs_fs_devices *fs_devices)
1157{
1158 struct btrfs_device *device, *tmp;
1159
1160 lockdep_assert_held(&uuid_mutex);
1161
1162 if (--fs_devices->opened > 0)
1163 return;
1164
1165 list_for_each_entry_safe(device, tmp, &fs_devices->devices, dev_list)
1166 btrfs_close_one_device(device);
1167
1168 WARN_ON(fs_devices->open_devices);
1169 WARN_ON(fs_devices->rw_devices);
1170 fs_devices->opened = 0;
1171 fs_devices->seeding = false;
1172 fs_devices->fs_info = NULL;
1173}
1174
1175void btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
1176{
1177 LIST_HEAD(list);
1178 struct btrfs_fs_devices *tmp;
1179
1180 mutex_lock(&uuid_mutex);
1181 close_fs_devices(fs_devices);
1182 if (!fs_devices->opened)
1183 list_splice_init(&fs_devices->seed_list, &list);
1184
1185 list_for_each_entry_safe(fs_devices, tmp, &list, seed_list) {
1186 close_fs_devices(fs_devices);
1187 list_del(&fs_devices->seed_list);
1188 free_fs_devices(fs_devices);
1189 }
1190 mutex_unlock(&uuid_mutex);
1191}
1192
1193static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
1194 fmode_t flags, void *holder)
1195{
1196 struct btrfs_device *device;
1197 struct btrfs_device *latest_dev = NULL;
1198 struct btrfs_device *tmp_device;
1199
1200 flags |= FMODE_EXCL;
1201
1202 list_for_each_entry_safe(device, tmp_device, &fs_devices->devices,
1203 dev_list) {
1204 int ret;
1205
1206 ret = btrfs_open_one_device(fs_devices, device, flags, holder);
1207 if (ret == 0 &&
1208 (!latest_dev || device->generation > latest_dev->generation)) {
1209 latest_dev = device;
1210 } else if (ret == -ENODATA) {
1211 fs_devices->num_devices--;
1212 list_del(&device->dev_list);
1213 btrfs_free_device(device);
1214 }
1215 }
1216 if (fs_devices->open_devices == 0)
1217 return -EINVAL;
1218
1219 fs_devices->opened = 1;
1220 fs_devices->latest_bdev = latest_dev->bdev;
1221 fs_devices->total_rw_bytes = 0;
1222 fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_REGULAR;
1223 fs_devices->read_policy = BTRFS_READ_POLICY_PID;
1224
1225 return 0;
1226}
1227
1228static int devid_cmp(void *priv, const struct list_head *a,
1229 const struct list_head *b)
1230{
1231 struct btrfs_device *dev1, *dev2;
1232
1233 dev1 = list_entry(a, struct btrfs_device, dev_list);
1234 dev2 = list_entry(b, struct btrfs_device, dev_list);
1235
1236 if (dev1->devid < dev2->devid)
1237 return -1;
1238 else if (dev1->devid > dev2->devid)
1239 return 1;
1240 return 0;
1241}
1242
1243int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
1244 fmode_t flags, void *holder)
1245{
1246 int ret;
1247
1248 lockdep_assert_held(&uuid_mutex);
1249
1250
1251
1252
1253
1254
1255
1256
1257 if (fs_devices->opened) {
1258 fs_devices->opened++;
1259 ret = 0;
1260 } else {
1261 list_sort(NULL, &fs_devices->devices, devid_cmp);
1262 ret = open_fs_devices(fs_devices, flags, holder);
1263 }
1264
1265 return ret;
1266}
1267
/*
 * Drop the page reference backing @super.  Any address inside the page
 * works, since the page is derived from it with virt_to_page().
 */
void btrfs_release_disk_super(struct btrfs_super_block *super)
{
	put_page(virt_to_page(super));
}
1274
1275static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev,
1276 u64 bytenr, u64 bytenr_orig)
1277{
1278 struct btrfs_super_block *disk_super;
1279 struct page *page;
1280 void *p;
1281 pgoff_t index;
1282
1283
1284 if (bytenr + PAGE_SIZE >= i_size_read(bdev->bd_inode))
1285 return ERR_PTR(-EINVAL);
1286
1287
1288 if (sizeof(*disk_super) > PAGE_SIZE)
1289 return ERR_PTR(-EINVAL);
1290
1291
1292 index = bytenr >> PAGE_SHIFT;
1293 if ((bytenr + sizeof(*disk_super) - 1) >> PAGE_SHIFT != index)
1294 return ERR_PTR(-EINVAL);
1295
1296
1297 page = read_cache_page_gfp(bdev->bd_inode->i_mapping, index, GFP_KERNEL);
1298
1299 if (IS_ERR(page))
1300 return ERR_CAST(page);
1301
1302 p = page_address(page);
1303
1304
1305 disk_super = p + offset_in_page(bytenr);
1306
1307 if (btrfs_super_bytenr(disk_super) != bytenr_orig ||
1308 btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
1309 btrfs_release_disk_super(p);
1310 return ERR_PTR(-EINVAL);
1311 }
1312
1313 if (disk_super->label[0] && disk_super->label[BTRFS_LABEL_SIZE - 1])
1314 disk_super->label[BTRFS_LABEL_SIZE - 1] = 0;
1315
1316 return disk_super;
1317}
1318
1319int btrfs_forget_devices(const char *path)
1320{
1321 int ret;
1322
1323 mutex_lock(&uuid_mutex);
1324 ret = btrfs_free_stale_devices(strlen(path) ? path : NULL, NULL);
1325 mutex_unlock(&uuid_mutex);
1326
1327 return ret;
1328}
1329
1330
1331
1332
1333
1334
1335struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
1336 void *holder)
1337{
1338 struct btrfs_super_block *disk_super;
1339 bool new_device_added = false;
1340 struct btrfs_device *device = NULL;
1341 struct block_device *bdev;
1342 u64 bytenr, bytenr_orig;
1343 int ret;
1344
1345 lockdep_assert_held(&uuid_mutex);
1346
1347
1348
1349
1350
1351
1352
1353 flags |= FMODE_EXCL;
1354
1355 bdev = blkdev_get_by_path(path, flags, holder);
1356 if (IS_ERR(bdev))
1357 return ERR_CAST(bdev);
1358
1359 bytenr_orig = btrfs_sb_offset(0);
1360 ret = btrfs_sb_log_location_bdev(bdev, 0, READ, &bytenr);
1361 if (ret)
1362 return ERR_PTR(ret);
1363
1364 disk_super = btrfs_read_disk_super(bdev, bytenr, bytenr_orig);
1365 if (IS_ERR(disk_super)) {
1366 device = ERR_CAST(disk_super);
1367 goto error_bdev_put;
1368 }
1369
1370 device = device_list_add(path, disk_super, &new_device_added);
1371 if (!IS_ERR(device)) {
1372 if (new_device_added)
1373 btrfs_free_stale_devices(path, device);
1374 }
1375
1376 btrfs_release_disk_super(disk_super);
1377
1378error_bdev_put:
1379 blkdev_put(bdev, flags);
1380
1381 return device;
1382}
1383
1384
1385
1386
1387
1388static bool contains_pending_extent(struct btrfs_device *device, u64 *start,
1389 u64 len)
1390{
1391 u64 physical_start, physical_end;
1392
1393 lockdep_assert_held(&device->fs_info->chunk_mutex);
1394
1395 if (!find_first_extent_bit(&device->alloc_state, *start,
1396 &physical_start, &physical_end,
1397 CHUNK_ALLOCATED, NULL)) {
1398
1399 if (in_range(physical_start, *start, len) ||
1400 in_range(*start, physical_start,
1401 physical_end - physical_start)) {
1402 *start = physical_end + 1;
1403 return true;
1404 }
1405 }
1406 return false;
1407}
1408
1409static u64 dev_extent_search_start(struct btrfs_device *device, u64 start)
1410{
1411 switch (device->fs_devices->chunk_alloc_policy) {
1412 case BTRFS_CHUNK_ALLOC_REGULAR:
1413
1414
1415
1416
1417
1418 return max_t(u64, start, SZ_1M);
1419 case BTRFS_CHUNK_ALLOC_ZONED:
1420
1421
1422
1423
1424
1425 return ALIGN(start, device->zone_info->zone_size);
1426 default:
1427 BUG();
1428 }
1429}
1430
/*
 * Zoned-policy refinement of a candidate hole.
 *
 * @device:     device being searched; its zone_info must be set
 * @hole_start: in/out start of the hole, expected to be zone aligned
 * @hole_size:  in/out size of the hole
 * @num_bytes:  size of the allocation being searched for
 *
 * Repeatedly moves the hole start to the position returned by
 * btrfs_find_allocatable_zones() and asks btrfs_ensure_empty_zones() to
 * accept [pos, pos + num_bytes), advancing one zone at a time on failure.
 *
 * Returns true if *hole_start / *hole_size were modified, false otherwise.
 */
static bool dev_extent_hole_check_zoned(struct btrfs_device *device,
					u64 *hole_start, u64 *hole_size,
					u64 num_bytes)
{
	u64 zone_size = device->zone_info->zone_size;
	u64 pos;
	int ret;
	bool changed = false;

	ASSERT(IS_ALIGNED(*hole_start, zone_size));

	while (*hole_size > 0) {
		pos = btrfs_find_allocatable_zones(device, *hole_start,
						   *hole_start + *hole_size,
						   num_bytes);
		if (pos != *hole_start) {
			/* Shrink the hole so that it begins at pos, keeping its end. */
			*hole_size = *hole_start + *hole_size - pos;
			*hole_start = pos;
			changed = true;
			/* What is left cannot hold the allocation any more. */
			if (*hole_size < num_bytes)
				break;
		}

		ret = btrfs_ensure_empty_zones(device, pos, num_bytes);

		/* The range was accepted, the current hole can be used. */
		if (!ret)
			return changed;

		/* -ERANGE: give up on this hole entirely by emptying it. */
		if (ret == -ERANGE) {
			*hole_start += *hole_size;
			*hole_size = 0;
			return true;
		}

		/* Any other failure: skip one zone and try again. */
		*hole_start += zone_size;
		*hole_size -= zone_size;
		changed = true;
	}

	return changed;
}
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485static bool dev_extent_hole_check(struct btrfs_device *device, u64 *hole_start,
1486 u64 *hole_size, u64 num_bytes)
1487{
1488 bool changed = false;
1489 u64 hole_end = *hole_start + *hole_size;
1490
1491 for (;;) {
1492
1493
1494
1495
1496 if (contains_pending_extent(device, hole_start, *hole_size)) {
1497 if (hole_end >= *hole_start)
1498 *hole_size = hole_end - *hole_start;
1499 else
1500 *hole_size = 0;
1501 changed = true;
1502 }
1503
1504 switch (device->fs_devices->chunk_alloc_policy) {
1505 case BTRFS_CHUNK_ALLOC_REGULAR:
1506
1507 break;
1508 case BTRFS_CHUNK_ALLOC_ZONED:
1509 if (dev_extent_hole_check_zoned(device, hole_start,
1510 hole_size, num_bytes)) {
1511 changed = true;
1512
1513
1514
1515
1516 continue;
1517 }
1518 break;
1519 default:
1520 BUG();
1521 }
1522
1523 break;
1524 }
1525
1526 return changed;
1527}
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
/*
 * Search a device for a free (unallocated) region.
 *
 * @device:       device to search
 * @num_bytes:    size of the free region wanted
 * @search_start: offset to start searching from; adjusted by
 *                dev_extent_search_start() before use
 * @start:        set to the start of the largest suitable hole seen
 * @len:          if not NULL, set to the size of that hole
 *
 * Walks the BTRFS_DEV_EXTENT_KEY items of @device in the commit root of the
 * device tree and examines the gaps between them, plus the gap between the
 * last extent and device->total_bytes. Every gap is passed through
 * dev_extent_hole_check() before it is considered.
 *
 * Returns 0 when a hole of at least @num_bytes was found, -ENOSPC when not
 * (then *start / *len describe the largest hole seen), -ENOMEM if no path
 * could be allocated (then *start / *len are not written), or a negative
 * error from the tree search.
 */
static int find_free_dev_extent_start(struct btrfs_device *device,
				u64 num_bytes, u64 search_start, u64 *start,
				u64 *len)
{
	struct btrfs_fs_info *fs_info = device->fs_info;
	struct btrfs_root *root = fs_info->dev_root;
	struct btrfs_key key;
	struct btrfs_dev_extent *dev_extent;
	struct btrfs_path *path;
	u64 hole_size;
	u64 max_hole_start;
	u64 max_hole_size;
	u64 extent_end;
	u64 search_end = device->total_bytes;
	int ret;
	int slot;
	struct extent_buffer *l;

	search_start = dev_extent_search_start(device, search_start);

	/* On a device with zone info the request should be zone aligned. */
	WARN_ON(device->zone_info &&
		!IS_ALIGNED(num_bytes, device->zone_info->zone_size));

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	max_hole_start = search_start;
	max_hole_size = 0;

again:
	/* Nothing to find past the end, or on a replace target device. */
	if (search_start >= search_end ||
		test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
		ret = -ENOSPC;
		goto out;
	}

	/* Read-only walk of the commit root: read ahead, take no tree locks. */
	path->reada = READA_FORWARD;
	path->search_commit_root = 1;
	path->skip_locking = 1;

	key.objectid = device->devid;
	key.offset = search_start;
	key.type = BTRFS_DEV_EXTENT_KEY;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	if (ret > 0) {
		/*
		 * No exact match: step back to the previous item of the same
		 * objectid and type, if there is one (presumably so that an
		 * extent starting before search_start is not missed).
		 */
		ret = btrfs_previous_item(root, path, key.objectid, key.type);
		if (ret < 0)
			goto out;
	}

	while (1) {
		l = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(l)) {
			/* End of this leaf: move on, or stop at the end of the tree. */
			ret = btrfs_next_leaf(root, path);
			if (ret == 0)
				continue;
			if (ret < 0)
				goto out;

			break;
		}
		btrfs_item_key_to_cpu(l, &key, slot);

		/* Items of lower devids are skipped, higher devids end the walk. */
		if (key.objectid < device->devid)
			goto next;

		if (key.objectid > device->devid)
			break;

		if (key.type != BTRFS_DEV_EXTENT_KEY)
			goto next;

		if (key.offset > search_start) {
			/* There is a gap between search_start and this extent. */
			hole_size = key.offset - search_start;
			dev_extent_hole_check(device, &search_start, &hole_size,
					      num_bytes);

			if (hole_size > max_hole_size) {
				max_hole_start = search_start;
				max_hole_size = hole_size;
			}

			/*
			 * The first hole that is large enough ends the search.
			 * It was recorded as the maximum just above, because
			 * any earlier maximum was smaller than num_bytes.
			 */
			if (hole_size >= num_bytes) {
				ret = 0;
				goto out;
			}
		}

		/* Continue searching after the end of this extent. */
		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
		extent_end = key.offset + btrfs_dev_extent_length(l,
								  dev_extent);
		if (extent_end > search_start)
			search_start = extent_end;
next:
		path->slots[0]++;
		cond_resched();
	}

	/*
	 * The walk ended; whatever lies between the last extent seen and the
	 * end of the device is the final candidate hole.
	 */
	if (search_end > search_start) {
		hole_size = search_end - search_start;
		if (dev_extent_hole_check(device, &search_start, &hole_size,
					  num_bytes)) {
			/* The hole was adjusted: redo the search from its new start. */
			btrfs_release_path(path);
			goto again;
		}

		if (hole_size > max_hole_size) {
			max_hole_start = search_start;
			max_hole_size = hole_size;
		}
	}

	/* Report whether the largest hole seen is big enough. */
	if (max_hole_size < num_bytes)
		ret = -ENOSPC;
	else
		ret = 0;

out:
	btrfs_free_path(path);
	*start = max_hole_start;
	if (len)
		*len = max_hole_size;
	return ret;
}
1700
1701int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
1702 u64 *start, u64 *len)
1703{
1704
1705 return find_free_dev_extent_start(device, num_bytes, 0, start, len);
1706}
1707
1708static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
1709 struct btrfs_device *device,
1710 u64 start, u64 *dev_extent_len)
1711{
1712 struct btrfs_fs_info *fs_info = device->fs_info;
1713 struct btrfs_root *root = fs_info->dev_root;
1714 int ret;
1715 struct btrfs_path *path;
1716 struct btrfs_key key;
1717 struct btrfs_key found_key;
1718 struct extent_buffer *leaf = NULL;
1719 struct btrfs_dev_extent *extent = NULL;
1720
1721 path = btrfs_alloc_path();
1722 if (!path)
1723 return -ENOMEM;
1724
1725 key.objectid = device->devid;
1726 key.offset = start;
1727 key.type = BTRFS_DEV_EXTENT_KEY;
1728again:
1729 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1730 if (ret > 0) {
1731 ret = btrfs_previous_item(root, path, key.objectid,
1732 BTRFS_DEV_EXTENT_KEY);
1733 if (ret)
1734 goto out;
1735 leaf = path->nodes[0];
1736 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1737 extent = btrfs_item_ptr(leaf, path->slots[0],
1738 struct btrfs_dev_extent);
1739 BUG_ON(found_key.offset > start || found_key.offset +
1740 btrfs_dev_extent_length(leaf, extent) < start);
1741 key = found_key;
1742 btrfs_release_path(path);
1743 goto again;
1744 } else if (ret == 0) {
1745 leaf = path->nodes[0];
1746 extent = btrfs_item_ptr(leaf, path->slots[0],
1747 struct btrfs_dev_extent);
1748 } else {
1749 goto out;
1750 }
1751
1752 *dev_extent_len = btrfs_dev_extent_length(leaf, extent);
1753
1754 ret = btrfs_del_item(trans, root, path);
1755 if (ret == 0)
1756 set_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags);
1757out:
1758 btrfs_free_path(path);
1759 return ret;
1760}
1761
1762static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
1763 struct btrfs_device *device,
1764 u64 chunk_offset, u64 start, u64 num_bytes)
1765{
1766 int ret;
1767 struct btrfs_path *path;
1768 struct btrfs_fs_info *fs_info = device->fs_info;
1769 struct btrfs_root *root = fs_info->dev_root;
1770 struct btrfs_dev_extent *extent;
1771 struct extent_buffer *leaf;
1772 struct btrfs_key key;
1773
1774 WARN_ON(!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state));
1775 WARN_ON(test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state));
1776 path = btrfs_alloc_path();
1777 if (!path)
1778 return -ENOMEM;
1779
1780 key.objectid = device->devid;
1781 key.offset = start;
1782 key.type = BTRFS_DEV_EXTENT_KEY;
1783 ret = btrfs_insert_empty_item(trans, root, path, &key,
1784 sizeof(*extent));
1785 if (ret)
1786 goto out;
1787
1788 leaf = path->nodes[0];
1789 extent = btrfs_item_ptr(leaf, path->slots[0],
1790 struct btrfs_dev_extent);
1791 btrfs_set_dev_extent_chunk_tree(leaf, extent,
1792 BTRFS_CHUNK_TREE_OBJECTID);
1793 btrfs_set_dev_extent_chunk_objectid(leaf, extent,
1794 BTRFS_FIRST_CHUNK_TREE_OBJECTID);
1795 btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
1796
1797 btrfs_set_dev_extent_length(leaf, extent, num_bytes);
1798 btrfs_mark_buffer_dirty(leaf);
1799out:
1800 btrfs_free_path(path);
1801 return ret;
1802}
1803
1804static u64 find_next_chunk(struct btrfs_fs_info *fs_info)
1805{
1806 struct extent_map_tree *em_tree;
1807 struct extent_map *em;
1808 struct rb_node *n;
1809 u64 ret = 0;
1810
1811 em_tree = &fs_info->mapping_tree;
1812 read_lock(&em_tree->lock);
1813 n = rb_last(&em_tree->map.rb_root);
1814 if (n) {
1815 em = rb_entry(n, struct extent_map, rb_node);
1816 ret = em->start + em->len;
1817 }
1818 read_unlock(&em_tree->lock);
1819
1820 return ret;
1821}
1822
1823static noinline int find_next_devid(struct btrfs_fs_info *fs_info,
1824 u64 *devid_ret)
1825{
1826 int ret;
1827 struct btrfs_key key;
1828 struct btrfs_key found_key;
1829 struct btrfs_path *path;
1830
1831 path = btrfs_alloc_path();
1832 if (!path)
1833 return -ENOMEM;
1834
1835 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
1836 key.type = BTRFS_DEV_ITEM_KEY;
1837 key.offset = (u64)-1;
1838
1839 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
1840 if (ret < 0)
1841 goto error;
1842
1843 if (ret == 0) {
1844
1845 btrfs_err(fs_info, "corrupted chunk tree devid -1 matched");
1846 ret = -EUCLEAN;
1847 goto error;
1848 }
1849
1850 ret = btrfs_previous_item(fs_info->chunk_root, path,
1851 BTRFS_DEV_ITEMS_OBJECTID,
1852 BTRFS_DEV_ITEM_KEY);
1853 if (ret) {
1854 *devid_ret = 1;
1855 } else {
1856 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1857 path->slots[0]);
1858 *devid_ret = found_key.offset + 1;
1859 }
1860 ret = 0;
1861error:
1862 btrfs_free_path(path);
1863 return ret;
1864}
1865
1866
1867
1868
1869
1870static int btrfs_add_dev_item(struct btrfs_trans_handle *trans,
1871 struct btrfs_device *device)
1872{
1873 int ret;
1874 struct btrfs_path *path;
1875 struct btrfs_dev_item *dev_item;
1876 struct extent_buffer *leaf;
1877 struct btrfs_key key;
1878 unsigned long ptr;
1879
1880 path = btrfs_alloc_path();
1881 if (!path)
1882 return -ENOMEM;
1883
1884 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
1885 key.type = BTRFS_DEV_ITEM_KEY;
1886 key.offset = device->devid;
1887
1888 ret = btrfs_insert_empty_item(trans, trans->fs_info->chunk_root, path,
1889 &key, sizeof(*dev_item));
1890 if (ret)
1891 goto out;
1892
1893 leaf = path->nodes[0];
1894 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
1895
1896 btrfs_set_device_id(leaf, dev_item, device->devid);
1897 btrfs_set_device_generation(leaf, dev_item, 0);
1898 btrfs_set_device_type(leaf, dev_item, device->type);
1899 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
1900 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
1901 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
1902 btrfs_set_device_total_bytes(leaf, dev_item,
1903 btrfs_device_get_disk_total_bytes(device));
1904 btrfs_set_device_bytes_used(leaf, dev_item,
1905 btrfs_device_get_bytes_used(device));
1906 btrfs_set_device_group(leaf, dev_item, 0);
1907 btrfs_set_device_seek_speed(leaf, dev_item, 0);
1908 btrfs_set_device_bandwidth(leaf, dev_item, 0);
1909 btrfs_set_device_start_offset(leaf, dev_item, 0);
1910
1911 ptr = btrfs_device_uuid(dev_item);
1912 write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
1913 ptr = btrfs_device_fsid(dev_item);
1914 write_extent_buffer(leaf, trans->fs_info->fs_devices->metadata_uuid,
1915 ptr, BTRFS_FSID_SIZE);
1916 btrfs_mark_buffer_dirty(leaf);
1917
1918 ret = 0;
1919out:
1920 btrfs_free_path(path);
1921 return ret;
1922}
1923
1924
1925
1926
1927
1928static void update_dev_time(const char *path_name)
1929{
1930 struct file *filp;
1931
1932 filp = filp_open(path_name, O_RDWR, 0);
1933 if (IS_ERR(filp))
1934 return;
1935 file_update_time(filp);
1936 filp_close(filp, NULL);
1937}
1938
1939static int btrfs_rm_dev_item(struct btrfs_device *device)
1940{
1941 struct btrfs_root *root = device->fs_info->chunk_root;
1942 int ret;
1943 struct btrfs_path *path;
1944 struct btrfs_key key;
1945 struct btrfs_trans_handle *trans;
1946
1947 path = btrfs_alloc_path();
1948 if (!path)
1949 return -ENOMEM;
1950
1951 trans = btrfs_start_transaction(root, 0);
1952 if (IS_ERR(trans)) {
1953 btrfs_free_path(path);
1954 return PTR_ERR(trans);
1955 }
1956 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
1957 key.type = BTRFS_DEV_ITEM_KEY;
1958 key.offset = device->devid;
1959
1960 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1961 if (ret) {
1962 if (ret > 0)
1963 ret = -ENOENT;
1964 btrfs_abort_transaction(trans, ret);
1965 btrfs_end_transaction(trans);
1966 goto out;
1967 }
1968
1969 ret = btrfs_del_item(trans, root, path);
1970 if (ret) {
1971 btrfs_abort_transaction(trans, ret);
1972 btrfs_end_transaction(trans);
1973 }
1974
1975out:
1976 btrfs_free_path(path);
1977 if (!ret)
1978 ret = btrfs_commit_transaction(trans);
1979 return ret;
1980}
1981
1982
1983
1984
1985
1986
1987static int btrfs_check_raid_min_devices(struct btrfs_fs_info *fs_info,
1988 u64 num_devices)
1989{
1990 u64 all_avail;
1991 unsigned seq;
1992 int i;
1993
1994 do {
1995 seq = read_seqbegin(&fs_info->profiles_lock);
1996
1997 all_avail = fs_info->avail_data_alloc_bits |
1998 fs_info->avail_system_alloc_bits |
1999 fs_info->avail_metadata_alloc_bits;
2000 } while (read_seqretry(&fs_info->profiles_lock, seq));
2001
2002 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
2003 if (!(all_avail & btrfs_raid_array[i].bg_flag))
2004 continue;
2005
2006 if (num_devices < btrfs_raid_array[i].devs_min) {
2007 int ret = btrfs_raid_array[i].mindev_error;
2008
2009 if (ret)
2010 return ret;
2011 }
2012 }
2013
2014 return 0;
2015}
2016
2017static struct btrfs_device * btrfs_find_next_active_device(
2018 struct btrfs_fs_devices *fs_devs, struct btrfs_device *device)
2019{
2020 struct btrfs_device *next_device;
2021
2022 list_for_each_entry(next_device, &fs_devs->devices, dev_list) {
2023 if (next_device != device &&
2024 !test_bit(BTRFS_DEV_STATE_MISSING, &next_device->dev_state)
2025 && next_device->bdev)
2026 return next_device;
2027 }
2028
2029 return NULL;
2030}
2031
2032
2033
2034
2035
2036
2037
2038void __cold btrfs_assign_next_active_device(struct btrfs_device *device,
2039 struct btrfs_device *next_device)
2040{
2041 struct btrfs_fs_info *fs_info = device->fs_info;
2042
2043 if (!next_device)
2044 next_device = btrfs_find_next_active_device(fs_info->fs_devices,
2045 device);
2046 ASSERT(next_device);
2047
2048 if (fs_info->sb->s_bdev &&
2049 (fs_info->sb->s_bdev == device->bdev))
2050 fs_info->sb->s_bdev = next_device->bdev;
2051
2052 if (fs_info->fs_devices->latest_bdev == device->bdev)
2053 fs_info->fs_devices->latest_bdev = next_device->bdev;
2054}
2055
2056
2057
2058
2059
2060static u64 btrfs_num_devices(struct btrfs_fs_info *fs_info)
2061{
2062 u64 num_devices = fs_info->fs_devices->num_devices;
2063
2064 down_read(&fs_info->dev_replace.rwsem);
2065 if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
2066 ASSERT(num_devices > 1);
2067 num_devices--;
2068 }
2069 up_read(&fs_info->dev_replace.rwsem);
2070
2071 return num_devices;
2072}
2073
2074void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
2075 struct block_device *bdev,
2076 const char *device_path)
2077{
2078 struct btrfs_super_block *disk_super;
2079 int copy_num;
2080
2081 if (!bdev)
2082 return;
2083
2084 for (copy_num = 0; copy_num < BTRFS_SUPER_MIRROR_MAX; copy_num++) {
2085 struct page *page;
2086 int ret;
2087
2088 disk_super = btrfs_read_dev_one_super(bdev, copy_num);
2089 if (IS_ERR(disk_super))
2090 continue;
2091
2092 if (bdev_is_zoned(bdev)) {
2093 btrfs_reset_sb_log_zones(bdev, copy_num);
2094 continue;
2095 }
2096
2097 memset(&disk_super->magic, 0, sizeof(disk_super->magic));
2098
2099 page = virt_to_page(disk_super);
2100 set_page_dirty(page);
2101 lock_page(page);
2102
2103 ret = write_one_page(page);
2104 if (ret)
2105 btrfs_warn(fs_info,
2106 "error clearing superblock number %d (%d)",
2107 copy_num, ret);
2108 btrfs_release_disk_super(disk_super);
2109
2110 }
2111
2112
2113 btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
2114
2115
2116 update_dev_time(device_path);
2117}
2118
2119int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
2120 u64 devid)
2121{
2122 struct btrfs_device *device;
2123 struct btrfs_fs_devices *cur_devices;
2124 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
2125 u64 num_devices;
2126 int ret = 0;
2127
2128 mutex_lock(&uuid_mutex);
2129
2130 num_devices = btrfs_num_devices(fs_info);
2131
2132 ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1);
2133 if (ret)
2134 goto out;
2135
2136 device = btrfs_find_device_by_devspec(fs_info, devid, device_path);
2137
2138 if (IS_ERR(device)) {
2139 if (PTR_ERR(device) == -ENOENT &&
2140 strcmp(device_path, "missing") == 0)
2141 ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
2142 else
2143 ret = PTR_ERR(device);
2144 goto out;
2145 }
2146
2147 if (btrfs_pinned_by_swapfile(fs_info, device)) {
2148 btrfs_warn_in_rcu(fs_info,
2149 "cannot remove device %s (devid %llu) due to active swapfile",
2150 rcu_str_deref(device->name), device->devid);
2151 ret = -ETXTBSY;
2152 goto out;
2153 }
2154
2155 if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
2156 ret = BTRFS_ERROR_DEV_TGT_REPLACE;
2157 goto out;
2158 }
2159
2160 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
2161 fs_info->fs_devices->rw_devices == 1) {
2162 ret = BTRFS_ERROR_DEV_ONLY_WRITABLE;
2163 goto out;
2164 }
2165
2166 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
2167 mutex_lock(&fs_info->chunk_mutex);
2168 list_del_init(&device->dev_alloc_list);
2169 device->fs_devices->rw_devices--;
2170 mutex_unlock(&fs_info->chunk_mutex);
2171 }
2172
2173 mutex_unlock(&uuid_mutex);
2174 ret = btrfs_shrink_device(device, 0);
2175 if (!ret)
2176 btrfs_reada_remove_dev(device);
2177 mutex_lock(&uuid_mutex);
2178 if (ret)
2179 goto error_undo;
2180
2181
2182
2183
2184
2185
2186 ret = btrfs_rm_dev_item(device);
2187 if (ret)
2188 goto error_undo;
2189
2190 clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
2191 btrfs_scrub_cancel_dev(device);
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208 cur_devices = device->fs_devices;
2209 mutex_lock(&fs_devices->device_list_mutex);
2210 list_del_rcu(&device->dev_list);
2211
2212 cur_devices->num_devices--;
2213 cur_devices->total_devices--;
2214
2215 if (cur_devices != fs_devices)
2216 fs_devices->total_devices--;
2217
2218 if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
2219 cur_devices->missing_devices--;
2220
2221 btrfs_assign_next_active_device(device, NULL);
2222
2223 if (device->bdev) {
2224 cur_devices->open_devices--;
2225
2226 btrfs_sysfs_remove_device(device);
2227 }
2228
2229 num_devices = btrfs_super_num_devices(fs_info->super_copy) - 1;
2230 btrfs_set_super_num_devices(fs_info->super_copy, num_devices);
2231 mutex_unlock(&fs_devices->device_list_mutex);
2232
2233
2234
2235
2236
2237
2238 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
2239 btrfs_scratch_superblocks(fs_info, device->bdev,
2240 device->name->str);
2241
2242 btrfs_close_bdev(device);
2243 synchronize_rcu();
2244 btrfs_free_device(device);
2245
2246 if (cur_devices->open_devices == 0) {
2247 list_del_init(&cur_devices->seed_list);
2248 close_fs_devices(cur_devices);
2249 free_fs_devices(cur_devices);
2250 }
2251
2252out:
2253 mutex_unlock(&uuid_mutex);
2254 return ret;
2255
2256error_undo:
2257 btrfs_reada_undo_remove_dev(device);
2258 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
2259 mutex_lock(&fs_info->chunk_mutex);
2260 list_add(&device->dev_alloc_list,
2261 &fs_devices->alloc_list);
2262 device->fs_devices->rw_devices++;
2263 mutex_unlock(&fs_info->chunk_mutex);
2264 }
2265 goto out;
2266}
2267
2268void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev)
2269{
2270 struct btrfs_fs_devices *fs_devices;
2271
2272 lockdep_assert_held(&srcdev->fs_info->fs_devices->device_list_mutex);
2273
2274
2275
2276
2277
2278
2279
2280 fs_devices = srcdev->fs_devices;
2281
2282 list_del_rcu(&srcdev->dev_list);
2283 list_del(&srcdev->dev_alloc_list);
2284 fs_devices->num_devices--;
2285 if (test_bit(BTRFS_DEV_STATE_MISSING, &srcdev->dev_state))
2286 fs_devices->missing_devices--;
2287
2288 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &srcdev->dev_state))
2289 fs_devices->rw_devices--;
2290
2291 if (srcdev->bdev)
2292 fs_devices->open_devices--;
2293}
2294
2295void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev)
2296{
2297 struct btrfs_fs_devices *fs_devices = srcdev->fs_devices;
2298
2299 mutex_lock(&uuid_mutex);
2300
2301 btrfs_close_bdev(srcdev);
2302 synchronize_rcu();
2303 btrfs_free_device(srcdev);
2304
2305
2306 if (!fs_devices->num_devices) {
2307
2308
2309
2310
2311
2312
2313 ASSERT(fs_devices->seeding);
2314
2315 list_del_init(&fs_devices->seed_list);
2316 close_fs_devices(fs_devices);
2317 free_fs_devices(fs_devices);
2318 }
2319 mutex_unlock(&uuid_mutex);
2320}
2321
2322void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev)
2323{
2324 struct btrfs_fs_devices *fs_devices = tgtdev->fs_info->fs_devices;
2325
2326 mutex_lock(&fs_devices->device_list_mutex);
2327
2328 btrfs_sysfs_remove_device(tgtdev);
2329
2330 if (tgtdev->bdev)
2331 fs_devices->open_devices--;
2332
2333 fs_devices->num_devices--;
2334
2335 btrfs_assign_next_active_device(tgtdev, NULL);
2336
2337 list_del_rcu(&tgtdev->dev_list);
2338
2339 mutex_unlock(&fs_devices->device_list_mutex);
2340
2341
2342
2343
2344
2345
2346
2347
2348 btrfs_scratch_superblocks(tgtdev->fs_info, tgtdev->bdev,
2349 tgtdev->name->str);
2350
2351 btrfs_close_bdev(tgtdev);
2352 synchronize_rcu();
2353 btrfs_free_device(tgtdev);
2354}
2355
2356static struct btrfs_device *btrfs_find_device_by_path(
2357 struct btrfs_fs_info *fs_info, const char *device_path)
2358{
2359 int ret = 0;
2360 struct btrfs_super_block *disk_super;
2361 u64 devid;
2362 u8 *dev_uuid;
2363 struct block_device *bdev;
2364 struct btrfs_device *device;
2365
2366 ret = btrfs_get_bdev_and_sb(device_path, FMODE_READ,
2367 fs_info->bdev_holder, 0, &bdev, &disk_super);
2368 if (ret)
2369 return ERR_PTR(ret);
2370
2371 devid = btrfs_stack_device_id(&disk_super->dev_item);
2372 dev_uuid = disk_super->dev_item.uuid;
2373 if (btrfs_fs_incompat(fs_info, METADATA_UUID))
2374 device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
2375 disk_super->metadata_uuid);
2376 else
2377 device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
2378 disk_super->fsid);
2379
2380 btrfs_release_disk_super(disk_super);
2381 if (!device)
2382 device = ERR_PTR(-ENOENT);
2383 blkdev_put(bdev, FMODE_READ);
2384 return device;
2385}
2386
2387
2388
2389
2390struct btrfs_device *btrfs_find_device_by_devspec(
2391 struct btrfs_fs_info *fs_info, u64 devid,
2392 const char *device_path)
2393{
2394 struct btrfs_device *device;
2395
2396 if (devid) {
2397 device = btrfs_find_device(fs_info->fs_devices, devid, NULL,
2398 NULL);
2399 if (!device)
2400 return ERR_PTR(-ENOENT);
2401 return device;
2402 }
2403
2404 if (!device_path || !device_path[0])
2405 return ERR_PTR(-EINVAL);
2406
2407 if (strcmp(device_path, "missing") == 0) {
2408
2409 list_for_each_entry(device, &fs_info->fs_devices->devices,
2410 dev_list) {
2411 if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
2412 &device->dev_state) && !device->bdev)
2413 return device;
2414 }
2415 return ERR_PTR(-ENOENT);
2416 }
2417
2418 return btrfs_find_device_by_path(fs_info, device_path);
2419}
2420
2421
2422
2423
/*
 * Turn the fs_devices of a seeding filesystem into a new, writeable one.
 *
 * The current device list is moved to a newly allocated seed fs_devices,
 * which is linked onto fs_devices->seed_list. fs_devices then gets its
 * device counters reset, a freshly generated fsid (also used as
 * metadata_uuid and copied to the in-memory super block), and the SEEDING
 * flag is cleared from the super block flags.
 *
 * The caller must hold uuid_mutex.
 *
 * Returns 0 on success, -EINVAL if the filesystem is not seeding, or the
 * error from alloc_fs_devices() / clone_fs_devices().
 */
static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
{
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	struct btrfs_fs_devices *old_devices;
	struct btrfs_fs_devices *seed_devices;
	struct btrfs_super_block *disk_super = fs_info->super_copy;
	struct btrfs_device *device;
	u64 super_flags;

	lockdep_assert_held(&uuid_mutex);
	if (!fs_devices->seeding)
		return -EINVAL;

	/* Container that takes over the devices currently in fs_devices. */
	seed_devices = alloc_fs_devices(NULL, NULL);
	if (IS_ERR(seed_devices))
		return PTR_ERR(seed_devices);

	/*
	 * A clone of the current fs_devices is put on the global fs_uuids
	 * list below (NOTE(review): presumably so the seed filesystem stays
	 * known under its original fsid - confirm).
	 */
	old_devices = clone_fs_devices(fs_devices);
	if (IS_ERR(old_devices)) {
		kfree(seed_devices);
		return PTR_ERR(old_devices);
	}

	list_add(&old_devices->fs_list, &fs_uuids);

	/*
	 * Start from a full copy of fs_devices, then re-initialize the
	 * members that must not be shared with it: the list heads and the
	 * mutex.
	 */
	memcpy(seed_devices, fs_devices, sizeof(*seed_devices));
	seed_devices->opened = 1;
	INIT_LIST_HEAD(&seed_devices->devices);
	INIT_LIST_HEAD(&seed_devices->alloc_list);
	mutex_init(&seed_devices->device_list_mutex);

	mutex_lock(&fs_devices->device_list_mutex);
	/* Move every device over and repoint it at its new owner. */
	list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
			      synchronize_rcu);
	list_for_each_entry(device, &seed_devices->devices, dev_list)
		device->fs_devices = seed_devices;

	/* fs_devices is now an empty, non-seeding container. */
	fs_devices->seeding = false;
	fs_devices->num_devices = 0;
	fs_devices->open_devices = 0;
	fs_devices->missing_devices = 0;
	fs_devices->rotating = false;
	list_add(&seed_devices->seed_list, &fs_devices->seed_list);

	/* Give the sprouted filesystem an identity of its own. */
	generate_random_uuid(fs_devices->fsid);
	memcpy(fs_devices->metadata_uuid, fs_devices->fsid, BTRFS_FSID_SIZE);
	memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
	mutex_unlock(&fs_devices->device_list_mutex);

	super_flags = btrfs_super_flags(disk_super) &
		      ~BTRFS_SUPER_FLAG_SEEDING;
	btrfs_set_super_flags(disk_super, super_flags);

	return 0;
}
2489
2490
2491
2492
/*
 * Walk all device items in the chunk tree and, for each item whose device
 * belongs to a seeding fs_devices, store the device's in-memory generation
 * in the item.
 *
 * Returns 0 on success, -ENOMEM, or a negative error from the tree search.
 * A device item without a matching in-memory device triggers BUG_ON().
 */
static int btrfs_finish_sprout(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_root *root = fs_info->chunk_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_dev_item *dev_item;
	struct btrfs_device *device;
	struct btrfs_key key;
	u8 fs_uuid[BTRFS_FSID_SIZE];
	u8 dev_uuid[BTRFS_UUID_SIZE];
	u64 devid;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* Start at the lowest possible device item key. */
	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.offset = 0;
	key.type = BTRFS_DEV_ITEM_KEY;

	while (1) {
		ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
		if (ret < 0)
			goto error;

		leaf = path->nodes[0];
next_slot:
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret > 0)
				break;
			if (ret < 0)
				goto error;
			/*
			 * Remember the first key of the next leaf and search
			 * for it again at the top of the loop.
			 */
			leaf = path->nodes[0];
			btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
			btrfs_release_path(path);
			continue;
		}

		/* Stop at the first item that is not a device item. */
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != BTRFS_DEV_ITEMS_OBJECTID ||
		    key.type != BTRFS_DEV_ITEM_KEY)
			break;

		/* Match the on-disk item to its in-memory device. */
		dev_item = btrfs_item_ptr(leaf, path->slots[0],
					  struct btrfs_dev_item);
		devid = btrfs_device_id(leaf, dev_item);
		read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
				   BTRFS_UUID_SIZE);
		read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
				   BTRFS_FSID_SIZE);
		device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
					   fs_uuid);
		BUG_ON(!device);

		/* Only items of devices owned by a seeding fs_devices change. */
		if (device->fs_devices->seeding) {
			btrfs_set_device_generation(leaf, dev_item,
						    device->generation);
			btrfs_mark_buffer_dirty(leaf);
		}

		path->slots[0]++;
		goto next_slot;
	}
	ret = 0;
error:
	btrfs_free_path(path);
	return ret;
}
2564
2565int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path)
2566{
2567 struct btrfs_root *root = fs_info->dev_root;
2568 struct request_queue *q;
2569 struct btrfs_trans_handle *trans;
2570 struct btrfs_device *device;
2571 struct block_device *bdev;
2572 struct super_block *sb = fs_info->sb;
2573 struct rcu_string *name;
2574 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
2575 u64 orig_super_total_bytes;
2576 u64 orig_super_num_devices;
2577 int seeding_dev = 0;
2578 int ret = 0;
2579 bool locked = false;
2580
2581 if (sb_rdonly(sb) && !fs_devices->seeding)
2582 return -EROFS;
2583
2584 bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
2585 fs_info->bdev_holder);
2586 if (IS_ERR(bdev))
2587 return PTR_ERR(bdev);
2588
2589 if (!btrfs_check_device_zone_type(fs_info, bdev)) {
2590 ret = -EINVAL;
2591 goto error;
2592 }
2593
2594 if (fs_devices->seeding) {
2595 seeding_dev = 1;
2596 down_write(&sb->s_umount);
2597 mutex_lock(&uuid_mutex);
2598 locked = true;
2599 }
2600
2601 sync_blockdev(bdev);
2602
2603 rcu_read_lock();
2604 list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
2605 if (device->bdev == bdev) {
2606 ret = -EEXIST;
2607 rcu_read_unlock();
2608 goto error;
2609 }
2610 }
2611 rcu_read_unlock();
2612
2613 device = btrfs_alloc_device(fs_info, NULL, NULL);
2614 if (IS_ERR(device)) {
2615
2616 ret = PTR_ERR(device);
2617 goto error;
2618 }
2619
2620 name = rcu_string_strdup(device_path, GFP_KERNEL);
2621 if (!name) {
2622 ret = -ENOMEM;
2623 goto error_free_device;
2624 }
2625 rcu_assign_pointer(device->name, name);
2626
2627 device->fs_info = fs_info;
2628 device->bdev = bdev;
2629
2630 ret = btrfs_get_dev_zone_info(device);
2631 if (ret)
2632 goto error_free_device;
2633
2634 trans = btrfs_start_transaction(root, 0);
2635 if (IS_ERR(trans)) {
2636 ret = PTR_ERR(trans);
2637 goto error_free_zone;
2638 }
2639
2640 q = bdev_get_queue(bdev);
2641 set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
2642 device->generation = trans->transid;
2643 device->io_width = fs_info->sectorsize;
2644 device->io_align = fs_info->sectorsize;
2645 device->sector_size = fs_info->sectorsize;
2646 device->total_bytes = round_down(i_size_read(bdev->bd_inode),
2647 fs_info->sectorsize);
2648 device->disk_total_bytes = device->total_bytes;
2649 device->commit_total_bytes = device->total_bytes;
2650 set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
2651 clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
2652 device->mode = FMODE_EXCL;
2653 device->dev_stats_valid = 1;
2654 set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
2655
2656 if (seeding_dev) {
2657 btrfs_clear_sb_rdonly(sb);
2658 ret = btrfs_prepare_sprout(fs_info);
2659 if (ret) {
2660 btrfs_abort_transaction(trans, ret);
2661 goto error_trans;
2662 }
2663 }
2664
2665 device->fs_devices = fs_devices;
2666
2667 mutex_lock(&fs_devices->device_list_mutex);
2668 mutex_lock(&fs_info->chunk_mutex);
2669 list_add_rcu(&device->dev_list, &fs_devices->devices);
2670 list_add(&device->dev_alloc_list, &fs_devices->alloc_list);
2671 fs_devices->num_devices++;
2672 fs_devices->open_devices++;
2673 fs_devices->rw_devices++;
2674 fs_devices->total_devices++;
2675 fs_devices->total_rw_bytes += device->total_bytes;
2676
2677 atomic64_add(device->total_bytes, &fs_info->free_chunk_space);
2678
2679 if (!blk_queue_nonrot(q))
2680 fs_devices->rotating = true;
2681
2682 orig_super_total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
2683 btrfs_set_super_total_bytes(fs_info->super_copy,
2684 round_down(orig_super_total_bytes + device->total_bytes,
2685 fs_info->sectorsize));
2686
2687 orig_super_num_devices = btrfs_super_num_devices(fs_info->super_copy);
2688 btrfs_set_super_num_devices(fs_info->super_copy,
2689 orig_super_num_devices + 1);
2690
2691
2692
2693
2694
2695 btrfs_clear_space_info_full(fs_info);
2696
2697 mutex_unlock(&fs_info->chunk_mutex);
2698
2699
2700 btrfs_sysfs_add_device(device);
2701
2702 mutex_unlock(&fs_devices->device_list_mutex);
2703
2704 if (seeding_dev) {
2705 mutex_lock(&fs_info->chunk_mutex);
2706 ret = init_first_rw_device(trans);
2707 mutex_unlock(&fs_info->chunk_mutex);
2708 if (ret) {
2709 btrfs_abort_transaction(trans, ret);
2710 goto error_sysfs;
2711 }
2712 }
2713
2714 ret = btrfs_add_dev_item(trans, device);
2715 if (ret) {
2716 btrfs_abort_transaction(trans, ret);
2717 goto error_sysfs;
2718 }
2719
2720 if (seeding_dev) {
2721 ret = btrfs_finish_sprout(trans);
2722 if (ret) {
2723 btrfs_abort_transaction(trans, ret);
2724 goto error_sysfs;
2725 }
2726
2727
2728
2729
2730
2731 btrfs_sysfs_update_sprout_fsid(fs_devices);
2732 }
2733
2734 ret = btrfs_commit_transaction(trans);
2735
2736 if (seeding_dev) {
2737 mutex_unlock(&uuid_mutex);
2738 up_write(&sb->s_umount);
2739 locked = false;
2740
2741 if (ret)
2742 return ret;
2743
2744 ret = btrfs_relocate_sys_chunks(fs_info);
2745 if (ret < 0)
2746 btrfs_handle_fs_error(fs_info, ret,
2747 "Failed to relocate sys chunks after device initialization. This can be fixed using the \"btrfs balance\" command.");
2748 trans = btrfs_attach_transaction(root);
2749 if (IS_ERR(trans)) {
2750 if (PTR_ERR(trans) == -ENOENT)
2751 return 0;
2752 ret = PTR_ERR(trans);
2753 trans = NULL;
2754 goto error_sysfs;
2755 }
2756 ret = btrfs_commit_transaction(trans);
2757 }
2758
2759
2760
2761
2762
2763
2764
2765
2766 btrfs_forget_devices(device_path);
2767
2768
2769 update_dev_time(device_path);
2770
2771 return ret;
2772
2773error_sysfs:
2774 btrfs_sysfs_remove_device(device);
2775 mutex_lock(&fs_info->fs_devices->device_list_mutex);
2776 mutex_lock(&fs_info->chunk_mutex);
2777 list_del_rcu(&device->dev_list);
2778 list_del(&device->dev_alloc_list);
2779 fs_info->fs_devices->num_devices--;
2780 fs_info->fs_devices->open_devices--;
2781 fs_info->fs_devices->rw_devices--;
2782 fs_info->fs_devices->total_devices--;
2783 fs_info->fs_devices->total_rw_bytes -= device->total_bytes;
2784 atomic64_sub(device->total_bytes, &fs_info->free_chunk_space);
2785 btrfs_set_super_total_bytes(fs_info->super_copy,
2786 orig_super_total_bytes);
2787 btrfs_set_super_num_devices(fs_info->super_copy,
2788 orig_super_num_devices);
2789 mutex_unlock(&fs_info->chunk_mutex);
2790 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2791error_trans:
2792 if (seeding_dev)
2793 btrfs_set_sb_rdonly(sb);
2794 if (trans)
2795 btrfs_end_transaction(trans);
2796error_free_zone:
2797 btrfs_destroy_dev_zone_info(device);
2798error_free_device:
2799 btrfs_free_device(device);
2800error:
2801 blkdev_put(bdev, FMODE_EXCL);
2802 if (locked) {
2803 mutex_unlock(&uuid_mutex);
2804 up_write(&sb->s_umount);
2805 }
2806 return ret;
2807}
2808
2809static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
2810 struct btrfs_device *device)
2811{
2812 int ret;
2813 struct btrfs_path *path;
2814 struct btrfs_root *root = device->fs_info->chunk_root;
2815 struct btrfs_dev_item *dev_item;
2816 struct extent_buffer *leaf;
2817 struct btrfs_key key;
2818
2819 path = btrfs_alloc_path();
2820 if (!path)
2821 return -ENOMEM;
2822
2823 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2824 key.type = BTRFS_DEV_ITEM_KEY;
2825 key.offset = device->devid;
2826
2827 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2828 if (ret < 0)
2829 goto out;
2830
2831 if (ret > 0) {
2832 ret = -ENOENT;
2833 goto out;
2834 }
2835
2836 leaf = path->nodes[0];
2837 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
2838
2839 btrfs_set_device_id(leaf, dev_item, device->devid);
2840 btrfs_set_device_type(leaf, dev_item, device->type);
2841 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
2842 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
2843 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
2844 btrfs_set_device_total_bytes(leaf, dev_item,
2845 btrfs_device_get_disk_total_bytes(device));
2846 btrfs_set_device_bytes_used(leaf, dev_item,
2847 btrfs_device_get_bytes_used(device));
2848 btrfs_mark_buffer_dirty(leaf);
2849
2850out:
2851 btrfs_free_path(path);
2852 return ret;
2853}
2854
2855int btrfs_grow_device(struct btrfs_trans_handle *trans,
2856 struct btrfs_device *device, u64 new_size)
2857{
2858 struct btrfs_fs_info *fs_info = device->fs_info;
2859 struct btrfs_super_block *super_copy = fs_info->super_copy;
2860 u64 old_total;
2861 u64 diff;
2862
2863 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
2864 return -EACCES;
2865
2866 new_size = round_down(new_size, fs_info->sectorsize);
2867
2868 mutex_lock(&fs_info->chunk_mutex);
2869 old_total = btrfs_super_total_bytes(super_copy);
2870 diff = round_down(new_size - device->total_bytes, fs_info->sectorsize);
2871
2872 if (new_size <= device->total_bytes ||
2873 test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
2874 mutex_unlock(&fs_info->chunk_mutex);
2875 return -EINVAL;
2876 }
2877
2878 btrfs_set_super_total_bytes(super_copy,
2879 round_down(old_total + diff, fs_info->sectorsize));
2880 device->fs_devices->total_rw_bytes += diff;
2881
2882 btrfs_device_set_total_bytes(device, new_size);
2883 btrfs_device_set_disk_total_bytes(device, new_size);
2884 btrfs_clear_space_info_full(device->fs_info);
2885 if (list_empty(&device->post_commit_list))
2886 list_add_tail(&device->post_commit_list,
2887 &trans->transaction->dev_update_list);
2888 mutex_unlock(&fs_info->chunk_mutex);
2889
2890 return btrfs_update_device(trans, device);
2891}
2892
2893static int btrfs_free_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
2894{
2895 struct btrfs_fs_info *fs_info = trans->fs_info;
2896 struct btrfs_root *root = fs_info->chunk_root;
2897 int ret;
2898 struct btrfs_path *path;
2899 struct btrfs_key key;
2900
2901 path = btrfs_alloc_path();
2902 if (!path)
2903 return -ENOMEM;
2904
2905 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
2906 key.offset = chunk_offset;
2907 key.type = BTRFS_CHUNK_ITEM_KEY;
2908
2909 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2910 if (ret < 0)
2911 goto out;
2912 else if (ret > 0) {
2913 btrfs_handle_fs_error(fs_info, -ENOENT,
2914 "Failed lookup while freeing chunk.");
2915 ret = -ENOENT;
2916 goto out;
2917 }
2918
2919 ret = btrfs_del_item(trans, root, path);
2920 if (ret < 0)
2921 btrfs_handle_fs_error(fs_info, ret,
2922 "Failed to delete chunk item.");
2923out:
2924 btrfs_free_path(path);
2925 return ret;
2926}
2927
2928static int btrfs_del_sys_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
2929{
2930 struct btrfs_super_block *super_copy = fs_info->super_copy;
2931 struct btrfs_disk_key *disk_key;
2932 struct btrfs_chunk *chunk;
2933 u8 *ptr;
2934 int ret = 0;
2935 u32 num_stripes;
2936 u32 array_size;
2937 u32 len = 0;
2938 u32 cur;
2939 struct btrfs_key key;
2940
2941 lockdep_assert_held(&fs_info->chunk_mutex);
2942 array_size = btrfs_super_sys_array_size(super_copy);
2943
2944 ptr = super_copy->sys_chunk_array;
2945 cur = 0;
2946
2947 while (cur < array_size) {
2948 disk_key = (struct btrfs_disk_key *)ptr;
2949 btrfs_disk_key_to_cpu(&key, disk_key);
2950
2951 len = sizeof(*disk_key);
2952
2953 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
2954 chunk = (struct btrfs_chunk *)(ptr + len);
2955 num_stripes = btrfs_stack_chunk_num_stripes(chunk);
2956 len += btrfs_chunk_item_size(num_stripes);
2957 } else {
2958 ret = -EIO;
2959 break;
2960 }
2961 if (key.objectid == BTRFS_FIRST_CHUNK_TREE_OBJECTID &&
2962 key.offset == chunk_offset) {
2963 memmove(ptr, ptr + len, array_size - (cur + len));
2964 array_size -= len;
2965 btrfs_set_super_sys_array_size(super_copy, array_size);
2966 } else {
2967 ptr += len;
2968 cur += len;
2969 }
2970 }
2971 return ret;
2972}
2973
2974
2975
2976
2977
2978
2979
2980
2981struct extent_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info,
2982 u64 logical, u64 length)
2983{
2984 struct extent_map_tree *em_tree;
2985 struct extent_map *em;
2986
2987 em_tree = &fs_info->mapping_tree;
2988 read_lock(&em_tree->lock);
2989 em = lookup_extent_mapping(em_tree, logical, length);
2990 read_unlock(&em_tree->lock);
2991
2992 if (!em) {
2993 btrfs_crit(fs_info, "unable to find logical %llu length %llu",
2994 logical, length);
2995 return ERR_PTR(-EINVAL);
2996 }
2997
2998 if (em->start > logical || em->start + em->len < logical) {
2999 btrfs_crit(fs_info,
3000 "found a bad mapping, wanted %llu-%llu, found %llu-%llu",
3001 logical, length, em->start, em->start + em->len);
3002 free_extent_map(em);
3003 return ERR_PTR(-EINVAL);
3004 }
3005
3006
3007 return em;
3008}
3009
3010static int remove_chunk_item(struct btrfs_trans_handle *trans,
3011 struct map_lookup *map, u64 chunk_offset)
3012{
3013 int i;
3014
3015
3016
3017
3018
3019
3020 lockdep_assert_held(&trans->fs_info->chunk_mutex);
3021
3022 for (i = 0; i < map->num_stripes; i++) {
3023 int ret;
3024
3025 ret = btrfs_update_device(trans, map->stripes[i].dev);
3026 if (ret)
3027 return ret;
3028 }
3029
3030 return btrfs_free_chunk(trans, chunk_offset);
3031}
3032
/*
 * Remove the chunk at logical address @chunk_offset.
 *
 * Frees the device extent of every stripe, deletes the chunk item (and, for
 * SYSTEM chunks, its entry in the superblock's sys_chunk_array) and finally
 * removes the block group.  Every failure after the chunk map lookup aborts
 * the transaction.
 *
 * Returns 0 on success or a negative errno.
 */
int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct extent_map *em;
	struct map_lookup *map;
	u64 dev_extent_len = 0;
	int i, ret = 0;
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;

	em = btrfs_get_chunk_map(fs_info, chunk_offset, 1);
	if (IS_ERR(em)) {
		/*
		 * There is no usable mapping for the chunk we were asked to
		 * remove.  Flag it with an assertion and pass the error on.
		 */
		ASSERT(0);
		return PTR_ERR(em);
	}
	map = em->map_lookup;

	/*
	 * device_list_mutex is held across the whole stripe loop, while
	 * chunk_mutex is taken only around the per-device accounting update
	 * and released again before the next btrfs_free_dev_extent() call.
	 *
	 * NOTE(review): presumably device_list_mutex keeps the device objects
	 * referenced by the stripes stable (e.g. against device replace) and
	 * chunk_mutex must not be held while modifying the device tree -
	 * confirm against the callers and the locking documentation.
	 */
	mutex_lock(&fs_devices->device_list_mutex);
	for (i = 0; i < map->num_stripes; i++) {
		struct btrfs_device *device = map->stripes[i].dev;
		ret = btrfs_free_dev_extent(trans, device,
					    map->stripes[i].physical,
					    &dev_extent_len);
		if (ret) {
			mutex_unlock(&fs_devices->device_list_mutex);
			btrfs_abort_transaction(trans, ret);
			goto out;
		}

		if (device->bytes_used > 0) {
			/* Give the freed extent back to the space accounting. */
			mutex_lock(&fs_info->chunk_mutex);
			btrfs_device_set_bytes_used(device,
					device->bytes_used - dev_extent_len);
			atomic64_add(dev_extent_len, &fs_info->free_chunk_space);
			btrfs_clear_space_info_full(fs_info);
			mutex_unlock(&fs_info->chunk_mutex);
		}
	}
	mutex_unlock(&fs_devices->device_list_mutex);

	/*
	 * From here on chunk_mutex stays held until either the success path
	 * below or the 'out' label drops it.  trans->removing_chunk records
	 * that the mutex is held so that 'out' knows it has to unlock.
	 *
	 * NOTE(review): removing_chunk is presumably also consulted by the
	 * chunk allocation code reached from here - confirm.
	 */
	trans->removing_chunk = true;
	mutex_lock(&fs_info->chunk_mutex);

	check_system_chunk(trans, map->type);

	ret = remove_chunk_item(trans, map, chunk_offset);
	/*
	 * If updating the chunk tree ran out of space, allocate a new chunk
	 * with the system allocation profile, insert its chunk item and try
	 * the removal once more.  A failure in any of these steps, or any
	 * other error from the first attempt, aborts the transaction.
	 */
	if (ret == -ENOSPC) {
		const u64 sys_flags = btrfs_system_alloc_profile(fs_info);
		struct btrfs_block_group *sys_bg;

		sys_bg = btrfs_alloc_chunk(trans, sys_flags);
		if (IS_ERR(sys_bg)) {
			ret = PTR_ERR(sys_bg);
			btrfs_abort_transaction(trans, ret);
			goto out;
		}

		ret = btrfs_chunk_alloc_add_chunk_item(trans, sys_bg);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}

		ret = remove_chunk_item(trans, map, chunk_offset);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
	} else if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	trace_btrfs_chunk_free(fs_info, map, chunk_offset, em->len);

	/* SYSTEM chunks are also recorded in the superblock's sys array. */
	if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
		ret = btrfs_del_sys_chunk(fs_info, chunk_offset);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
	}

	mutex_unlock(&fs_info->chunk_mutex);
	trans->removing_chunk = false;

	/*
	 * The chunk tree updates are done.
	 * NOTE(review): this presumably releases the space reserved by
	 * check_system_chunk() above - confirm.
	 */
	btrfs_trans_release_chunk_metadata(trans);

	ret = btrfs_remove_block_group(trans, chunk_offset, em);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

out:
	/* Error paths taken while chunk_mutex was held land here locked. */
	if (trans->removing_chunk) {
		mutex_unlock(&fs_info->chunk_mutex);
		trans->removing_chunk = false;
	}
	/* Release the map obtained from btrfs_get_chunk_map(). */
	free_extent_map(em);
	return ret;
}
3189
3190int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
3191{
3192 struct btrfs_root *root = fs_info->chunk_root;
3193 struct btrfs_trans_handle *trans;
3194 struct btrfs_block_group *block_group;
3195 u64 length;
3196 int ret;
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210 lockdep_assert_held(&fs_info->reclaim_bgs_lock);
3211
3212
3213 btrfs_scrub_pause(fs_info);
3214 ret = btrfs_relocate_block_group(fs_info, chunk_offset);
3215 btrfs_scrub_continue(fs_info);
3216 if (ret)
3217 return ret;
3218
3219 block_group = btrfs_lookup_block_group(fs_info, chunk_offset);
3220 if (!block_group)
3221 return -ENOENT;
3222 btrfs_discard_cancel_work(&fs_info->discard_ctl, block_group);
3223 length = block_group->length;
3224 btrfs_put_block_group(block_group);
3225
3226
3227
3228
3229
3230
3231
3232 if (btrfs_is_zoned(fs_info)) {
3233 ret = btrfs_discard_extent(fs_info, chunk_offset, length, NULL);
3234 if (ret)
3235 btrfs_info(fs_info,
3236 "failed to reset zone %llu after relocation",
3237 chunk_offset);
3238 }
3239
3240 trans = btrfs_start_trans_remove_block_group(root->fs_info,
3241 chunk_offset);
3242 if (IS_ERR(trans)) {
3243 ret = PTR_ERR(trans);
3244 btrfs_handle_fs_error(root->fs_info, ret, NULL);
3245 return ret;
3246 }
3247
3248
3249
3250
3251
3252 ret = btrfs_remove_chunk(trans, chunk_offset);
3253 btrfs_end_transaction(trans);
3254 return ret;
3255}
3256
/*
 * Relocate every chunk whose type has BTRFS_BLOCK_GROUP_SYSTEM set.
 *
 * The chunk tree is walked from the highest chunk offset downwards.  A
 * relocation that fails with -ENOSPC is counted and the whole walk is
 * repeated once; any other relocation error hits BUG_ON().
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, a negative
 * error from the tree search, or -ENOSPC (with a warning) if chunks still
 * failed with -ENOSPC on the second walk.
 */
static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *chunk_root = fs_info->chunk_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_chunk *chunk;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u64 chunk_type;
	bool retried = false;
	int failed = 0;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

again:
	/* Start past the last possible chunk item and walk backwards. */
	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
	key.offset = (u64)-1;
	key.type = BTRFS_CHUNK_ITEM_KEY;

	while (1) {
		/*
		 * reclaim_bgs_lock is held across the search and the
		 * relocation; btrfs_relocate_chunk() asserts that it is held.
		 */
		mutex_lock(&fs_info->reclaim_bgs_lock);
		ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
		if (ret < 0) {
			mutex_unlock(&fs_info->reclaim_bgs_lock);
			goto error;
		}
		/* An exact match for offset (u64)-1 is treated as fatal. */
		BUG_ON(ret == 0);

		ret = btrfs_previous_item(chunk_root, path, key.objectid,
					  key.type);
		/* Both exits below leave the loop, so unlock for either one. */
		if (ret)
			mutex_unlock(&fs_info->reclaim_bgs_lock);
		if (ret < 0)
			goto error;
		if (ret > 0)
			break;

		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		chunk = btrfs_item_ptr(leaf, path->slots[0],
				       struct btrfs_chunk);
		chunk_type = btrfs_chunk_type(leaf, chunk);
		/* Everything needed was copied out of the leaf above. */
		btrfs_release_path(path);

		if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) {
			ret = btrfs_relocate_chunk(fs_info, found_key.offset);
			if (ret == -ENOSPC)
				failed++;
			else
				BUG_ON(ret);
		}
		mutex_unlock(&fs_info->reclaim_bgs_lock);

		/* Continue with the chunk just below this one, if any. */
		if (found_key.offset == 0)
			break;
		key.offset = found_key.offset - 1;
	}
	ret = 0;
	/* Give chunks that hit -ENOSPC exactly one more full walk. */
	if (failed && !retried) {
		failed = 0;
		retried = true;
		goto again;
	} else if (WARN_ON(failed && retried)) {
		ret = -ENOSPC;
	}
error:
	btrfs_free_path(path);
	return ret;
}
3330
3331
3332
3333
3334
3335
3336static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info,
3337 u64 chunk_offset)
3338{
3339 struct btrfs_block_group *cache;
3340 u64 bytes_used;
3341 u64 chunk_type;
3342
3343 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
3344 ASSERT(cache);
3345 chunk_type = cache->flags;
3346 btrfs_put_block_group(cache);
3347
3348 if (!(chunk_type & BTRFS_BLOCK_GROUP_DATA))
3349 return 0;
3350
3351 spin_lock(&fs_info->data_sinfo->lock);
3352 bytes_used = fs_info->data_sinfo->bytes_used;
3353 spin_unlock(&fs_info->data_sinfo->lock);
3354
3355 if (!bytes_used) {
3356 struct btrfs_trans_handle *trans;
3357 int ret;
3358
3359 trans = btrfs_join_transaction(fs_info->tree_root);
3360 if (IS_ERR(trans))
3361 return PTR_ERR(trans);
3362
3363 ret = btrfs_force_chunk_alloc(trans, BTRFS_BLOCK_GROUP_DATA);
3364 btrfs_end_transaction(trans);
3365 if (ret < 0)
3366 return ret;
3367 return 1;
3368 }
3369
3370 return 0;
3371}
3372
3373static int insert_balance_item(struct btrfs_fs_info *fs_info,
3374 struct btrfs_balance_control *bctl)
3375{
3376 struct btrfs_root *root = fs_info->tree_root;
3377 struct btrfs_trans_handle *trans;
3378 struct btrfs_balance_item *item;
3379 struct btrfs_disk_balance_args disk_bargs;
3380 struct btrfs_path *path;
3381 struct extent_buffer *leaf;
3382 struct btrfs_key key;
3383 int ret, err;
3384
3385 path = btrfs_alloc_path();
3386 if (!path)
3387 return -ENOMEM;
3388
3389 trans = btrfs_start_transaction(root, 0);
3390 if (IS_ERR(trans)) {
3391 btrfs_free_path(path);
3392 return PTR_ERR(trans);
3393 }
3394
3395 key.objectid = BTRFS_BALANCE_OBJECTID;
3396 key.type = BTRFS_TEMPORARY_ITEM_KEY;
3397 key.offset = 0;
3398
3399 ret = btrfs_insert_empty_item(trans, root, path, &key,
3400 sizeof(*item));
3401 if (ret)
3402 goto out;
3403
3404 leaf = path->nodes[0];
3405 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
3406
3407 memzero_extent_buffer(leaf, (unsigned long)item, sizeof(*item));
3408
3409 btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->data);
3410 btrfs_set_balance_data(leaf, item, &disk_bargs);
3411 btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->meta);
3412 btrfs_set_balance_meta(leaf, item, &disk_bargs);
3413 btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->sys);
3414 btrfs_set_balance_sys(leaf, item, &disk_bargs);
3415
3416 btrfs_set_balance_flags(leaf, item, bctl->flags);
3417
3418 btrfs_mark_buffer_dirty(leaf);
3419out:
3420 btrfs_free_path(path);
3421 err = btrfs_commit_transaction(trans);
3422 if (err && !ret)
3423 ret = err;
3424 return ret;
3425}
3426
3427static int del_balance_item(struct btrfs_fs_info *fs_info)
3428{
3429 struct btrfs_root *root = fs_info->tree_root;
3430 struct btrfs_trans_handle *trans;
3431 struct btrfs_path *path;
3432 struct btrfs_key key;
3433 int ret, err;
3434
3435 path = btrfs_alloc_path();
3436 if (!path)
3437 return -ENOMEM;
3438
3439 trans = btrfs_start_transaction_fallback_global_rsv(root, 0);
3440 if (IS_ERR(trans)) {
3441 btrfs_free_path(path);
3442 return PTR_ERR(trans);
3443 }
3444
3445 key.objectid = BTRFS_BALANCE_OBJECTID;
3446 key.type = BTRFS_TEMPORARY_ITEM_KEY;
3447 key.offset = 0;
3448
3449 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3450 if (ret < 0)
3451 goto out;
3452 if (ret > 0) {
3453 ret = -ENOENT;
3454 goto out;
3455 }
3456
3457 ret = btrfs_del_item(trans, root, path);
3458out:
3459 btrfs_free_path(path);
3460 err = btrfs_commit_transaction(trans);
3461 if (err && !ret)
3462 ret = err;
3463 return ret;
3464}
3465
3466
3467
3468
3469
3470static void update_balance_args(struct btrfs_balance_control *bctl)
3471{
3472
3473
3474
3475 if (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)
3476 bctl->data.flags |= BTRFS_BALANCE_ARGS_SOFT;
3477 if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)
3478 bctl->sys.flags |= BTRFS_BALANCE_ARGS_SOFT;
3479 if (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)
3480 bctl->meta.flags |= BTRFS_BALANCE_ARGS_SOFT;
3481
3482
3483
3484
3485
3486
3487
3488
3489 if (!(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE) &&
3490 !(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
3491 !(bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
3492 bctl->data.flags |= BTRFS_BALANCE_ARGS_USAGE;
3493 bctl->data.usage = 90;
3494 }
3495 if (!(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE) &&
3496 !(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
3497 !(bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
3498 bctl->sys.flags |= BTRFS_BALANCE_ARGS_USAGE;
3499 bctl->sys.usage = 90;
3500 }
3501 if (!(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE) &&
3502 !(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
3503 !(bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
3504 bctl->meta.flags |= BTRFS_BALANCE_ARGS_USAGE;
3505 bctl->meta.usage = 90;
3506 }
3507}
3508
3509
3510
3511
3512static void reset_balance_state(struct btrfs_fs_info *fs_info)
3513{
3514 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3515 int ret;
3516
3517 BUG_ON(!fs_info->balance_ctl);
3518
3519 spin_lock(&fs_info->balance_lock);
3520 fs_info->balance_ctl = NULL;
3521 spin_unlock(&fs_info->balance_lock);
3522
3523 kfree(bctl);
3524 ret = del_balance_item(fs_info);
3525 if (ret)
3526 btrfs_handle_fs_error(fs_info, ret, NULL);
3527}
3528
3529
3530
3531
3532
3533static int chunk_profiles_filter(u64 chunk_type,
3534 struct btrfs_balance_args *bargs)
3535{
3536 chunk_type = chunk_to_extended(chunk_type) &
3537 BTRFS_EXTENDED_PROFILE_MASK;
3538
3539 if (bargs->profiles & chunk_type)
3540 return 0;
3541
3542 return 1;
3543}
3544
3545static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
3546 struct btrfs_balance_args *bargs)
3547{
3548 struct btrfs_block_group *cache;
3549 u64 chunk_used;
3550 u64 user_thresh_min;
3551 u64 user_thresh_max;
3552 int ret = 1;
3553
3554 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
3555 chunk_used = cache->used;
3556
3557 if (bargs->usage_min == 0)
3558 user_thresh_min = 0;
3559 else
3560 user_thresh_min = div_factor_fine(cache->length,
3561 bargs->usage_min);
3562
3563 if (bargs->usage_max == 0)
3564 user_thresh_max = 1;
3565 else if (bargs->usage_max > 100)
3566 user_thresh_max = cache->length;
3567 else
3568 user_thresh_max = div_factor_fine(cache->length,
3569 bargs->usage_max);
3570
3571 if (user_thresh_min <= chunk_used && chunk_used < user_thresh_max)
3572 ret = 0;
3573
3574 btrfs_put_block_group(cache);
3575 return ret;
3576}
3577
3578static int chunk_usage_filter(struct btrfs_fs_info *fs_info,
3579 u64 chunk_offset, struct btrfs_balance_args *bargs)
3580{
3581 struct btrfs_block_group *cache;
3582 u64 chunk_used, user_thresh;
3583 int ret = 1;
3584
3585 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
3586 chunk_used = cache->used;
3587
3588 if (bargs->usage_min == 0)
3589 user_thresh = 1;
3590 else if (bargs->usage > 100)
3591 user_thresh = cache->length;
3592 else
3593 user_thresh = div_factor_fine(cache->length, bargs->usage);
3594
3595 if (chunk_used < user_thresh)
3596 ret = 0;
3597
3598 btrfs_put_block_group(cache);
3599 return ret;
3600}
3601
3602static int chunk_devid_filter(struct extent_buffer *leaf,
3603 struct btrfs_chunk *chunk,
3604 struct btrfs_balance_args *bargs)
3605{
3606 struct btrfs_stripe *stripe;
3607 int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
3608 int i;
3609
3610 for (i = 0; i < num_stripes; i++) {
3611 stripe = btrfs_stripe_nr(chunk, i);
3612 if (btrfs_stripe_devid(leaf, stripe) == bargs->devid)
3613 return 0;
3614 }
3615
3616 return 1;
3617}
3618
3619static u64 calc_data_stripes(u64 type, int num_stripes)
3620{
3621 const int index = btrfs_bg_flags_to_raid_index(type);
3622 const int ncopies = btrfs_raid_array[index].ncopies;
3623 const int nparity = btrfs_raid_array[index].nparity;
3624
3625 if (nparity)
3626 return num_stripes - nparity;
3627 else
3628 return num_stripes / ncopies;
3629}
3630
3631
3632static int chunk_drange_filter(struct extent_buffer *leaf,
3633 struct btrfs_chunk *chunk,
3634 struct btrfs_balance_args *bargs)
3635{
3636 struct btrfs_stripe *stripe;
3637 int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
3638 u64 stripe_offset;
3639 u64 stripe_length;
3640 u64 type;
3641 int factor;
3642 int i;
3643
3644 if (!(bargs->flags & BTRFS_BALANCE_ARGS_DEVID))
3645 return 0;
3646
3647 type = btrfs_chunk_type(leaf, chunk);
3648 factor = calc_data_stripes(type, num_stripes);
3649
3650 for (i = 0; i < num_stripes; i++) {
3651 stripe = btrfs_stripe_nr(chunk, i);
3652 if (btrfs_stripe_devid(leaf, stripe) != bargs->devid)
3653 continue;
3654
3655 stripe_offset = btrfs_stripe_offset(leaf, stripe);
3656 stripe_length = btrfs_chunk_length(leaf, chunk);
3657 stripe_length = div_u64(stripe_length, factor);
3658
3659 if (stripe_offset < bargs->pend &&
3660 stripe_offset + stripe_length > bargs->pstart)
3661 return 0;
3662 }
3663
3664 return 1;
3665}
3666
3667
3668static int chunk_vrange_filter(struct extent_buffer *leaf,
3669 struct btrfs_chunk *chunk,
3670 u64 chunk_offset,
3671 struct btrfs_balance_args *bargs)
3672{
3673 if (chunk_offset < bargs->vend &&
3674 chunk_offset + btrfs_chunk_length(leaf, chunk) > bargs->vstart)
3675
3676 return 0;
3677
3678 return 1;
3679}
3680
3681static int chunk_stripes_range_filter(struct extent_buffer *leaf,
3682 struct btrfs_chunk *chunk,
3683 struct btrfs_balance_args *bargs)
3684{
3685 int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
3686
3687 if (bargs->stripes_min <= num_stripes
3688 && num_stripes <= bargs->stripes_max)
3689 return 0;
3690
3691 return 1;
3692}
3693
3694static int chunk_soft_convert_filter(u64 chunk_type,
3695 struct btrfs_balance_args *bargs)
3696{
3697 if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT))
3698 return 0;
3699
3700 chunk_type = chunk_to_extended(chunk_type) &
3701 BTRFS_EXTENDED_PROFILE_MASK;
3702
3703 if (bargs->target == chunk_type)
3704 return 1;
3705
3706 return 0;
3707}
3708
3709static int should_balance_chunk(struct extent_buffer *leaf,
3710 struct btrfs_chunk *chunk, u64 chunk_offset)
3711{
3712 struct btrfs_fs_info *fs_info = leaf->fs_info;
3713 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3714 struct btrfs_balance_args *bargs = NULL;
3715 u64 chunk_type = btrfs_chunk_type(leaf, chunk);
3716
3717
3718 if (!((chunk_type & BTRFS_BLOCK_GROUP_TYPE_MASK) &
3719 (bctl->flags & BTRFS_BALANCE_TYPE_MASK))) {
3720 return 0;
3721 }
3722
3723 if (chunk_type & BTRFS_BLOCK_GROUP_DATA)
3724 bargs = &bctl->data;
3725 else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
3726 bargs = &bctl->sys;
3727 else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA)
3728 bargs = &bctl->meta;
3729
3730
3731 if ((bargs->flags & BTRFS_BALANCE_ARGS_PROFILES) &&
3732 chunk_profiles_filter(chunk_type, bargs)) {
3733 return 0;
3734 }
3735
3736
3737 if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE) &&
3738 chunk_usage_filter(fs_info, chunk_offset, bargs)) {
3739 return 0;
3740 } else if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
3741 chunk_usage_range_filter(fs_info, chunk_offset, bargs)) {
3742 return 0;
3743 }
3744
3745
3746 if ((bargs->flags & BTRFS_BALANCE_ARGS_DEVID) &&
3747 chunk_devid_filter(leaf, chunk, bargs)) {
3748 return 0;
3749 }
3750
3751
3752 if ((bargs->flags & BTRFS_BALANCE_ARGS_DRANGE) &&
3753 chunk_drange_filter(leaf, chunk, bargs)) {
3754 return 0;
3755 }
3756
3757
3758 if ((bargs->flags & BTRFS_BALANCE_ARGS_VRANGE) &&
3759 chunk_vrange_filter(leaf, chunk, chunk_offset, bargs)) {
3760 return 0;
3761 }
3762
3763
3764 if ((bargs->flags & BTRFS_BALANCE_ARGS_STRIPES_RANGE) &&
3765 chunk_stripes_range_filter(leaf, chunk, bargs)) {
3766 return 0;
3767 }
3768
3769
3770 if ((bargs->flags & BTRFS_BALANCE_ARGS_SOFT) &&
3771 chunk_soft_convert_filter(chunk_type, bargs)) {
3772 return 0;
3773 }
3774
3775
3776
3777
3778 if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT)) {
3779 if (bargs->limit == 0)
3780 return 0;
3781 else
3782 bargs->limit--;
3783 } else if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT_RANGE)) {
3784
3785
3786
3787
3788
3789 if (bargs->limit_max == 0)
3790 return 0;
3791 else
3792 bargs->limit_max--;
3793 }
3794
3795 return 1;
3796}
3797
/*
 * Run the balance described by fs_info->balance_ctl.
 *
 * The chunk tree is walked twice, each time from the highest chunk offset
 * downwards.  The first ("counting") pass only counts the chunks accepted
 * by should_balance_chunk(); the second pass relocates them.
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, -ECANCELED
 * if a cancel (or, during the second pass, a pause) was requested, -ENOSPC
 * if at least one relocation failed with -ENOSPC and nothing else failed,
 * or another negative errno.
 */
static int __btrfs_balance(struct btrfs_fs_info *fs_info)
{
	struct btrfs_balance_control *bctl = fs_info->balance_ctl;
	struct btrfs_root *chunk_root = fs_info->chunk_root;
	u64 chunk_type;
	struct btrfs_chunk *chunk;
	struct btrfs_path *path = NULL;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *leaf;
	int slot;
	int ret;
	int enospc_errors = 0;
	bool counting = true;
	/* The limits are decremented by should_balance_chunk(); save them. */
	u64 limit_data = bctl->data.limit;
	u64 limit_meta = bctl->meta.limit;
	u64 limit_sys = bctl->sys.limit;
	u32 count_data = 0;
	u32 count_meta = 0;
	u32 count_sys = 0;
	int chunk_reserved = 0;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto error;
	}

	/* Zero out the stat counters. */
	spin_lock(&fs_info->balance_lock);
	memset(&bctl->stat, 0, sizeof(bctl->stat));
	spin_unlock(&fs_info->balance_lock);
again:
	if (!counting) {
		/*
		 * The counting pass consumed the limits, so restore the
		 * saved values before the pass that does the actual work.
		 */
		bctl->data.limit = limit_data;
		bctl->meta.limit = limit_meta;
		bctl->sys.limit = limit_sys;
	}
	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
	key.offset = (u64)-1;
	key.type = BTRFS_CHUNK_ITEM_KEY;

	while (1) {
		/* A pause request is only honoured once counting is done. */
		if ((!counting && atomic_read(&fs_info->balance_pause_req)) ||
		    atomic_read(&fs_info->balance_cancel_req)) {
			ret = -ECANCELED;
			goto error;
		}

		mutex_lock(&fs_info->reclaim_bgs_lock);
		ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
		if (ret < 0) {
			mutex_unlock(&fs_info->reclaim_bgs_lock);
			goto error;
		}

		/*
		 * An exact match for the search key is treated as fatal.
		 * NOTE(review): consider whether this could break out of the
		 * loop instead of calling BUG().
		 */
		if (ret == 0)
			BUG();

		ret = btrfs_previous_item(chunk_root, path, 0,
					  BTRFS_CHUNK_ITEM_KEY);
		if (ret) {
			/* No further chunk item: the walk ends without error. */
			mutex_unlock(&fs_info->reclaim_bgs_lock);
			ret = 0;
			break;
		}

		leaf = path->nodes[0];
		slot = path->slots[0];
		btrfs_item_key_to_cpu(leaf, &found_key, slot);

		if (found_key.objectid != key.objectid) {
			mutex_unlock(&fs_info->reclaim_bgs_lock);
			break;
		}

		chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
		chunk_type = btrfs_chunk_type(leaf, chunk);

		if (!counting) {
			spin_lock(&fs_info->balance_lock);
			bctl->stat.considered++;
			spin_unlock(&fs_info->balance_lock);
		}

		ret = should_balance_chunk(leaf, chunk, found_key.offset);

		/* Nothing below reads the leaf any more. */
		btrfs_release_path(path);
		if (!ret) {
			mutex_unlock(&fs_info->reclaim_bgs_lock);
			goto loop;
		}

		if (counting) {
			/* First pass: only account for the accepted chunk. */
			mutex_unlock(&fs_info->reclaim_bgs_lock);
			spin_lock(&fs_info->balance_lock);
			bctl->stat.expected++;
			spin_unlock(&fs_info->balance_lock);

			if (chunk_type & BTRFS_BLOCK_GROUP_DATA)
				count_data++;
			else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
				count_sys++;
			else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA)
				count_meta++;

			goto loop;
		}

		/*
		 * Apply the limit_min filter: skip the chunk if the counting
		 * pass found fewer matching chunks of its type than limit_min.
		 * NOTE(review): this is applied unconditionally, presumably
		 * because limit_min is 0 unless the range limit is in use -
		 * confirm.
		 */
		if (((chunk_type & BTRFS_BLOCK_GROUP_DATA) &&
					count_data < bctl->data.limit_min)
				|| ((chunk_type & BTRFS_BLOCK_GROUP_METADATA) &&
					count_meta < bctl->meta.limit_min)
				|| ((chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) &&
					count_sys < bctl->sys.limit_min)) {
			mutex_unlock(&fs_info->reclaim_bgs_lock);
			goto loop;
		}

		if (!chunk_reserved) {
			/*
			 * Possibly force the allocation of an empty data
			 * chunk before relocating this one; once that has
			 * happened (return value 1) it is not tried again.
			 * NOTE(review): presumably this keeps the data
			 * profile from being lost when the only data chunk is
			 * relocated - confirm.
			 */
			ret = btrfs_may_alloc_data_chunk(fs_info,
							 found_key.offset);
			if (ret < 0) {
				mutex_unlock(&fs_info->reclaim_bgs_lock);
				goto error;
			} else if (ret == 1) {
				chunk_reserved = 1;
			}
		}

		ret = btrfs_relocate_chunk(fs_info, found_key.offset);
		mutex_unlock(&fs_info->reclaim_bgs_lock);
		if (ret == -ENOSPC) {
			/* Remember the failure but keep going. */
			enospc_errors++;
		} else if (ret == -ETXTBSY) {
			btrfs_info(fs_info,
	   "skipping relocation of block group %llu due to active swapfile",
				   found_key.offset);
			ret = 0;
		} else if (ret) {
			goto error;
		} else {
			spin_lock(&fs_info->balance_lock);
			bctl->stat.completed++;
			spin_unlock(&fs_info->balance_lock);
		}
loop:
		/* Continue with the chunk just below this one, if any. */
		if (found_key.offset == 0)
			break;
		key.offset = found_key.offset - 1;
	}

	if (counting) {
		/* Counting is done; start over and do the real work. */
		btrfs_release_path(path);
		counting = false;
		goto again;
	}
error:
	btrfs_free_path(path);
	if (enospc_errors) {
		btrfs_info(fs_info, "%d enospc errors during balance",
			   enospc_errors);
		/* Do not hide a more specific error behind -ENOSPC. */
		if (!ret)
			ret = -ENOSPC;
	}

	return ret;
}
3985
3986
3987
3988
3989
3990
3991static int alloc_profile_is_valid(u64 flags, int extended)
3992{
3993 u64 mask = (extended ? BTRFS_EXTENDED_PROFILE_MASK :
3994 BTRFS_BLOCK_GROUP_PROFILE_MASK);
3995
3996 flags &= ~BTRFS_BLOCK_GROUP_TYPE_MASK;
3997
3998
3999 if (flags & ~mask)
4000 return 0;
4001
4002
4003 if (flags == 0)
4004 return !extended;
4005
4006 return has_single_bit_set(flags);
4007}
4008
4009static inline int balance_need_close(struct btrfs_fs_info *fs_info)
4010{
4011
4012 return atomic_read(&fs_info->balance_cancel_req) ||
4013 (atomic_read(&fs_info->balance_pause_req) == 0 &&
4014 atomic_read(&fs_info->balance_cancel_req) == 0);
4015}
4016
4017
4018
4019
4020
4021static inline int validate_convert_profile(struct btrfs_fs_info *fs_info,
4022 const struct btrfs_balance_args *bargs,
4023 u64 allowed, const char *type)
4024{
4025 if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT))
4026 return true;
4027
4028
4029 if (alloc_profile_is_valid(bargs->target, 1) &&
4030 (bargs->target & ~allowed) == 0)
4031 return true;
4032
4033 btrfs_err(fs_info, "balance: invalid convert %s profile %s",
4034 type, btrfs_bg_type_to_raid_name(bargs->target));
4035 return false;
4036}
4037
4038
4039
4040
4041
4042
4043static void describe_balance_args(struct btrfs_balance_args *bargs, char *buf,
4044 u32 size_buf)
4045{
4046 int ret;
4047 u32 size_bp = size_buf;
4048 char *bp = buf;
4049 u64 flags = bargs->flags;
4050 char tmp_buf[128] = {'\0'};
4051
4052 if (!flags)
4053 return;
4054
4055#define CHECK_APPEND_NOARG(a) \
4056 do { \
4057 ret = snprintf(bp, size_bp, (a)); \
4058 if (ret < 0 || ret >= size_bp) \
4059 goto out_overflow; \
4060 size_bp -= ret; \
4061 bp += ret; \
4062 } while (0)
4063
4064#define CHECK_APPEND_1ARG(a, v1) \
4065 do { \
4066 ret = snprintf(bp, size_bp, (a), (v1)); \
4067 if (ret < 0 || ret >= size_bp) \
4068 goto out_overflow; \
4069 size_bp -= ret; \
4070 bp += ret; \
4071 } while (0)
4072
4073#define CHECK_APPEND_2ARG(a, v1, v2) \
4074 do { \
4075 ret = snprintf(bp, size_bp, (a), (v1), (v2)); \
4076 if (ret < 0 || ret >= size_bp) \
4077 goto out_overflow; \
4078 size_bp -= ret; \
4079 bp += ret; \
4080 } while (0)
4081
4082 if (flags & BTRFS_BALANCE_ARGS_CONVERT)
4083 CHECK_APPEND_1ARG("convert=%s,",
4084 btrfs_bg_type_to_raid_name(bargs->target));
4085
4086 if (flags & BTRFS_BALANCE_ARGS_SOFT)
4087 CHECK_APPEND_NOARG("soft,");
4088
4089 if (flags & BTRFS_BALANCE_ARGS_PROFILES) {
4090 btrfs_describe_block_groups(bargs->profiles, tmp_buf,
4091 sizeof(tmp_buf));
4092 CHECK_APPEND_1ARG("profiles=%s,", tmp_buf);
4093 }
4094
4095 if (flags & BTRFS_BALANCE_ARGS_USAGE)
4096 CHECK_APPEND_1ARG("usage=%llu,", bargs->usage);
4097
4098 if (flags & BTRFS_BALANCE_ARGS_USAGE_RANGE)
4099 CHECK_APPEND_2ARG("usage=%u..%u,",
4100 bargs->usage_min, bargs->usage_max);
4101
4102 if (flags & BTRFS_BALANCE_ARGS_DEVID)
4103 CHECK_APPEND_1ARG("devid=%llu,", bargs->devid);
4104
4105 if (flags & BTRFS_BALANCE_ARGS_DRANGE)
4106 CHECK_APPEND_2ARG("drange=%llu..%llu,",
4107 bargs->pstart, bargs->pend);
4108
4109 if (flags & BTRFS_BALANCE_ARGS_VRANGE)
4110 CHECK_APPEND_2ARG("vrange=%llu..%llu,",
4111 bargs->vstart, bargs->vend);
4112
4113 if (flags & BTRFS_BALANCE_ARGS_LIMIT)
4114 CHECK_APPEND_1ARG("limit=%llu,", bargs->limit);
4115
4116 if (flags & BTRFS_BALANCE_ARGS_LIMIT_RANGE)
4117 CHECK_APPEND_2ARG("limit=%u..%u,",
4118 bargs->limit_min, bargs->limit_max);
4119
4120 if (flags & BTRFS_BALANCE_ARGS_STRIPES_RANGE)
4121 CHECK_APPEND_2ARG("stripes=%u..%u,",
4122 bargs->stripes_min, bargs->stripes_max);
4123
4124#undef CHECK_APPEND_2ARG
4125#undef CHECK_APPEND_1ARG
4126#undef CHECK_APPEND_NOARG
4127
4128out_overflow:
4129
4130 if (size_bp < size_buf)
4131 buf[size_buf - size_bp - 1] = '\0';
4132 else
4133 buf[0] = '\0';
4134}
4135
4136static void describe_balance_start_or_resume(struct btrfs_fs_info *fs_info)
4137{
4138 u32 size_buf = 1024;
4139 char tmp_buf[192] = {'\0'};
4140 char *buf;
4141 char *bp;
4142 u32 size_bp = size_buf;
4143 int ret;
4144 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
4145
4146 buf = kzalloc(size_buf, GFP_KERNEL);
4147 if (!buf)
4148 return;
4149
4150 bp = buf;
4151
4152#define CHECK_APPEND_1ARG(a, v1) \
4153 do { \
4154 ret = snprintf(bp, size_bp, (a), (v1)); \
4155 if (ret < 0 || ret >= size_bp) \
4156 goto out_overflow; \
4157 size_bp -= ret; \
4158 bp += ret; \
4159 } while (0)
4160
4161 if (bctl->flags & BTRFS_BALANCE_FORCE)
4162 CHECK_APPEND_1ARG("%s", "-f ");
4163
4164 if (bctl->flags & BTRFS_BALANCE_DATA) {
4165 describe_balance_args(&bctl->data, tmp_buf, sizeof(tmp_buf));
4166 CHECK_APPEND_1ARG("-d%s ", tmp_buf);
4167 }
4168
4169 if (bctl->flags & BTRFS_BALANCE_METADATA) {
4170 describe_balance_args(&bctl->meta, tmp_buf, sizeof(tmp_buf));
4171 CHECK_APPEND_1ARG("-m%s ", tmp_buf);
4172 }
4173
4174 if (bctl->flags & BTRFS_BALANCE_SYSTEM) {
4175 describe_balance_args(&bctl->sys, tmp_buf, sizeof(tmp_buf));
4176 CHECK_APPEND_1ARG("-s%s ", tmp_buf);
4177 }
4178
4179#undef CHECK_APPEND_1ARG
4180
4181out_overflow:
4182
4183 if (size_bp < size_buf)
4184 buf[size_buf - size_bp - 1] = '\0';
4185 btrfs_info(fs_info, "balance: %s %s",
4186 (bctl->flags & BTRFS_BALANCE_RESUME) ?
4187 "resume" : "start", buf);
4188
4189 kfree(buf);
4190}
4191
4192
4193
4194
/*
 * Validate the balance request in @bctl, record it and run the balance.
 *
 * @fs_info: filesystem to balance
 * @bctl:    balance control describing the request
 * @bargs:   optional ioctl argument structure; when non-NULL it is zeroed and
 *           refilled through btrfs_update_ioctl_balance_args() after the run
 *
 * Locking: fs_info->balance_mutex is released around __btrfs_balance() and
 * re-acquired afterwards, so the caller must hold it on entry and still holds
 * it on return.  The early error path ("out") does not touch the mutex.
 *
 * On the early error path @bctl is freed with kfree() unless it carries
 * BTRFS_BALANCE_RESUME, in which case reset_balance_state() is called; the
 * exclusive operation is finished in both cases.
 *
 * Returns the result of __btrfs_balance(), or a negative errno if the
 * request is rejected before the balance starts.
 */
int btrfs_balance(struct btrfs_fs_info *fs_info,
		  struct btrfs_balance_control *bctl,
		  struct btrfs_ioctl_balance_args *bargs)
{
	u64 meta_target, data_target;
	u64 allowed;
	int mixed = 0;
	int ret;
	u64 num_devices;
	unsigned seq;
	bool reducing_redundancy;
	int i;

	/* Refuse to start while closing or with a pause/cancel pending */
	if (btrfs_fs_closing(fs_info) ||
	    atomic_read(&fs_info->balance_pause_req) ||
	    btrfs_should_cancel_balance(fs_info)) {
		ret = -EINVAL;
		goto out;
	}

	allowed = btrfs_super_incompat_flags(fs_info->super_copy);
	if (allowed & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
		mixed = 1;

	/*
	 * With mixed block groups, data and metadata must both be selected
	 * and must carry identical options.
	 */
	allowed = BTRFS_BALANCE_DATA | BTRFS_BALANCE_METADATA;
	if (mixed && (bctl->flags & allowed)) {
		if (!(bctl->flags & BTRFS_BALANCE_DATA) ||
		    !(bctl->flags & BTRFS_BALANCE_METADATA) ||
		    memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) {
			btrfs_err(fs_info,
	  "balance: mixed groups data and metadata options must be the same");
			ret = -EINVAL;
			goto out;
		}
	}

	/* Snapshot of fs_devices->rw_devices used for the profile checks */
	num_devices = fs_info->fs_devices->rw_devices;

	/*
	 * Build the set of acceptable convert targets: the SINGLE bit is
	 * always allowed, every other profile only when enough devices are
	 * present to satisfy its devs_min.
	 */
	allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
	for (i = 0; i < ARRAY_SIZE(btrfs_raid_array); i++)
		if (num_devices >= btrfs_raid_array[i].devs_min)
			allowed |= btrfs_raid_array[i].bg_flag;

	if (!validate_convert_profile(fs_info, &bctl->data, allowed, "data") ||
	    !validate_convert_profile(fs_info, &bctl->meta, allowed, "metadata") ||
	    !validate_convert_profile(fs_info, &bctl->sys,  allowed, "system")) {
		ret = -EINVAL;
		goto out;
	}

	/*
	 * From here on "allowed" holds the profiles that provide redundancy
	 * (at least two copies or at least one tolerated failure).  Moving
	 * system or metadata away from such a profile requires force.
	 */
	allowed = 0;
	for (i = 0; i < ARRAY_SIZE(btrfs_raid_array); i++) {
		if (btrfs_raid_array[i].ncopies >= 2 ||
		    btrfs_raid_array[i].tolerated_failures >= 1)
			allowed |= btrfs_raid_array[i].bg_flag;
	}
	/* Sample the avail_*_alloc_bits consistently under profiles_lock */
	do {
		seq = read_seqbegin(&fs_info->profiles_lock);

		if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
		     (fs_info->avail_system_alloc_bits & allowed) &&
		     !(bctl->sys.target & allowed)) ||
		    ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
		     (fs_info->avail_metadata_alloc_bits & allowed) &&
		     !(bctl->meta.target & allowed)))
			reducing_redundancy = true;
		else
			reducing_redundancy = false;

		/* Without a conversion, fall back to the current profiles */
		meta_target = (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
			bctl->meta.target : fs_info->avail_metadata_alloc_bits;
		data_target = (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
			bctl->data.target : fs_info->avail_data_alloc_bits;
	} while (read_seqretry(&fs_info->profiles_lock, seq));

	if (reducing_redundancy) {
		if (bctl->flags & BTRFS_BALANCE_FORCE) {
			btrfs_info(fs_info,
			   "balance: force reducing metadata redundancy");
		} else {
			btrfs_err(fs_info,
	"balance: reduces metadata redundancy, use --force if you want this");
			ret = -EINVAL;
			goto out;
		}
	}

	/* Only a warning: metadata ends up less redundant than data */
	if (btrfs_get_num_tolerated_disk_barrier_failures(meta_target) <
		btrfs_get_num_tolerated_disk_barrier_failures(data_target)) {
		btrfs_warn(fs_info,
	"balance: metadata profile %s has lower redundancy than data profile %s",
				btrfs_bg_type_to_raid_name(meta_target),
				btrfs_bg_type_to_raid_name(data_target));
	}

	/* -EEXIST is expected when resuming, see the BUG_ON()s below */
	ret = insert_balance_item(fs_info, bctl);
	if (ret && ret != -EEXIST)
		goto out;

	if (!(bctl->flags & BTRFS_BALANCE_RESUME)) {
		/* Fresh start: install @bctl as the active balance control */
		BUG_ON(ret == -EEXIST);
		BUG_ON(fs_info->balance_ctl);
		spin_lock(&fs_info->balance_lock);
		fs_info->balance_ctl = bctl;
		spin_unlock(&fs_info->balance_lock);
	} else {
		/* Resume: the item must already exist, refresh the arguments */
		BUG_ON(ret != -EEXIST);
		spin_lock(&fs_info->balance_lock);
		update_balance_args(bctl);
		spin_unlock(&fs_info->balance_lock);
	}

	ASSERT(!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
	set_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);
	describe_balance_start_or_resume(fs_info);
	/* The mutex is not held while the balance itself runs */
	mutex_unlock(&fs_info->balance_mutex);

	ret = __btrfs_balance(fs_info);

	mutex_lock(&fs_info->balance_mutex);
	if (ret == -ECANCELED && atomic_read(&fs_info->balance_pause_req))
		btrfs_info(fs_info, "balance: paused");
	/*
	 * Without a pending pause request, both -ECANCELED and -EINTR are
	 * reported as a canceled balance; only the return value is examined
	 * here.
	 */
	else if (ret == -ECANCELED || ret == -EINTR)
		btrfs_info(fs_info, "balance: canceled");
	else
		btrfs_info(fs_info, "balance: ended with status: %d", ret);

	clear_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);

	if (bargs) {
		memset(bargs, 0, sizeof(*bargs));
		btrfs_update_ioctl_balance_args(fs_info, bargs);
	}

	/*
	 * Tear the balance state down on errors other than -ECANCELED and
	 * -ENOSPC, or when balance_need_close() says so.
	 */
	if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
	    balance_need_close(fs_info)) {
		reset_balance_state(fs_info);
		btrfs_exclop_finish(fs_info);
	}

	/* Wake up waiters in the pause and cancel paths */
	wake_up(&fs_info->balance_wait_q);

	return ret;
out:
	if (bctl->flags & BTRFS_BALANCE_RESUME)
		reset_balance_state(fs_info);
	else
		kfree(bctl);
	btrfs_exclop_finish(fs_info);

	return ret;
}
4380
4381static int balance_kthread(void *data)
4382{
4383 struct btrfs_fs_info *fs_info = data;
4384 int ret = 0;
4385
4386 mutex_lock(&fs_info->balance_mutex);
4387 if (fs_info->balance_ctl)
4388 ret = btrfs_balance(fs_info, fs_info->balance_ctl, NULL);
4389 mutex_unlock(&fs_info->balance_mutex);
4390
4391 return ret;
4392}
4393
4394int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
4395{
4396 struct task_struct *tsk;
4397
4398 mutex_lock(&fs_info->balance_mutex);
4399 if (!fs_info->balance_ctl) {
4400 mutex_unlock(&fs_info->balance_mutex);
4401 return 0;
4402 }
4403 mutex_unlock(&fs_info->balance_mutex);
4404
4405 if (btrfs_test_opt(fs_info, SKIP_BALANCE)) {
4406 btrfs_info(fs_info, "balance: resume skipped");
4407 return 0;
4408 }
4409
4410
4411
4412
4413
4414
4415 spin_lock(&fs_info->balance_lock);
4416 fs_info->balance_ctl->flags |= BTRFS_BALANCE_RESUME;
4417 spin_unlock(&fs_info->balance_lock);
4418
4419 tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
4420 return PTR_ERR_OR_ZERO(tsk);
4421}
4422
/*
 * Look up the balance item in the tree root and, if one exists, rebuild an
 * in-memory balance control from it and install it as fs_info->balance_ctl
 * with BTRFS_BALANCE_RESUME set.
 *
 * Returns 0 on success or when no balance item exists, -ENOMEM on allocation
 * failure, or the negative error from btrfs_search_slot().
 */
int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
{
	struct btrfs_balance_control *bctl;
	struct btrfs_balance_item *item;
	struct btrfs_disk_balance_args disk_bargs;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = BTRFS_BALANCE_OBJECTID;
	key.type = BTRFS_TEMPORARY_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	if (ret > 0) {
		/* No exact match: there is no balance to recover */
		ret = 0;
		goto out;
	}

	bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
	if (!bctl) {
		ret = -ENOMEM;
		goto out;
	}

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);

	bctl->flags = btrfs_balance_flags(leaf, item);
	bctl->flags |= BTRFS_BALANCE_RESUME;

	/* Convert the three on-disk argument sets to their CPU form */
	btrfs_balance_data(leaf, item, &disk_bargs);
	btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs);
	btrfs_balance_meta(leaf, item, &disk_bargs);
	btrfs_disk_balance_args_to_cpu(&bctl->meta, &disk_bargs);
	btrfs_balance_sys(leaf, item, &disk_bargs);
	btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);

	/*
	 * Try to claim the exclusive operation status for balance.  Failure
	 * is not fatal: it is only reported and the recovered control is
	 * still installed below.
	 */
	if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE))
		btrfs_warn(fs_info,
	"balance: cannot set exclusive op status, resume manually");

	btrfs_release_path(path);

	mutex_lock(&fs_info->balance_mutex);
	BUG_ON(fs_info->balance_ctl);
	spin_lock(&fs_info->balance_lock);
	fs_info->balance_ctl = bctl;
	spin_unlock(&fs_info->balance_lock);
	mutex_unlock(&fs_info->balance_mutex);
out:
	btrfs_free_path(path);
	return ret;
}
4494
4495int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
4496{
4497 int ret = 0;
4498
4499 mutex_lock(&fs_info->balance_mutex);
4500 if (!fs_info->balance_ctl) {
4501 mutex_unlock(&fs_info->balance_mutex);
4502 return -ENOTCONN;
4503 }
4504
4505 if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
4506 atomic_inc(&fs_info->balance_pause_req);
4507 mutex_unlock(&fs_info->balance_mutex);
4508
4509 wait_event(fs_info->balance_wait_q,
4510 !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
4511
4512 mutex_lock(&fs_info->balance_mutex);
4513
4514 BUG_ON(test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
4515 atomic_dec(&fs_info->balance_pause_req);
4516 } else {
4517 ret = -ENOTCONN;
4518 }
4519
4520 mutex_unlock(&fs_info->balance_mutex);
4521 return ret;
4522}
4523
/*
 * Cancel the current balance, whether it is running or not.
 *
 * Returns 0 once the balance state is gone, -ENOTCONN if there is no balance
 * control, or -EROFS if the superblock is read-only.
 */
int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
{
	mutex_lock(&fs_info->balance_mutex);
	if (!fs_info->balance_ctl) {
		mutex_unlock(&fs_info->balance_mutex);
		return -ENOTCONN;
	}

	/*
	 * NOTE(review): cancelling is refused on a read-only superblock,
	 * presumably because tearing the balance down modifies on-disk
	 * state -- confirm against reset_balance_state().
	 */
	if (sb_rdonly(fs_info->sb)) {
		mutex_unlock(&fs_info->balance_mutex);
		return -EROFS;
	}

	atomic_inc(&fs_info->balance_cancel_req);

	/*
	 * If the balance is running, wait for it to stop; the mutex is
	 * dropped while waiting.
	 */
	if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
		mutex_unlock(&fs_info->balance_mutex);
		wait_event(fs_info->balance_wait_q,
			   !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
		mutex_lock(&fs_info->balance_mutex);
	} else {
		mutex_unlock(&fs_info->balance_mutex);
		/*
		 * The mutex is dropped and immediately re-taken; balance_ctl
		 * is re-checked below since it may have changed in between.
		 */
		mutex_lock(&fs_info->balance_mutex);

		if (fs_info->balance_ctl) {
			reset_balance_state(fs_info);
			btrfs_exclop_finish(fs_info);
			btrfs_info(fs_info, "balance: canceled");
		}
	}

	/* By now there must be neither a control nor a running balance */
	BUG_ON(fs_info->balance_ctl ||
		test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
	atomic_dec(&fs_info->balance_cancel_req);
	mutex_unlock(&fs_info->balance_mutex);
	return 0;
}
4573
/*
 * Thread function that walks the root items of the tree root and adds the
 * uuid and received_uuid of every eligible root to the UUID tree.
 *
 * @data is the btrfs_fs_info.  The scan stops early when the filesystem is
 * closing.  Errors are only logged: the function always returns 0 and always
 * releases fs_info->uuid_tree_rescan_sem before returning.  On a complete,
 * error-free scan BTRFS_FS_UPDATE_UUID_TREE_GEN is set in fs_info->flags.
 */
int btrfs_uuid_scan_kthread(void *data)
{
	struct btrfs_fs_info *fs_info = data;
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_key key;
	struct btrfs_path *path = NULL;
	int ret = 0;
	struct extent_buffer *eb;
	int slot;
	struct btrfs_root_item root_item;
	u32 item_size;
	struct btrfs_trans_handle *trans = NULL;
	bool closing = false;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	key.objectid = 0;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = 0;

	while (1) {
		if (btrfs_fs_closing(fs_info)) {
			closing = true;
			break;
		}
		ret = btrfs_search_forward(root, &key, path,
				BTRFS_OLDEST_GENERATION);
		if (ret) {
			/* A positive return ends the scan without error */
			if (ret > 0)
				ret = 0;
			break;
		}

		/*
		 * Only root items of the FS tree and of objectids in the
		 * [BTRFS_FIRST_FREE_OBJECTID, BTRFS_LAST_FREE_OBJECTID] range
		 * are of interest.
		 */
		if (key.type != BTRFS_ROOT_ITEM_KEY ||
		    (key.objectid < BTRFS_FIRST_FREE_OBJECTID &&
		     key.objectid != BTRFS_FS_TREE_OBJECTID) ||
		    key.objectid > BTRFS_LAST_FREE_OBJECTID)
			goto skip;

		eb = path->nodes[0];
		slot = path->slots[0];
		item_size = btrfs_item_size_nr(eb, slot);
		/* Items smaller than struct btrfs_root_item are skipped */
		if (item_size < sizeof(root_item))
			goto skip;

		read_extent_buffer(eb, &root_item,
				   btrfs_item_ptr_offset(eb, slot),
				   (int)sizeof(root_item));
		if (btrfs_root_refs(&root_item) == 0)
			goto skip;

		if (!btrfs_is_empty_uuid(root_item.uuid) ||
		    !btrfs_is_empty_uuid(root_item.received_uuid)) {
			if (trans)
				goto update_tree;

			btrfs_release_path(path);
			/*
			 * Start a transaction with 2 units reserved (up to
			 * two UUID tree insertions follow), then redo the
			 * search for the same key with the transaction open.
			 */
			trans = btrfs_start_transaction(fs_info->uuid_root, 2);
			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
				break;
			}
			continue;
		} else {
			goto skip;
		}
update_tree:
		btrfs_release_path(path);
		if (!btrfs_is_empty_uuid(root_item.uuid)) {
			ret = btrfs_uuid_tree_add(trans, root_item.uuid,
						  BTRFS_UUID_KEY_SUBVOL,
						  key.objectid);
			if (ret < 0) {
				btrfs_warn(fs_info, "uuid_tree_add failed %d",
					ret);
				break;
			}
		}

		if (!btrfs_is_empty_uuid(root_item.received_uuid)) {
			ret = btrfs_uuid_tree_add(trans,
						  root_item.received_uuid,
						 BTRFS_UUID_KEY_RECEIVED_SUBVOL,
						  key.objectid);
			if (ret < 0) {
				btrfs_warn(fs_info, "uuid_tree_add failed %d",
					ret);
				break;
			}
		}

skip:
		btrfs_release_path(path);
		/* The transaction, if any, covers a single root item */
		if (trans) {
			ret = btrfs_end_transaction(trans);
			trans = NULL;
			if (ret)
				break;
		}

		/* Advance the search key to the next possible position */
		if (key.offset < (u64)-1) {
			key.offset++;
		} else if (key.type < BTRFS_ROOT_ITEM_KEY) {
			key.offset = 0;
			key.type = BTRFS_ROOT_ITEM_KEY;
		} else if (key.objectid < (u64)-1) {
			key.offset = 0;
			key.type = BTRFS_ROOT_ITEM_KEY;
			key.objectid++;
		} else {
			break;
		}
		cond_resched();
	}

out:
	btrfs_free_path(path);
	/* trans may hold an ERR_PTR if starting the transaction failed */
	if (trans && !IS_ERR(trans))
		btrfs_end_transaction(trans);
	if (ret)
		btrfs_warn(fs_info, "btrfs_uuid_scan_kthread failed %d", ret);
	else if (!closing)
		set_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags);
	up(&fs_info->uuid_tree_rescan_sem);
	return 0;
}
4708
4709int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
4710{
4711 struct btrfs_trans_handle *trans;
4712 struct btrfs_root *tree_root = fs_info->tree_root;
4713 struct btrfs_root *uuid_root;
4714 struct task_struct *task;
4715 int ret;
4716
4717
4718
4719
4720
4721 trans = btrfs_start_transaction(tree_root, 2);
4722 if (IS_ERR(trans))
4723 return PTR_ERR(trans);
4724
4725 uuid_root = btrfs_create_tree(trans, BTRFS_UUID_TREE_OBJECTID);
4726 if (IS_ERR(uuid_root)) {
4727 ret = PTR_ERR(uuid_root);
4728 btrfs_abort_transaction(trans, ret);
4729 btrfs_end_transaction(trans);
4730 return ret;
4731 }
4732
4733 fs_info->uuid_root = uuid_root;
4734
4735 ret = btrfs_commit_transaction(trans);
4736 if (ret)
4737 return ret;
4738
4739 down(&fs_info->uuid_tree_rescan_sem);
4740 task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid");
4741 if (IS_ERR(task)) {
4742
4743 btrfs_warn(fs_info, "failed to start uuid_scan task");
4744 up(&fs_info->uuid_tree_rescan_sem);
4745 return PTR_ERR(task);
4746 }
4747
4748 return 0;
4749}
4750
4751
4752
4753
4754
4755
4756int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
4757{
4758 struct btrfs_fs_info *fs_info = device->fs_info;
4759 struct btrfs_root *root = fs_info->dev_root;
4760 struct btrfs_trans_handle *trans;
4761 struct btrfs_dev_extent *dev_extent = NULL;
4762 struct btrfs_path *path;
4763 u64 length;
4764 u64 chunk_offset;
4765 int ret;
4766 int slot;
4767 int failed = 0;
4768 bool retried = false;
4769 struct extent_buffer *l;
4770 struct btrfs_key key;
4771 struct btrfs_super_block *super_copy = fs_info->super_copy;
4772 u64 old_total = btrfs_super_total_bytes(super_copy);
4773 u64 old_size = btrfs_device_get_total_bytes(device);
4774 u64 diff;
4775 u64 start;
4776
4777 new_size = round_down(new_size, fs_info->sectorsize);
4778 start = new_size;
4779 diff = round_down(old_size - new_size, fs_info->sectorsize);
4780
4781 if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
4782 return -EINVAL;
4783
4784 path = btrfs_alloc_path();
4785 if (!path)
4786 return -ENOMEM;
4787
4788 path->reada = READA_BACK;
4789
4790 trans = btrfs_start_transaction(root, 0);
4791 if (IS_ERR(trans)) {
4792 btrfs_free_path(path);
4793 return PTR_ERR(trans);
4794 }
4795
4796 mutex_lock(&fs_info->chunk_mutex);
4797
4798 btrfs_device_set_total_bytes(device, new_size);
4799 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
4800 device->fs_devices->total_rw_bytes -= diff;
4801 atomic64_sub(diff, &fs_info->free_chunk_space);
4802 }
4803
4804
4805
4806
4807
4808
4809 if (contains_pending_extent(device, &start, diff)) {
4810 mutex_unlock(&fs_info->chunk_mutex);
4811 ret = btrfs_commit_transaction(trans);
4812 if (ret)
4813 goto done;
4814 } else {
4815 mutex_unlock(&fs_info->chunk_mutex);
4816 btrfs_end_transaction(trans);
4817 }
4818
4819again:
4820 key.objectid = device->devid;
4821 key.offset = (u64)-1;
4822 key.type = BTRFS_DEV_EXTENT_KEY;
4823
4824 do {
4825 mutex_lock(&fs_info->reclaim_bgs_lock);
4826 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4827 if (ret < 0) {
4828 mutex_unlock(&fs_info->reclaim_bgs_lock);
4829 goto done;
4830 }
4831
4832 ret = btrfs_previous_item(root, path, 0, key.type);
4833 if (ret) {
4834 mutex_unlock(&fs_info->reclaim_bgs_lock);
4835 if (ret < 0)
4836 goto done;
4837 ret = 0;
4838 btrfs_release_path(path);
4839 break;
4840 }
4841
4842 l = path->nodes[0];
4843 slot = path->slots[0];
4844 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
4845
4846 if (key.objectid != device->devid) {
4847 mutex_unlock(&fs_info->reclaim_bgs_lock);
4848 btrfs_release_path(path);
4849 break;
4850 }
4851
4852 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
4853 length = btrfs_dev_extent_length(l, dev_extent);
4854
4855 if (key.offset + length <= new_size) {
4856 mutex_unlock(&fs_info->reclaim_bgs_lock);
4857 btrfs_release_path(path);
4858 break;
4859 }
4860
4861 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
4862 btrfs_release_path(path);
4863
4864
4865
4866
4867
4868
4869
4870 ret = btrfs_may_alloc_data_chunk(fs_info, chunk_offset);
4871 if (ret < 0) {
4872 mutex_unlock(&fs_info->reclaim_bgs_lock);
4873 goto done;
4874 }
4875
4876 ret = btrfs_relocate_chunk(fs_info, chunk_offset);
4877 mutex_unlock(&fs_info->reclaim_bgs_lock);
4878 if (ret == -ENOSPC) {
4879 failed++;
4880 } else if (ret) {
4881 if (ret == -ETXTBSY) {
4882 btrfs_warn(fs_info,
4883 "could not shrink block group %llu due to active swapfile",
4884 chunk_offset);
4885 }
4886 goto done;
4887 }
4888 } while (key.offset-- > 0);
4889
4890 if (failed && !retried) {
4891 failed = 0;
4892 retried = true;
4893 goto again;
4894 } else if (failed && retried) {
4895 ret = -ENOSPC;
4896 goto done;
4897 }
4898
4899
4900 trans = btrfs_start_transaction(root, 0);
4901 if (IS_ERR(trans)) {
4902 ret = PTR_ERR(trans);
4903 goto done;
4904 }
4905
4906 mutex_lock(&fs_info->chunk_mutex);
4907
4908 clear_extent_bits(&device->alloc_state, new_size, (u64)-1,
4909 CHUNK_STATE_MASK);
4910
4911 btrfs_device_set_disk_total_bytes(device, new_size);
4912 if (list_empty(&device->post_commit_list))
4913 list_add_tail(&device->post_commit_list,
4914 &trans->transaction->dev_update_list);
4915
4916 WARN_ON(diff > old_total);
4917 btrfs_set_super_total_bytes(super_copy,
4918 round_down(old_total - diff, fs_info->sectorsize));
4919 mutex_unlock(&fs_info->chunk_mutex);
4920
4921
4922 ret = btrfs_update_device(trans, device);
4923 if (ret < 0) {
4924 btrfs_abort_transaction(trans, ret);
4925 btrfs_end_transaction(trans);
4926 } else {
4927 ret = btrfs_commit_transaction(trans);
4928 }
4929done:
4930 btrfs_free_path(path);
4931 if (ret) {
4932 mutex_lock(&fs_info->chunk_mutex);
4933 btrfs_device_set_total_bytes(device, old_size);
4934 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
4935 device->fs_devices->total_rw_bytes += diff;
4936 atomic64_add(diff, &fs_info->free_chunk_space);
4937 mutex_unlock(&fs_info->chunk_mutex);
4938 }
4939 return ret;
4940}
4941
4942static int btrfs_add_system_chunk(struct btrfs_fs_info *fs_info,
4943 struct btrfs_key *key,
4944 struct btrfs_chunk *chunk, int item_size)
4945{
4946 struct btrfs_super_block *super_copy = fs_info->super_copy;
4947 struct btrfs_disk_key disk_key;
4948 u32 array_size;
4949 u8 *ptr;
4950
4951 lockdep_assert_held(&fs_info->chunk_mutex);
4952
4953 array_size = btrfs_super_sys_array_size(super_copy);
4954 if (array_size + item_size + sizeof(disk_key)
4955 > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)
4956 return -EFBIG;
4957
4958 ptr = super_copy->sys_chunk_array + array_size;
4959 btrfs_cpu_key_to_disk(&disk_key, key);
4960 memcpy(ptr, &disk_key, sizeof(disk_key));
4961 ptr += sizeof(disk_key);
4962 memcpy(ptr, chunk, item_size);
4963 item_size += sizeof(disk_key);
4964 btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
4965
4966 return 0;
4967}
4968
4969
4970
4971
4972static int btrfs_cmp_device_info(const void *a, const void *b)
4973{
4974 const struct btrfs_device_info *di_a = a;
4975 const struct btrfs_device_info *di_b = b;
4976
4977 if (di_a->max_avail > di_b->max_avail)
4978 return -1;
4979 if (di_a->max_avail < di_b->max_avail)
4980 return 1;
4981 if (di_a->total_avail > di_b->total_avail)
4982 return -1;
4983 if (di_a->total_avail < di_b->total_avail)
4984 return 1;
4985 return 0;
4986}
4987
4988static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
4989{
4990 if (!(type & BTRFS_BLOCK_GROUP_RAID56_MASK))
4991 return;
4992
4993 btrfs_set_fs_incompat(info, RAID56);
4994}
4995
4996static void check_raid1c34_incompat_flag(struct btrfs_fs_info *info, u64 type)
4997{
4998 if (!(type & (BTRFS_BLOCK_GROUP_RAID1C3 | BTRFS_BLOCK_GROUP_RAID1C4)))
4999 return;
5000
5001 btrfs_set_fs_incompat(info, RAID1C34);
5002}
5003
5004
5005
5006
5007
/*
 * Working state for one chunk allocation.  The RAID attributes are copied
 * from btrfs_raid_array[] by init_alloc_chunk_ctl(), the limits are set by
 * the per-policy init helpers and the results are filled in by the
 * decide_stripe_size_*() helpers.
 */
struct alloc_chunk_ctl {
	/* NOTE(review): presumably the logical start of the chunk - confirm */
	u64 start;
	/* Block group flags; select the RAID index and the size limits */
	u64 type;
	/* Total number of stripes, computed as ndevs * dev_stripes */
	int num_stripes;
	/* Copied from btrfs_raid_array[].sub_stripes */
	int sub_stripes;
	/* Stripes placed on each device */
	int dev_stripes;
	/* Upper bound on the number of devices to use */
	int devs_max;
	/* Lower bound on the number of devices to use */
	int devs_min;
	/* ndevs is rounded down to a multiple of this */
	int devs_increment;
	/* Divisor applied to the non-parity stripes to get data stripes */
	int ncopies;
	/* Stripes subtracted from num_stripes before dividing by ncopies */
	int nparity;
	/* Policy limit on the size of a single stripe */
	u64 max_stripe_size;
	/* Policy limit on stripe_size * data stripes */
	u64 max_chunk_size;
	/* Smallest free extent a device must offer to be considered */
	u64 dev_extent_min;
	/* Resulting size of one stripe */
	u64 stripe_size;
	/* Resulting chunk size: stripe_size * data stripes */
	u64 chunk_size;
	/* Number of devices selected for the allocation */
	int ndevs;
};
5034
5035static void init_alloc_chunk_ctl_policy_regular(
5036 struct btrfs_fs_devices *fs_devices,
5037 struct alloc_chunk_ctl *ctl)
5038{
5039 u64 type = ctl->type;
5040
5041 if (type & BTRFS_BLOCK_GROUP_DATA) {
5042 ctl->max_stripe_size = SZ_1G;
5043 ctl->max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE;
5044 } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
5045
5046 if (fs_devices->total_rw_bytes > 50ULL * SZ_1G)
5047 ctl->max_stripe_size = SZ_1G;
5048 else
5049 ctl->max_stripe_size = SZ_256M;
5050 ctl->max_chunk_size = ctl->max_stripe_size;
5051 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
5052 ctl->max_stripe_size = SZ_32M;
5053 ctl->max_chunk_size = 2 * ctl->max_stripe_size;
5054 ctl->devs_max = min_t(int, ctl->devs_max,
5055 BTRFS_MAX_DEVS_SYS_CHUNK);
5056 } else {
5057 BUG();
5058 }
5059
5060
5061 ctl->max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
5062 ctl->max_chunk_size);
5063 ctl->dev_extent_min = BTRFS_STRIPE_LEN * ctl->dev_stripes;
5064}
5065
5066static void init_alloc_chunk_ctl_policy_zoned(
5067 struct btrfs_fs_devices *fs_devices,
5068 struct alloc_chunk_ctl *ctl)
5069{
5070 u64 zone_size = fs_devices->fs_info->zone_size;
5071 u64 limit;
5072 int min_num_stripes = ctl->devs_min * ctl->dev_stripes;
5073 int min_data_stripes = (min_num_stripes - ctl->nparity) / ctl->ncopies;
5074 u64 min_chunk_size = min_data_stripes * zone_size;
5075 u64 type = ctl->type;
5076
5077 ctl->max_stripe_size = zone_size;
5078 if (type & BTRFS_BLOCK_GROUP_DATA) {
5079 ctl->max_chunk_size = round_down(BTRFS_MAX_DATA_CHUNK_SIZE,
5080 zone_size);
5081 } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
5082 ctl->max_chunk_size = ctl->max_stripe_size;
5083 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
5084 ctl->max_chunk_size = 2 * ctl->max_stripe_size;
5085 ctl->devs_max = min_t(int, ctl->devs_max,
5086 BTRFS_MAX_DEVS_SYS_CHUNK);
5087 } else {
5088 BUG();
5089 }
5090
5091
5092 limit = max(round_down(div_factor(fs_devices->total_rw_bytes, 1),
5093 zone_size),
5094 min_chunk_size);
5095 ctl->max_chunk_size = min(limit, ctl->max_chunk_size);
5096 ctl->dev_extent_min = zone_size * ctl->dev_stripes;
5097}
5098
5099static void init_alloc_chunk_ctl(struct btrfs_fs_devices *fs_devices,
5100 struct alloc_chunk_ctl *ctl)
5101{
5102 int index = btrfs_bg_flags_to_raid_index(ctl->type);
5103
5104 ctl->sub_stripes = btrfs_raid_array[index].sub_stripes;
5105 ctl->dev_stripes = btrfs_raid_array[index].dev_stripes;
5106 ctl->devs_max = btrfs_raid_array[index].devs_max;
5107 if (!ctl->devs_max)
5108 ctl->devs_max = BTRFS_MAX_DEVS(fs_devices->fs_info);
5109 ctl->devs_min = btrfs_raid_array[index].devs_min;
5110 ctl->devs_increment = btrfs_raid_array[index].devs_increment;
5111 ctl->ncopies = btrfs_raid_array[index].ncopies;
5112 ctl->nparity = btrfs_raid_array[index].nparity;
5113 ctl->ndevs = 0;
5114
5115 switch (fs_devices->chunk_alloc_policy) {
5116 case BTRFS_CHUNK_ALLOC_REGULAR:
5117 init_alloc_chunk_ctl_policy_regular(fs_devices, ctl);
5118 break;
5119 case BTRFS_CHUNK_ALLOC_ZONED:
5120 init_alloc_chunk_ctl_policy_zoned(fs_devices, ctl);
5121 break;
5122 default:
5123 BUG();
5124 }
5125}
5126
/*
 * Collect, for every usable device on the allocation list, the free extent
 * found for a new chunk and store the candidates in @devices_info, sorted
 * with btrfs_cmp_device_info().
 *
 * @fs_devices:   devices of the filesystem
 * @ctl:          allocation parameters; ctl->ndevs receives the number of
 *                candidates
 * @devices_info: output array; at most fs_devices->rw_devices entries are
 *                written
 *
 * Returns 0 on success or a negative errno from find_free_dev_extent()
 * other than -ENOSPC.
 */
static int gather_device_info(struct btrfs_fs_devices *fs_devices,
			      struct alloc_chunk_ctl *ctl,
			      struct btrfs_device_info *devices_info)
{
	struct btrfs_fs_info *info = fs_devices->fs_info;
	struct btrfs_device *device;
	u64 total_avail;
	u64 dev_extent_want = ctl->max_stripe_size * ctl->dev_stripes;
	int ret;
	int ndevs = 0;
	u64 max_avail;
	u64 dev_offset;

	/*
	 * Walk the allocation list and record, for each eligible device, the
	 * free extent that could host this chunk's stripes.
	 */
	list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
		if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
			WARN(1, KERN_ERR
			       "BTRFS: read-only device in alloc_list\n");
			continue;
		}

		/* Skip devices not in the fs metadata and replace targets */
		if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
					&device->dev_state) ||
		    test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
			continue;

		if (device->total_bytes > device->bytes_used)
			total_avail = device->total_bytes - device->bytes_used;
		else
			total_avail = 0;

		/* Not enough unused bytes for even the minimum extent */
		if (total_avail < ctl->dev_extent_min)
			continue;

		ret = find_free_dev_extent(device, dev_extent_want, &dev_offset,
					   &max_avail);
		if (ret && ret != -ENOSPC)
			return ret;

		/* On success only the wanted size is taken from the hole */
		if (ret == 0)
			max_avail = dev_extent_want;

		if (max_avail < ctl->dev_extent_min) {
			if (btrfs_test_opt(info, ENOSPC_DEBUG))
				btrfs_debug(info,
			"%s: devid %llu has no free space, have=%llu want=%llu",
					    __func__, device->devid, max_avail,
					    ctl->dev_extent_min);
			continue;
		}

		/* Never store more than rw_devices entries */
		if (ndevs == fs_devices->rw_devices) {
			WARN(1, "%s: found more than %llu devices\n",
			     __func__, fs_devices->rw_devices);
			break;
		}
		devices_info[ndevs].dev_offset = dev_offset;
		devices_info[ndevs].max_avail = max_avail;
		devices_info[ndevs].total_avail = total_avail;
		devices_info[ndevs].dev = device;
		++ndevs;
	}
	ctl->ndevs = ndevs;

	/*
	 * Sort the candidates so that the devices with the most available
	 * space come first.
	 */
	sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
	     btrfs_cmp_device_info, NULL);

	return 0;
}
5203
5204static int decide_stripe_size_regular(struct alloc_chunk_ctl *ctl,
5205 struct btrfs_device_info *devices_info)
5206{
5207
5208 int data_stripes;
5209
5210
5211
5212
5213
5214
5215
5216
5217 ctl->stripe_size = div_u64(devices_info[ctl->ndevs - 1].max_avail,
5218 ctl->dev_stripes);
5219 ctl->num_stripes = ctl->ndevs * ctl->dev_stripes;
5220
5221
5222 data_stripes = (ctl->num_stripes - ctl->nparity) / ctl->ncopies;
5223
5224
5225
5226
5227
5228
5229
5230 if (ctl->stripe_size * data_stripes > ctl->max_chunk_size) {
5231
5232
5233
5234
5235
5236 ctl->stripe_size = min(round_up(div_u64(ctl->max_chunk_size,
5237 data_stripes), SZ_16M),
5238 ctl->stripe_size);
5239 }
5240
5241
5242 ctl->stripe_size = round_down(ctl->stripe_size, BTRFS_STRIPE_LEN);
5243 ctl->chunk_size = ctl->stripe_size * data_stripes;
5244
5245 return 0;
5246}
5247
5248static int decide_stripe_size_zoned(struct alloc_chunk_ctl *ctl,
5249 struct btrfs_device_info *devices_info)
5250{
5251 u64 zone_size = devices_info[0].dev->zone_info->zone_size;
5252
5253 int data_stripes;
5254
5255
5256
5257
5258
5259 ASSERT(devices_info[ctl->ndevs - 1].max_avail == ctl->dev_extent_min);
5260
5261 ctl->stripe_size = zone_size;
5262 ctl->num_stripes = ctl->ndevs * ctl->dev_stripes;
5263 data_stripes = (ctl->num_stripes - ctl->nparity) / ctl->ncopies;
5264
5265
5266 if (ctl->stripe_size * data_stripes > ctl->max_chunk_size) {
5267 ctl->ndevs = div_u64(div_u64(ctl->max_chunk_size * ctl->ncopies,
5268 ctl->stripe_size) + ctl->nparity,
5269 ctl->dev_stripes);
5270 ctl->num_stripes = ctl->ndevs * ctl->dev_stripes;
5271 data_stripes = (ctl->num_stripes - ctl->nparity) / ctl->ncopies;
5272 ASSERT(ctl->stripe_size * data_stripes <= ctl->max_chunk_size);
5273 }
5274
5275 ctl->chunk_size = ctl->stripe_size * data_stripes;
5276
5277 return 0;
5278}
5279
5280static int decide_stripe_size(struct btrfs_fs_devices *fs_devices,
5281 struct alloc_chunk_ctl *ctl,
5282 struct btrfs_device_info *devices_info)
5283{
5284 struct btrfs_fs_info *info = fs_devices->fs_info;
5285
5286
5287
5288
5289
5290
5291 ctl->ndevs = rounddown(ctl->ndevs, ctl->devs_increment);
5292
5293 if (ctl->ndevs < ctl->devs_min) {
5294 if (btrfs_test_opt(info, ENOSPC_DEBUG)) {
5295 btrfs_debug(info,
5296 "%s: not enough devices with free space: have=%d minimum required=%d",
5297 __func__, ctl->ndevs, ctl->devs_min);
5298 }
5299 return -ENOSPC;
5300 }
5301
5302 ctl->ndevs = min(ctl->ndevs, ctl->devs_max);
5303
5304 switch (fs_devices->chunk_alloc_policy) {
5305 case BTRFS_CHUNK_ALLOC_REGULAR:
5306 return decide_stripe_size_regular(ctl, devices_info);
5307 case BTRFS_CHUNK_ALLOC_ZONED:
5308 return decide_stripe_size_zoned(ctl, devices_info);
5309 default:
5310 BUG();
5311 }
5312}
5313
5314static struct btrfs_block_group *create_chunk(struct btrfs_trans_handle *trans,
5315 struct alloc_chunk_ctl *ctl,
5316 struct btrfs_device_info *devices_info)
5317{
5318 struct btrfs_fs_info *info = trans->fs_info;
5319 struct map_lookup *map = NULL;
5320 struct extent_map_tree *em_tree;
5321 struct btrfs_block_group *block_group;
5322 struct extent_map *em;
5323 u64 start = ctl->start;
5324 u64 type = ctl->type;
5325 int ret;
5326 int i;
5327 int j;
5328
5329 map = kmalloc(map_lookup_size(ctl->num_stripes), GFP_NOFS);
5330 if (!map)
5331 return ERR_PTR(-ENOMEM);
5332 map->num_stripes = ctl->num_stripes;
5333
5334 for (i = 0; i < ctl->ndevs; ++i) {
5335 for (j = 0; j < ctl->dev_stripes; ++j) {
5336 int s = i * ctl->dev_stripes + j;
5337 map->stripes[s].dev = devices_info[i].dev;
5338 map->stripes[s].physical = devices_info[i].dev_offset +
5339 j * ctl->stripe_size;
5340 }
5341 }
5342 map->stripe_len = BTRFS_STRIPE_LEN;
5343 map->io_align = BTRFS_STRIPE_LEN;
5344 map->io_width = BTRFS_STRIPE_LEN;
5345 map->type = type;
5346 map->sub_stripes = ctl->sub_stripes;
5347
5348 trace_btrfs_chunk_alloc(info, map, start, ctl->chunk_size);
5349
5350 em = alloc_extent_map();
5351 if (!em) {
5352 kfree(map);
5353 return ERR_PTR(-ENOMEM);
5354 }
5355 set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
5356 em->map_lookup = map;
5357 em->start = start;
5358 em->len = ctl->chunk_size;
5359 em->block_start = 0;
5360 em->block_len = em->len;
5361 em->orig_block_len = ctl->stripe_size;
5362
5363 em_tree = &info->mapping_tree;
5364 write_lock(&em_tree->lock);
5365 ret = add_extent_mapping(em_tree, em, 0);
5366 if (ret) {
5367 write_unlock(&em_tree->lock);
5368 free_extent_map(em);
5369 return ERR_PTR(ret);
5370 }
5371 write_unlock(&em_tree->lock);
5372
5373 block_group = btrfs_make_block_group(trans, 0, type, start, ctl->chunk_size);
5374 if (IS_ERR(block_group))
5375 goto error_del_extent;
5376
5377 for (i = 0; i < map->num_stripes; i++) {
5378 struct btrfs_device *dev = map->stripes[i].dev;
5379
5380 btrfs_device_set_bytes_used(dev,
5381 dev->bytes_used + ctl->stripe_size);
5382 if (list_empty(&dev->post_commit_list))
5383 list_add_tail(&dev->post_commit_list,
5384 &trans->transaction->dev_update_list);
5385 }
5386
5387 atomic64_sub(ctl->stripe_size * map->num_stripes,
5388 &info->free_chunk_space);
5389
5390 free_extent_map(em);
5391 check_raid56_incompat_flag(info, type);
5392 check_raid1c34_incompat_flag(info, type);
5393
5394 return block_group;
5395
5396error_del_extent:
5397 write_lock(&em_tree->lock);
5398 remove_extent_mapping(em_tree, em);
5399 write_unlock(&em_tree->lock);
5400
5401
5402 free_extent_map(em);
5403
5404 free_extent_map(em);
5405
5406 return block_group;
5407}
5408
5409struct btrfs_block_group *btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
5410 u64 type)
5411{
5412 struct btrfs_fs_info *info = trans->fs_info;
5413 struct btrfs_fs_devices *fs_devices = info->fs_devices;
5414 struct btrfs_device_info *devices_info = NULL;
5415 struct alloc_chunk_ctl ctl;
5416 struct btrfs_block_group *block_group;
5417 int ret;
5418
5419 lockdep_assert_held(&info->chunk_mutex);
5420
5421 if (!alloc_profile_is_valid(type, 0)) {
5422 ASSERT(0);
5423 return ERR_PTR(-EINVAL);
5424 }
5425
5426 if (list_empty(&fs_devices->alloc_list)) {
5427 if (btrfs_test_opt(info, ENOSPC_DEBUG))
5428 btrfs_debug(info, "%s: no writable device", __func__);
5429 return ERR_PTR(-ENOSPC);
5430 }
5431
5432 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
5433 btrfs_err(info, "invalid chunk type 0x%llx requested", type);
5434 ASSERT(0);
5435 return ERR_PTR(-EINVAL);
5436 }
5437
5438 ctl.start = find_next_chunk(info);
5439 ctl.type = type;
5440 init_alloc_chunk_ctl(fs_devices, &ctl);
5441
5442 devices_info = kcalloc(fs_devices->rw_devices, sizeof(*devices_info),
5443 GFP_NOFS);
5444 if (!devices_info)
5445 return ERR_PTR(-ENOMEM);
5446
5447 ret = gather_device_info(fs_devices, &ctl, devices_info);
5448 if (ret < 0) {
5449 block_group = ERR_PTR(ret);
5450 goto out;
5451 }
5452
5453 ret = decide_stripe_size(fs_devices, &ctl, devices_info);
5454 if (ret < 0) {
5455 block_group = ERR_PTR(ret);
5456 goto out;
5457 }
5458
5459 block_group = create_chunk(trans, &ctl, devices_info);
5460
5461out:
5462 kfree(devices_info);
5463 return block_group;
5464}
5465
5466
5467
5468
5469
5470
5471
5472int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
5473 u64 chunk_offset, u64 chunk_size)
5474{
5475 struct btrfs_fs_info *fs_info = trans->fs_info;
5476 struct btrfs_device *device;
5477 struct extent_map *em;
5478 struct map_lookup *map;
5479 u64 dev_offset;
5480 u64 stripe_size;
5481 int i;
5482 int ret = 0;
5483
5484 em = btrfs_get_chunk_map(fs_info, chunk_offset, chunk_size);
5485 if (IS_ERR(em))
5486 return PTR_ERR(em);
5487
5488 map = em->map_lookup;
5489 stripe_size = em->orig_block_len;
5490
5491
5492
5493
5494
5495
5496
5497
5498
5499
5500 mutex_lock(&fs_info->fs_devices->device_list_mutex);
5501 for (i = 0; i < map->num_stripes; i++) {
5502 device = map->stripes[i].dev;
5503 dev_offset = map->stripes[i].physical;
5504
5505 ret = btrfs_alloc_dev_extent(trans, device, chunk_offset,
5506 dev_offset, stripe_size);
5507 if (ret)
5508 break;
5509 }
5510 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
5511
5512 free_extent_map(em);
5513 return ret;
5514}
5515
5516
5517
5518
5519
5520
5521
5522
5523
5524int btrfs_chunk_alloc_add_chunk_item(struct btrfs_trans_handle *trans,
5525 struct btrfs_block_group *bg)
5526{
5527 struct btrfs_fs_info *fs_info = trans->fs_info;
5528 struct btrfs_root *extent_root = fs_info->extent_root;
5529 struct btrfs_root *chunk_root = fs_info->chunk_root;
5530 struct btrfs_key key;
5531 struct btrfs_chunk *chunk;
5532 struct btrfs_stripe *stripe;
5533 struct extent_map *em;
5534 struct map_lookup *map;
5535 size_t item_size;
5536 int i;
5537 int ret;
5538
5539
5540
5541
5542
5543
5544
5545
5546
5547
5548
5549
5550
5551
5552
5553
5554
5555
5556
5557
5558
5559
5560
5561 lockdep_assert_held(&fs_info->chunk_mutex);
5562
5563 em = btrfs_get_chunk_map(fs_info, bg->start, bg->length);
5564 if (IS_ERR(em)) {
5565 ret = PTR_ERR(em);
5566 btrfs_abort_transaction(trans, ret);
5567 return ret;
5568 }
5569
5570 map = em->map_lookup;
5571 item_size = btrfs_chunk_item_size(map->num_stripes);
5572
5573 chunk = kzalloc(item_size, GFP_NOFS);
5574 if (!chunk) {
5575 ret = -ENOMEM;
5576 btrfs_abort_transaction(trans, ret);
5577 goto out;
5578 }
5579
5580 for (i = 0; i < map->num_stripes; i++) {
5581 struct btrfs_device *device = map->stripes[i].dev;
5582
5583 ret = btrfs_update_device(trans, device);
5584 if (ret)
5585 goto out;
5586 }
5587
5588 stripe = &chunk->stripe;
5589 for (i = 0; i < map->num_stripes; i++) {
5590 struct btrfs_device *device = map->stripes[i].dev;
5591 const u64 dev_offset = map->stripes[i].physical;
5592
5593 btrfs_set_stack_stripe_devid(stripe, device->devid);
5594 btrfs_set_stack_stripe_offset(stripe, dev_offset);
5595 memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
5596 stripe++;
5597 }
5598
5599 btrfs_set_stack_chunk_length(chunk, bg->length);
5600 btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
5601 btrfs_set_stack_chunk_stripe_len(chunk, map->stripe_len);
5602 btrfs_set_stack_chunk_type(chunk, map->type);
5603 btrfs_set_stack_chunk_num_stripes(chunk, map->num_stripes);
5604 btrfs_set_stack_chunk_io_align(chunk, map->stripe_len);
5605 btrfs_set_stack_chunk_io_width(chunk, map->stripe_len);
5606 btrfs_set_stack_chunk_sector_size(chunk, fs_info->sectorsize);
5607 btrfs_set_stack_chunk_sub_stripes(chunk, map->sub_stripes);
5608
5609 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
5610 key.type = BTRFS_CHUNK_ITEM_KEY;
5611 key.offset = bg->start;
5612
5613 ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size);
5614 if (ret)
5615 goto out;
5616
5617 bg->chunk_item_inserted = 1;
5618
5619 if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
5620 ret = btrfs_add_system_chunk(fs_info, &key, chunk, item_size);
5621 if (ret)
5622 goto out;
5623 }
5624
5625out:
5626 kfree(chunk);
5627 free_extent_map(em);
5628 return ret;
5629}
5630
5631static noinline int init_first_rw_device(struct btrfs_trans_handle *trans)
5632{
5633 struct btrfs_fs_info *fs_info = trans->fs_info;
5634 u64 alloc_profile;
5635 struct btrfs_block_group *meta_bg;
5636 struct btrfs_block_group *sys_bg;
5637
5638
5639
5640
5641
5642
5643
5644
5645
5646
5647
5648
5649
5650
5651
5652
5653
5654
5655
5656
5657
5658
5659 alloc_profile = btrfs_metadata_alloc_profile(fs_info);
5660 meta_bg = btrfs_alloc_chunk(trans, alloc_profile);
5661 if (IS_ERR(meta_bg))
5662 return PTR_ERR(meta_bg);
5663
5664 alloc_profile = btrfs_system_alloc_profile(fs_info);
5665 sys_bg = btrfs_alloc_chunk(trans, alloc_profile);
5666 if (IS_ERR(sys_bg))
5667 return PTR_ERR(sys_bg);
5668
5669 return 0;
5670}
5671
5672static inline int btrfs_chunk_max_errors(struct map_lookup *map)
5673{
5674 const int index = btrfs_bg_flags_to_raid_index(map->type);
5675
5676 return btrfs_raid_array[index].tolerated_failures;
5677}
5678
5679int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset)
5680{
5681 struct extent_map *em;
5682 struct map_lookup *map;
5683 int readonly = 0;
5684 int miss_ndevs = 0;
5685 int i;
5686
5687 em = btrfs_get_chunk_map(fs_info, chunk_offset, 1);
5688 if (IS_ERR(em))
5689 return 1;
5690
5691 map = em->map_lookup;
5692 for (i = 0; i < map->num_stripes; i++) {
5693 if (test_bit(BTRFS_DEV_STATE_MISSING,
5694 &map->stripes[i].dev->dev_state)) {
5695 miss_ndevs++;
5696 continue;
5697 }
5698 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE,
5699 &map->stripes[i].dev->dev_state)) {
5700 readonly = 1;
5701 goto end;
5702 }
5703 }
5704
5705
5706
5707
5708
5709
5710 if (miss_ndevs > btrfs_chunk_max_errors(map))
5711 readonly = 1;
5712end:
5713 free_extent_map(em);
5714 return readonly;
5715}
5716
5717void btrfs_mapping_tree_free(struct extent_map_tree *tree)
5718{
5719 struct extent_map *em;
5720
5721 while (1) {
5722 write_lock(&tree->lock);
5723 em = lookup_extent_mapping(tree, 0, (u64)-1);
5724 if (em)
5725 remove_extent_mapping(tree, em);
5726 write_unlock(&tree->lock);
5727 if (!em)
5728 break;
5729
5730 free_extent_map(em);
5731
5732 free_extent_map(em);
5733 }
5734}
5735
5736int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
5737{
5738 struct extent_map *em;
5739 struct map_lookup *map;
5740 int ret;
5741
5742 em = btrfs_get_chunk_map(fs_info, logical, len);
5743 if (IS_ERR(em))
5744
5745
5746
5747
5748
5749
5750 return 1;
5751
5752 map = em->map_lookup;
5753 if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1_MASK))
5754 ret = map->num_stripes;
5755 else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
5756 ret = map->sub_stripes;
5757 else if (map->type & BTRFS_BLOCK_GROUP_RAID5)
5758 ret = 2;
5759 else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
5760
5761
5762
5763
5764
5765
5766
5767 ret = map->num_stripes;
5768 else
5769 ret = 1;
5770 free_extent_map(em);
5771
5772 down_read(&fs_info->dev_replace.rwsem);
5773 if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace) &&
5774 fs_info->dev_replace.tgtdev)
5775 ret++;
5776 up_read(&fs_info->dev_replace.rwsem);
5777
5778 return ret;
5779}
5780
5781unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
5782 u64 logical)
5783{
5784 struct extent_map *em;
5785 struct map_lookup *map;
5786 unsigned long len = fs_info->sectorsize;
5787
5788 em = btrfs_get_chunk_map(fs_info, logical, len);
5789
5790 if (!WARN_ON(IS_ERR(em))) {
5791 map = em->map_lookup;
5792 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
5793 len = map->stripe_len * nr_data_stripes(map);
5794 free_extent_map(em);
5795 }
5796 return len;
5797}
5798
5799int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
5800{
5801 struct extent_map *em;
5802 struct map_lookup *map;
5803 int ret = 0;
5804
5805 em = btrfs_get_chunk_map(fs_info, logical, len);
5806
5807 if(!WARN_ON(IS_ERR(em))) {
5808 map = em->map_lookup;
5809 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
5810 ret = 1;
5811 free_extent_map(em);
5812 }
5813 return ret;
5814}
5815
5816static int find_live_mirror(struct btrfs_fs_info *fs_info,
5817 struct map_lookup *map, int first,
5818 int dev_replace_is_ongoing)
5819{
5820 int i;
5821 int num_stripes;
5822 int preferred_mirror;
5823 int tolerance;
5824 struct btrfs_device *srcdev;
5825
5826 ASSERT((map->type &
5827 (BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10)));
5828
5829 if (map->type & BTRFS_BLOCK_GROUP_RAID10)
5830 num_stripes = map->sub_stripes;
5831 else
5832 num_stripes = map->num_stripes;
5833
5834 switch (fs_info->fs_devices->read_policy) {
5835 default:
5836
5837 btrfs_warn_rl(fs_info,
5838 "unknown read_policy type %u, reset to pid",
5839 fs_info->fs_devices->read_policy);
5840 fs_info->fs_devices->read_policy = BTRFS_READ_POLICY_PID;
5841 fallthrough;
5842 case BTRFS_READ_POLICY_PID:
5843 preferred_mirror = first + (current->pid % num_stripes);
5844 break;
5845 }
5846
5847 if (dev_replace_is_ongoing &&
5848 fs_info->dev_replace.cont_reading_from_srcdev_mode ==
5849 BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID)
5850 srcdev = fs_info->dev_replace.srcdev;
5851 else
5852 srcdev = NULL;
5853
5854
5855
5856
5857
5858
5859 for (tolerance = 0; tolerance < 2; tolerance++) {
5860 if (map->stripes[preferred_mirror].dev->bdev &&
5861 (tolerance || map->stripes[preferred_mirror].dev != srcdev))
5862 return preferred_mirror;
5863 for (i = first; i < first + num_stripes; i++) {
5864 if (map->stripes[i].dev->bdev &&
5865 (tolerance || map->stripes[i].dev != srcdev))
5866 return i;
5867 }
5868 }
5869
5870
5871
5872
5873 return preferred_mirror;
5874}
5875
5876
5877static void sort_parity_stripes(struct btrfs_bio *bbio, int num_stripes)
5878{
5879 int i;
5880 int again = 1;
5881
5882 while (again) {
5883 again = 0;
5884 for (i = 0; i < num_stripes - 1; i++) {
5885
5886 if (bbio->raid_map[i] > bbio->raid_map[i + 1]) {
5887 swap(bbio->stripes[i], bbio->stripes[i + 1]);
5888 swap(bbio->raid_map[i], bbio->raid_map[i + 1]);
5889 again = 1;
5890 }
5891 }
5892 }
5893}
5894
5895static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes)
5896{
5897 struct btrfs_bio *bbio = kzalloc(
5898
5899 sizeof(struct btrfs_bio) +
5900
5901 sizeof(struct btrfs_bio_stripe) * (total_stripes) +
5902
5903 sizeof(int) * (real_stripes) +
5904
5905
5906
5907
5908 sizeof(u64) * (total_stripes),
5909 GFP_NOFS|__GFP_NOFAIL);
5910
5911 atomic_set(&bbio->error, 0);
5912 refcount_set(&bbio->refs, 1);
5913
5914 bbio->tgtdev_map = (int *)(bbio->stripes + total_stripes);
5915 bbio->raid_map = (u64 *)(bbio->tgtdev_map + real_stripes);
5916
5917 return bbio;
5918}
5919
5920void btrfs_get_bbio(struct btrfs_bio *bbio)
5921{
5922 WARN_ON(!refcount_read(&bbio->refs));
5923 refcount_inc(&bbio->refs);
5924}
5925
5926void btrfs_put_bbio(struct btrfs_bio *bbio)
5927{
5928 if (!bbio)
5929 return;
5930 if (refcount_dec_and_test(&bbio->refs))
5931 kfree(bbio);
5932}
5933
5934
5935
5936
5937
5938
5939static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info,
5940 u64 logical, u64 *length_ret,
5941 struct btrfs_bio **bbio_ret)
5942{
5943 struct extent_map *em;
5944 struct map_lookup *map;
5945 struct btrfs_bio *bbio;
5946 u64 length = *length_ret;
5947 u64 offset;
5948 u64 stripe_nr;
5949 u64 stripe_nr_end;
5950 u64 stripe_end_offset;
5951 u64 stripe_cnt;
5952 u64 stripe_len;
5953 u64 stripe_offset;
5954 u64 num_stripes;
5955 u32 stripe_index;
5956 u32 factor = 0;
5957 u32 sub_stripes = 0;
5958 u64 stripes_per_dev = 0;
5959 u32 remaining_stripes = 0;
5960 u32 last_stripe = 0;
5961 int ret = 0;
5962 int i;
5963
5964
5965 ASSERT(bbio_ret);
5966
5967 em = btrfs_get_chunk_map(fs_info, logical, length);
5968 if (IS_ERR(em))
5969 return PTR_ERR(em);
5970
5971 map = em->map_lookup;
5972
5973 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
5974 ret = -EOPNOTSUPP;
5975 goto out;
5976 }
5977
5978 offset = logical - em->start;
5979 length = min_t(u64, em->start + em->len - logical, length);
5980 *length_ret = length;
5981
5982 stripe_len = map->stripe_len;
5983
5984
5985
5986
5987 stripe_nr = div64_u64(offset, stripe_len);
5988
5989
5990 stripe_offset = offset - stripe_nr * stripe_len;
5991
5992 stripe_nr_end = round_up(offset + length, map->stripe_len);
5993 stripe_nr_end = div64_u64(stripe_nr_end, map->stripe_len);
5994 stripe_cnt = stripe_nr_end - stripe_nr;
5995 stripe_end_offset = stripe_nr_end * map->stripe_len -
5996 (offset + length);
5997
5998
5999
6000
6001
6002 num_stripes = 1;
6003 stripe_index = 0;
6004 if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
6005 BTRFS_BLOCK_GROUP_RAID10)) {
6006 if (map->type & BTRFS_BLOCK_GROUP_RAID0)
6007 sub_stripes = 1;
6008 else
6009 sub_stripes = map->sub_stripes;
6010
6011 factor = map->num_stripes / sub_stripes;
6012 num_stripes = min_t(u64, map->num_stripes,
6013 sub_stripes * stripe_cnt);
6014 stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
6015 stripe_index *= sub_stripes;
6016 stripes_per_dev = div_u64_rem(stripe_cnt, factor,
6017 &remaining_stripes);
6018 div_u64_rem(stripe_nr_end - 1, factor, &last_stripe);
6019 last_stripe *= sub_stripes;
6020 } else if (map->type & (BTRFS_BLOCK_GROUP_RAID1_MASK |
6021 BTRFS_BLOCK_GROUP_DUP)) {
6022 num_stripes = map->num_stripes;
6023 } else {
6024 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
6025 &stripe_index);
6026 }
6027
6028 bbio = alloc_btrfs_bio(num_stripes, 0);
6029 if (!bbio) {
6030 ret = -ENOMEM;
6031 goto out;
6032 }
6033
6034 for (i = 0; i < num_stripes; i++) {
6035 bbio->stripes[i].physical =
6036 map->stripes[stripe_index].physical +
6037 stripe_offset + stripe_nr * map->stripe_len;
6038 bbio->stripes[i].dev = map->stripes[stripe_index].dev;
6039
6040 if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
6041 BTRFS_BLOCK_GROUP_RAID10)) {
6042 bbio->stripes[i].length = stripes_per_dev *
6043 map->stripe_len;
6044
6045 if (i / sub_stripes < remaining_stripes)
6046 bbio->stripes[i].length +=
6047 map->stripe_len;
6048
6049
6050
6051
6052
6053
6054
6055
6056
6057 if (i < sub_stripes)
6058 bbio->stripes[i].length -=
6059 stripe_offset;
6060
6061 if (stripe_index >= last_stripe &&
6062 stripe_index <= (last_stripe +
6063 sub_stripes - 1))
6064 bbio->stripes[i].length -=
6065 stripe_end_offset;
6066
6067 if (i == sub_stripes - 1)
6068 stripe_offset = 0;
6069 } else {
6070 bbio->stripes[i].length = length;
6071 }
6072
6073 stripe_index++;
6074 if (stripe_index == map->num_stripes) {
6075 stripe_index = 0;
6076 stripe_nr++;
6077 }
6078 }
6079
6080 *bbio_ret = bbio;
6081 bbio->map_type = map->type;
6082 bbio->num_stripes = num_stripes;
6083out:
6084 free_extent_map(em);
6085 return ret;
6086}
6087
6088
6089
6090
6091
6092
6093
6094
6095
6096
6097
6098
6099
6100
6101static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info,
6102 u64 logical, u64 length,
6103 u64 srcdev_devid, int *mirror_num,
6104 u64 *physical)
6105{
6106 struct btrfs_bio *bbio = NULL;
6107 int num_stripes;
6108 int index_srcdev = 0;
6109 int found = 0;
6110 u64 physical_of_found = 0;
6111 int i;
6112 int ret = 0;
6113
6114 ret = __btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
6115 logical, &length, &bbio, 0, 0);
6116 if (ret) {
6117 ASSERT(bbio == NULL);
6118 return ret;
6119 }
6120
6121 num_stripes = bbio->num_stripes;
6122 if (*mirror_num > num_stripes) {
6123
6124
6125
6126
6127
6128 btrfs_put_bbio(bbio);
6129 return -EIO;
6130 }
6131
6132
6133
6134
6135
6136
6137 for (i = 0; i < num_stripes; i++) {
6138 if (bbio->stripes[i].dev->devid != srcdev_devid)
6139 continue;
6140
6141
6142
6143
6144
6145 if (found &&
6146 physical_of_found <= bbio->stripes[i].physical)
6147 continue;
6148
6149 index_srcdev = i;
6150 found = 1;
6151 physical_of_found = bbio->stripes[i].physical;
6152 }
6153
6154 btrfs_put_bbio(bbio);
6155
6156 ASSERT(found);
6157 if (!found)
6158 return -EIO;
6159
6160 *mirror_num = index_srcdev + 1;
6161 *physical = physical_of_found;
6162 return ret;
6163}
6164
6165static bool is_block_group_to_copy(struct btrfs_fs_info *fs_info, u64 logical)
6166{
6167 struct btrfs_block_group *cache;
6168 bool ret;
6169
6170
6171 if (!btrfs_is_zoned(fs_info))
6172 return false;
6173
6174 cache = btrfs_lookup_block_group(fs_info, logical);
6175
6176 spin_lock(&cache->lock);
6177 ret = cache->to_copy;
6178 spin_unlock(&cache->lock);
6179
6180 btrfs_put_block_group(cache);
6181 return ret;
6182}
6183
6184static void handle_ops_on_dev_replace(enum btrfs_map_op op,
6185 struct btrfs_bio **bbio_ret,
6186 struct btrfs_dev_replace *dev_replace,
6187 u64 logical,
6188 int *num_stripes_ret, int *max_errors_ret)
6189{
6190 struct btrfs_bio *bbio = *bbio_ret;
6191 u64 srcdev_devid = dev_replace->srcdev->devid;
6192 int tgtdev_indexes = 0;
6193 int num_stripes = *num_stripes_ret;
6194 int max_errors = *max_errors_ret;
6195 int i;
6196
6197 if (op == BTRFS_MAP_WRITE) {
6198 int index_where_to_add;
6199
6200
6201
6202
6203
6204 if (is_block_group_to_copy(dev_replace->srcdev->fs_info, logical))
6205 return;
6206
6207
6208
6209
6210
6211
6212
6213
6214
6215
6216
6217
6218 index_where_to_add = num_stripes;
6219 for (i = 0; i < num_stripes; i++) {
6220 if (bbio->stripes[i].dev->devid == srcdev_devid) {
6221
6222 struct btrfs_bio_stripe *new =
6223 bbio->stripes + index_where_to_add;
6224 struct btrfs_bio_stripe *old =
6225 bbio->stripes + i;
6226
6227 new->physical = old->physical;
6228 new->length = old->length;
6229 new->dev = dev_replace->tgtdev;
6230 bbio->tgtdev_map[i] = index_where_to_add;
6231 index_where_to_add++;
6232 max_errors++;
6233 tgtdev_indexes++;
6234 }
6235 }
6236 num_stripes = index_where_to_add;
6237 } else if (op == BTRFS_MAP_GET_READ_MIRRORS) {
6238 int index_srcdev = 0;
6239 int found = 0;
6240 u64 physical_of_found = 0;
6241
6242
6243
6244
6245
6246
6247
6248
6249 for (i = 0; i < num_stripes; i++) {
6250 if (bbio->stripes[i].dev->devid == srcdev_devid) {
6251
6252
6253
6254
6255
6256 if (found &&
6257 physical_of_found <=
6258 bbio->stripes[i].physical)
6259 continue;
6260 index_srcdev = i;
6261 found = 1;
6262 physical_of_found = bbio->stripes[i].physical;
6263 }
6264 }
6265 if (found) {
6266 struct btrfs_bio_stripe *tgtdev_stripe =
6267 bbio->stripes + num_stripes;
6268
6269 tgtdev_stripe->physical = physical_of_found;
6270 tgtdev_stripe->length =
6271 bbio->stripes[index_srcdev].length;
6272 tgtdev_stripe->dev = dev_replace->tgtdev;
6273 bbio->tgtdev_map[index_srcdev] = num_stripes;
6274
6275 tgtdev_indexes++;
6276 num_stripes++;
6277 }
6278 }
6279
6280 *num_stripes_ret = num_stripes;
6281 *max_errors_ret = max_errors;
6282 bbio->num_tgtdevs = tgtdev_indexes;
6283 *bbio_ret = bbio;
6284}
6285
6286static bool need_full_stripe(enum btrfs_map_op op)
6287{
6288 return (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS);
6289}
6290
6291
6292
6293
6294
6295
6296
6297
6298
6299
6300
6301
6302
6303
6304
6305int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *em,
6306 enum btrfs_map_op op, u64 logical,
6307 struct btrfs_io_geometry *io_geom)
6308{
6309 struct map_lookup *map;
6310 u64 len;
6311 u64 offset;
6312 u64 stripe_offset;
6313 u64 stripe_nr;
6314 u64 stripe_len;
6315 u64 raid56_full_stripe_start = (u64)-1;
6316 int data_stripes;
6317
6318 ASSERT(op != BTRFS_MAP_DISCARD);
6319
6320 map = em->map_lookup;
6321
6322 offset = logical - em->start;
6323
6324 stripe_len = map->stripe_len;
6325
6326 stripe_nr = div64_u64(offset, stripe_len);
6327
6328 stripe_offset = stripe_nr * stripe_len;
6329 if (offset < stripe_offset) {
6330 btrfs_crit(fs_info,
6331"stripe math has gone wrong, stripe_offset=%llu offset=%llu start=%llu logical=%llu stripe_len=%llu",
6332 stripe_offset, offset, em->start, logical, stripe_len);
6333 return -EINVAL;
6334 }
6335
6336
6337 stripe_offset = offset - stripe_offset;
6338 data_stripes = nr_data_stripes(map);
6339
6340 if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
6341 u64 max_len = stripe_len - stripe_offset;
6342
6343
6344
6345
6346 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
6347 unsigned long full_stripe_len = stripe_len * data_stripes;
6348 raid56_full_stripe_start = offset;
6349
6350
6351
6352
6353
6354 raid56_full_stripe_start = div64_u64(raid56_full_stripe_start,
6355 full_stripe_len);
6356 raid56_full_stripe_start *= full_stripe_len;
6357
6358
6359
6360
6361
6362
6363 if (op == BTRFS_MAP_WRITE) {
6364 max_len = stripe_len * data_stripes -
6365 (offset - raid56_full_stripe_start);
6366 }
6367 }
6368 len = min_t(u64, em->len - offset, max_len);
6369 } else {
6370 len = em->len - offset;
6371 }
6372
6373 io_geom->len = len;
6374 io_geom->offset = offset;
6375 io_geom->stripe_len = stripe_len;
6376 io_geom->stripe_nr = stripe_nr;
6377 io_geom->stripe_offset = stripe_offset;
6378 io_geom->raid56_stripe_offset = raid56_full_stripe_start;
6379
6380 return 0;
6381}
6382
6383static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
6384 enum btrfs_map_op op,
6385 u64 logical, u64 *length,
6386 struct btrfs_bio **bbio_ret,
6387 int mirror_num, int need_raid_map)
6388{
6389 struct extent_map *em;
6390 struct map_lookup *map;
6391 u64 stripe_offset;
6392 u64 stripe_nr;
6393 u64 stripe_len;
6394 u32 stripe_index;
6395 int data_stripes;
6396 int i;
6397 int ret = 0;
6398 int num_stripes;
6399 int max_errors = 0;
6400 int tgtdev_indexes = 0;
6401 struct btrfs_bio *bbio = NULL;
6402 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
6403 int dev_replace_is_ongoing = 0;
6404 int num_alloc_stripes;
6405 int patch_the_first_stripe_for_dev_replace = 0;
6406 u64 physical_to_patch_in_first_stripe = 0;
6407 u64 raid56_full_stripe_start = (u64)-1;
6408 struct btrfs_io_geometry geom;
6409
6410 ASSERT(bbio_ret);
6411 ASSERT(op != BTRFS_MAP_DISCARD);
6412
6413 em = btrfs_get_chunk_map(fs_info, logical, *length);
6414 ASSERT(!IS_ERR(em));
6415
6416 ret = btrfs_get_io_geometry(fs_info, em, op, logical, &geom);
6417 if (ret < 0)
6418 return ret;
6419
6420 map = em->map_lookup;
6421
6422 *length = geom.len;
6423 stripe_len = geom.stripe_len;
6424 stripe_nr = geom.stripe_nr;
6425 stripe_offset = geom.stripe_offset;
6426 raid56_full_stripe_start = geom.raid56_stripe_offset;
6427 data_stripes = nr_data_stripes(map);
6428
6429 down_read(&dev_replace->rwsem);
6430 dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
6431
6432
6433
6434
6435 if (!dev_replace_is_ongoing)
6436 up_read(&dev_replace->rwsem);
6437
6438 if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
6439 !need_full_stripe(op) && dev_replace->tgtdev != NULL) {
6440 ret = get_extra_mirror_from_replace(fs_info, logical, *length,
6441 dev_replace->srcdev->devid,
6442 &mirror_num,
6443 &physical_to_patch_in_first_stripe);
6444 if (ret)
6445 goto out;
6446 else
6447 patch_the_first_stripe_for_dev_replace = 1;
6448 } else if (mirror_num > map->num_stripes) {
6449 mirror_num = 0;
6450 }
6451
6452 num_stripes = 1;
6453 stripe_index = 0;
6454 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
6455 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
6456 &stripe_index);
6457 if (!need_full_stripe(op))
6458 mirror_num = 1;
6459 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
6460 if (need_full_stripe(op))
6461 num_stripes = map->num_stripes;
6462 else if (mirror_num)
6463 stripe_index = mirror_num - 1;
6464 else {
6465 stripe_index = find_live_mirror(fs_info, map, 0,
6466 dev_replace_is_ongoing);
6467 mirror_num = stripe_index + 1;
6468 }
6469
6470 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
6471 if (need_full_stripe(op)) {
6472 num_stripes = map->num_stripes;
6473 } else if (mirror_num) {
6474 stripe_index = mirror_num - 1;
6475 } else {
6476 mirror_num = 1;
6477 }
6478
6479 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
6480 u32 factor = map->num_stripes / map->sub_stripes;
6481
6482 stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
6483 stripe_index *= map->sub_stripes;
6484
6485 if (need_full_stripe(op))
6486 num_stripes = map->sub_stripes;
6487 else if (mirror_num)
6488 stripe_index += mirror_num - 1;
6489 else {
6490 int old_stripe_index = stripe_index;
6491 stripe_index = find_live_mirror(fs_info, map,
6492 stripe_index,
6493 dev_replace_is_ongoing);
6494 mirror_num = stripe_index - old_stripe_index + 1;
6495 }
6496
6497 } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
6498 if (need_raid_map && (need_full_stripe(op) || mirror_num > 1)) {
6499
6500 stripe_nr = div64_u64(raid56_full_stripe_start,
6501 stripe_len * data_stripes);
6502
6503
6504 num_stripes = map->num_stripes;
6505 max_errors = nr_parity_stripes(map);
6506
6507 *length = map->stripe_len;
6508 stripe_index = 0;
6509 stripe_offset = 0;
6510 } else {
6511
6512
6513
6514
6515
6516 stripe_nr = div_u64_rem(stripe_nr,
6517 data_stripes, &stripe_index);
6518 if (mirror_num > 1)
6519 stripe_index = data_stripes + mirror_num - 2;
6520
6521
6522 div_u64_rem(stripe_nr + stripe_index, map->num_stripes,
6523 &stripe_index);
6524 if (!need_full_stripe(op) && mirror_num <= 1)
6525 mirror_num = 1;
6526 }
6527 } else {
6528
6529
6530
6531
6532
6533 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
6534 &stripe_index);
6535 mirror_num = stripe_index + 1;
6536 }
6537 if (stripe_index >= map->num_stripes) {
6538 btrfs_crit(fs_info,
6539 "stripe index math went horribly wrong, got stripe_index=%u, num_stripes=%u",
6540 stripe_index, map->num_stripes);
6541 ret = -EINVAL;
6542 goto out;
6543 }
6544
6545 num_alloc_stripes = num_stripes;
6546 if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) {
6547 if (op == BTRFS_MAP_WRITE)
6548 num_alloc_stripes <<= 1;
6549 if (op == BTRFS_MAP_GET_READ_MIRRORS)
6550 num_alloc_stripes++;
6551 tgtdev_indexes = num_stripes;
6552 }
6553
6554 bbio = alloc_btrfs_bio(num_alloc_stripes, tgtdev_indexes);
6555 if (!bbio) {
6556 ret = -ENOMEM;
6557 goto out;
6558 }
6559
6560 for (i = 0; i < num_stripes; i++) {
6561 bbio->stripes[i].physical = map->stripes[stripe_index].physical +
6562 stripe_offset + stripe_nr * map->stripe_len;
6563 bbio->stripes[i].dev = map->stripes[stripe_index].dev;
6564 stripe_index++;
6565 }
6566
6567
6568 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map &&
6569 (need_full_stripe(op) || mirror_num > 1)) {
6570 u64 tmp;
6571 unsigned rot;
6572
6573
6574 div_u64_rem(stripe_nr, num_stripes, &rot);
6575
6576
6577 tmp = stripe_nr * data_stripes;
6578 for (i = 0; i < data_stripes; i++)
6579 bbio->raid_map[(i+rot) % num_stripes] =
6580 em->start + (tmp + i) * map->stripe_len;
6581
6582 bbio->raid_map[(i+rot) % map->num_stripes] = RAID5_P_STRIPE;
6583 if (map->type & BTRFS_BLOCK_GROUP_RAID6)
6584 bbio->raid_map[(i+rot+1) % num_stripes] =
6585 RAID6_Q_STRIPE;
6586
6587 sort_parity_stripes(bbio, num_stripes);
6588 }
6589
6590 if (need_full_stripe(op))
6591 max_errors = btrfs_chunk_max_errors(map);
6592
6593 if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL &&
6594 need_full_stripe(op)) {
6595 handle_ops_on_dev_replace(op, &bbio, dev_replace, logical,
6596 &num_stripes, &max_errors);
6597 }
6598
6599 *bbio_ret = bbio;
6600 bbio->map_type = map->type;
6601 bbio->num_stripes = num_stripes;
6602 bbio->max_errors = max_errors;
6603 bbio->mirror_num = mirror_num;
6604
6605
6606
6607
6608
6609
6610 if (patch_the_first_stripe_for_dev_replace && num_stripes > 0) {
6611 WARN_ON(num_stripes > 1);
6612 bbio->stripes[0].dev = dev_replace->tgtdev;
6613 bbio->stripes[0].physical = physical_to_patch_in_first_stripe;
6614 bbio->mirror_num = map->num_stripes + 1;
6615 }
6616out:
6617 if (dev_replace_is_ongoing) {
6618 lockdep_assert_held(&dev_replace->rwsem);
6619
6620 up_read(&dev_replace->rwsem);
6621 }
6622 free_extent_map(em);
6623 return ret;
6624}
6625
6626int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
6627 u64 logical, u64 *length,
6628 struct btrfs_bio **bbio_ret, int mirror_num)
6629{
6630 if (op == BTRFS_MAP_DISCARD)
6631 return __btrfs_map_block_for_discard(fs_info, logical,
6632 length, bbio_ret);
6633
6634 return __btrfs_map_block(fs_info, op, logical, length, bbio_ret,
6635 mirror_num, 0);
6636}
6637
6638
6639int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
6640 u64 logical, u64 *length,
6641 struct btrfs_bio **bbio_ret)
6642{
6643 return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, 0, 1);
6644}
6645
6646static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio)
6647{
6648 bio->bi_private = bbio->private;
6649 bio->bi_end_io = bbio->end_io;
6650 bio_endio(bio);
6651
6652 btrfs_put_bbio(bbio);
6653}
6654
6655static void btrfs_end_bio(struct bio *bio)
6656{
6657 struct btrfs_bio *bbio = bio->bi_private;
6658 int is_orig_bio = 0;
6659
6660 if (bio->bi_status) {
6661 atomic_inc(&bbio->error);
6662 if (bio->bi_status == BLK_STS_IOERR ||
6663 bio->bi_status == BLK_STS_TARGET) {
6664 struct btrfs_device *dev = btrfs_io_bio(bio)->device;
6665
6666 ASSERT(dev->bdev);
6667 if (btrfs_op(bio) == BTRFS_MAP_WRITE)
6668 btrfs_dev_stat_inc_and_print(dev,
6669 BTRFS_DEV_STAT_WRITE_ERRS);
6670 else if (!(bio->bi_opf & REQ_RAHEAD))
6671 btrfs_dev_stat_inc_and_print(dev,
6672 BTRFS_DEV_STAT_READ_ERRS);
6673 if (bio->bi_opf & REQ_PREFLUSH)
6674 btrfs_dev_stat_inc_and_print(dev,
6675 BTRFS_DEV_STAT_FLUSH_ERRS);
6676 }
6677 }
6678
6679 if (bio == bbio->orig_bio)
6680 is_orig_bio = 1;
6681
6682 btrfs_bio_counter_dec(bbio->fs_info);
6683
6684 if (atomic_dec_and_test(&bbio->stripes_pending)) {
6685 if (!is_orig_bio) {
6686 bio_put(bio);
6687 bio = bbio->orig_bio;
6688 }
6689
6690 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
6691
6692
6693
6694 if (atomic_read(&bbio->error) > bbio->max_errors) {
6695 bio->bi_status = BLK_STS_IOERR;
6696 } else {
6697
6698
6699
6700
6701 bio->bi_status = BLK_STS_OK;
6702 }
6703
6704 btrfs_end_bbio(bbio, bio);
6705 } else if (!is_orig_bio) {
6706 bio_put(bio);
6707 }
6708}
6709
6710static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio,
6711 u64 physical, struct btrfs_device *dev)
6712{
6713 struct btrfs_fs_info *fs_info = bbio->fs_info;
6714
6715 bio->bi_private = bbio;
6716 btrfs_io_bio(bio)->device = dev;
6717 bio->bi_end_io = btrfs_end_bio;
6718 bio->bi_iter.bi_sector = physical >> 9;
6719
6720
6721
6722
6723 if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
6724 if (btrfs_dev_is_sequential(dev, physical)) {
6725 u64 zone_start = round_down(physical, fs_info->zone_size);
6726
6727 bio->bi_iter.bi_sector = zone_start >> SECTOR_SHIFT;
6728 } else {
6729 bio->bi_opf &= ~REQ_OP_ZONE_APPEND;
6730 bio->bi_opf |= REQ_OP_WRITE;
6731 }
6732 }
6733 btrfs_debug_in_rcu(fs_info,
6734 "btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
6735 bio_op(bio), bio->bi_opf, bio->bi_iter.bi_sector,
6736 (unsigned long)dev->bdev->bd_dev, rcu_str_deref(dev->name),
6737 dev->devid, bio->bi_iter.bi_size);
6738 bio_set_dev(bio, dev->bdev);
6739
6740 btrfs_bio_counter_inc_noblocked(fs_info);
6741
6742 btrfsic_submit_bio(bio);
6743}
6744
6745static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
6746{
6747 atomic_inc(&bbio->error);
6748 if (atomic_dec_and_test(&bbio->stripes_pending)) {
6749
6750 WARN_ON(bio != bbio->orig_bio);
6751
6752 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
6753 bio->bi_iter.bi_sector = logical >> 9;
6754 if (atomic_read(&bbio->error) > bbio->max_errors)
6755 bio->bi_status = BLK_STS_IOERR;
6756 else
6757 bio->bi_status = BLK_STS_OK;
6758 btrfs_end_bbio(bbio, bio);
6759 }
6760}
6761
6762blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
6763 int mirror_num)
6764{
6765 struct btrfs_device *dev;
6766 struct bio *first_bio = bio;
6767 u64 logical = bio->bi_iter.bi_sector << 9;
6768 u64 length = 0;
6769 u64 map_length;
6770 int ret;
6771 int dev_nr;
6772 int total_devs;
6773 struct btrfs_bio *bbio = NULL;
6774
6775 length = bio->bi_iter.bi_size;
6776 map_length = length;
6777
6778 btrfs_bio_counter_inc_blocked(fs_info);
6779 ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical,
6780 &map_length, &bbio, mirror_num, 1);
6781 if (ret) {
6782 btrfs_bio_counter_dec(fs_info);
6783 return errno_to_blk_status(ret);
6784 }
6785
6786 total_devs = bbio->num_stripes;
6787 bbio->orig_bio = first_bio;
6788 bbio->private = first_bio->bi_private;
6789 bbio->end_io = first_bio->bi_end_io;
6790 bbio->fs_info = fs_info;
6791 atomic_set(&bbio->stripes_pending, bbio->num_stripes);
6792
6793 if ((bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
6794 ((btrfs_op(bio) == BTRFS_MAP_WRITE) || (mirror_num > 1))) {
6795
6796
6797 if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
6798 ret = raid56_parity_write(fs_info, bio, bbio,
6799 map_length);
6800 } else {
6801 ret = raid56_parity_recover(fs_info, bio, bbio,
6802 map_length, mirror_num, 1);
6803 }
6804
6805 btrfs_bio_counter_dec(fs_info);
6806 return errno_to_blk_status(ret);
6807 }
6808
6809 if (map_length < length) {
6810 btrfs_crit(fs_info,
6811 "mapping failed logical %llu bio len %llu len %llu",
6812 logical, length, map_length);
6813 BUG();
6814 }
6815
6816 for (dev_nr = 0; dev_nr < total_devs; dev_nr++) {
6817 dev = bbio->stripes[dev_nr].dev;
6818 if (!dev || !dev->bdev || test_bit(BTRFS_DEV_STATE_MISSING,
6819 &dev->dev_state) ||
6820 (btrfs_op(first_bio) == BTRFS_MAP_WRITE &&
6821 !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
6822 bbio_error(bbio, first_bio, logical);
6823 continue;
6824 }
6825
6826 if (dev_nr < total_devs - 1)
6827 bio = btrfs_bio_clone(first_bio);
6828 else
6829 bio = first_bio;
6830
6831 submit_stripe_bio(bbio, bio, bbio->stripes[dev_nr].physical, dev);
6832 }
6833 btrfs_bio_counter_dec(fs_info);
6834 return BLK_STS_OK;
6835}
6836
6837
6838
6839
6840
6841
6842
6843
6844struct btrfs_device *btrfs_find_device(struct btrfs_fs_devices *fs_devices,
6845 u64 devid, u8 *uuid, u8 *fsid)
6846{
6847 struct btrfs_device *device;
6848 struct btrfs_fs_devices *seed_devs;
6849
6850 if (!fsid || !memcmp(fs_devices->metadata_uuid, fsid, BTRFS_FSID_SIZE)) {
6851 list_for_each_entry(device, &fs_devices->devices, dev_list) {
6852 if (device->devid == devid &&
6853 (!uuid || memcmp(device->uuid, uuid,
6854 BTRFS_UUID_SIZE) == 0))
6855 return device;
6856 }
6857 }
6858
6859 list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list) {
6860 if (!fsid ||
6861 !memcmp(seed_devs->metadata_uuid, fsid, BTRFS_FSID_SIZE)) {
6862 list_for_each_entry(device, &seed_devs->devices,
6863 dev_list) {
6864 if (device->devid == devid &&
6865 (!uuid || memcmp(device->uuid, uuid,
6866 BTRFS_UUID_SIZE) == 0))
6867 return device;
6868 }
6869 }
6870 }
6871
6872 return NULL;
6873}
6874
6875static struct btrfs_device *add_missing_dev(struct btrfs_fs_devices *fs_devices,
6876 u64 devid, u8 *dev_uuid)
6877{
6878 struct btrfs_device *device;
6879 unsigned int nofs_flag;
6880
6881
6882
6883
6884
6885
6886
6887 nofs_flag = memalloc_nofs_save();
6888 device = btrfs_alloc_device(NULL, &devid, dev_uuid);
6889 memalloc_nofs_restore(nofs_flag);
6890 if (IS_ERR(device))
6891 return device;
6892
6893 list_add(&device->dev_list, &fs_devices->devices);
6894 device->fs_devices = fs_devices;
6895 fs_devices->num_devices++;
6896
6897 set_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
6898 fs_devices->missing_devices++;
6899
6900 return device;
6901}
6902
6903
6904
6905
6906
6907
6908
6909
6910
6911
6912
6913
6914
6915
6916struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
6917 const u64 *devid,
6918 const u8 *uuid)
6919{
6920 struct btrfs_device *dev;
6921 u64 tmp;
6922
6923 if (WARN_ON(!devid && !fs_info))
6924 return ERR_PTR(-EINVAL);
6925
6926 dev = __alloc_device(fs_info);
6927 if (IS_ERR(dev))
6928 return dev;
6929
6930 if (devid)
6931 tmp = *devid;
6932 else {
6933 int ret;
6934
6935 ret = find_next_devid(fs_info, &tmp);
6936 if (ret) {
6937 btrfs_free_device(dev);
6938 return ERR_PTR(ret);
6939 }
6940 }
6941 dev->devid = tmp;
6942
6943 if (uuid)
6944 memcpy(dev->uuid, uuid, BTRFS_UUID_SIZE);
6945 else
6946 generate_random_uuid(dev->uuid);
6947
6948 return dev;
6949}
6950
6951static void btrfs_report_missing_device(struct btrfs_fs_info *fs_info,
6952 u64 devid, u8 *uuid, bool error)
6953{
6954 if (error)
6955 btrfs_err_rl(fs_info, "devid %llu uuid %pU is missing",
6956 devid, uuid);
6957 else
6958 btrfs_warn_rl(fs_info, "devid %llu uuid %pU is missing",
6959 devid, uuid);
6960}
6961
6962static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes)
6963{
6964 int index = btrfs_bg_flags_to_raid_index(type);
6965 int ncopies = btrfs_raid_array[index].ncopies;
6966 const int nparity = btrfs_raid_array[index].nparity;
6967 int data_stripes;
6968
6969 if (nparity)
6970 data_stripes = num_stripes - nparity;
6971 else
6972 data_stripes = num_stripes / ncopies;
6973
6974 return div_u64(chunk_len, data_stripes);
6975}
6976
6977#if BITS_PER_LONG == 32
6978
6979
6980
6981
6982
6983
6984
6985static int check_32bit_meta_chunk(struct btrfs_fs_info *fs_info,
6986 u64 logical, u64 length, u64 type)
6987{
6988 if (!(type & BTRFS_BLOCK_GROUP_METADATA))
6989 return 0;
6990
6991 if (logical + length < MAX_LFS_FILESIZE)
6992 return 0;
6993
6994 btrfs_err_32bit_limit(fs_info);
6995 return -EOVERFLOW;
6996}
6997
6998
6999
7000
7001
7002
7003
7004static void warn_32bit_meta_chunk(struct btrfs_fs_info *fs_info,
7005 u64 logical, u64 length, u64 type)
7006{
7007 if (!(type & BTRFS_BLOCK_GROUP_METADATA))
7008 return;
7009
7010 if (logical + length < BTRFS_32BIT_EARLY_WARN_THRESHOLD)
7011 return;
7012
7013 btrfs_warn_32bit_limit(fs_info);
7014}
7015#endif
7016
7017static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
7018 struct btrfs_chunk *chunk)
7019{
7020 struct btrfs_fs_info *fs_info = leaf->fs_info;
7021 struct extent_map_tree *map_tree = &fs_info->mapping_tree;
7022 struct map_lookup *map;
7023 struct extent_map *em;
7024 u64 logical;
7025 u64 length;
7026 u64 devid;
7027 u64 type;
7028 u8 uuid[BTRFS_UUID_SIZE];
7029 int num_stripes;
7030 int ret;
7031 int i;
7032
7033 logical = key->offset;
7034 length = btrfs_chunk_length(leaf, chunk);
7035 type = btrfs_chunk_type(leaf, chunk);
7036 num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
7037
7038#if BITS_PER_LONG == 32
7039 ret = check_32bit_meta_chunk(fs_info, logical, length, type);
7040 if (ret < 0)
7041 return ret;
7042 warn_32bit_meta_chunk(fs_info, logical, length, type);
7043#endif
7044
7045
7046
7047
7048
7049 if (leaf->start == BTRFS_SUPER_INFO_OFFSET) {
7050 ret = btrfs_check_chunk_valid(leaf, chunk, logical);
7051 if (ret)
7052 return ret;
7053 }
7054
7055 read_lock(&map_tree->lock);
7056 em = lookup_extent_mapping(map_tree, logical, 1);
7057 read_unlock(&map_tree->lock);
7058
7059
7060 if (em && em->start <= logical && em->start + em->len > logical) {
7061 free_extent_map(em);
7062 return 0;
7063 } else if (em) {
7064 free_extent_map(em);
7065 }
7066
7067 em = alloc_extent_map();
7068 if (!em)
7069 return -ENOMEM;
7070 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
7071 if (!map) {
7072 free_extent_map(em);
7073 return -ENOMEM;
7074 }
7075
7076 set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
7077 em->map_lookup = map;
7078 em->start = logical;
7079 em->len = length;
7080 em->orig_start = 0;
7081 em->block_start = 0;
7082 em->block_len = em->len;
7083
7084 map->num_stripes = num_stripes;
7085 map->io_width = btrfs_chunk_io_width(leaf, chunk);
7086 map->io_align = btrfs_chunk_io_align(leaf, chunk);
7087 map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
7088 map->type = type;
7089 map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
7090 map->verified_stripes = 0;
7091 em->orig_block_len = calc_stripe_length(type, em->len,
7092 map->num_stripes);
7093 for (i = 0; i < num_stripes; i++) {
7094 map->stripes[i].physical =
7095 btrfs_stripe_offset_nr(leaf, chunk, i);
7096 devid = btrfs_stripe_devid_nr(leaf, chunk, i);
7097 read_extent_buffer(leaf, uuid, (unsigned long)
7098 btrfs_stripe_dev_uuid_nr(chunk, i),
7099 BTRFS_UUID_SIZE);
7100 map->stripes[i].dev = btrfs_find_device(fs_info->fs_devices,
7101 devid, uuid, NULL);
7102 if (!map->stripes[i].dev &&
7103 !btrfs_test_opt(fs_info, DEGRADED)) {
7104 free_extent_map(em);
7105 btrfs_report_missing_device(fs_info, devid, uuid, true);
7106 return -ENOENT;
7107 }
7108 if (!map->stripes[i].dev) {
7109 map->stripes[i].dev =
7110 add_missing_dev(fs_info->fs_devices, devid,
7111 uuid);
7112 if (IS_ERR(map->stripes[i].dev)) {
7113 free_extent_map(em);
7114 btrfs_err(fs_info,
7115 "failed to init missing dev %llu: %ld",
7116 devid, PTR_ERR(map->stripes[i].dev));
7117 return PTR_ERR(map->stripes[i].dev);
7118 }
7119 btrfs_report_missing_device(fs_info, devid, uuid, false);
7120 }
7121 set_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
7122 &(map->stripes[i].dev->dev_state));
7123
7124 }
7125
7126 write_lock(&map_tree->lock);
7127 ret = add_extent_mapping(map_tree, em, 0);
7128 write_unlock(&map_tree->lock);
7129 if (ret < 0) {
7130 btrfs_err(fs_info,
7131 "failed to add chunk map, start=%llu len=%llu: %d",
7132 em->start, em->len, ret);
7133 }
7134 free_extent_map(em);
7135
7136 return ret;
7137}
7138
7139static void fill_device_from_item(struct extent_buffer *leaf,
7140 struct btrfs_dev_item *dev_item,
7141 struct btrfs_device *device)
7142{
7143 unsigned long ptr;
7144
7145 device->devid = btrfs_device_id(leaf, dev_item);
7146 device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item);
7147 device->total_bytes = device->disk_total_bytes;
7148 device->commit_total_bytes = device->disk_total_bytes;
7149 device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
7150 device->commit_bytes_used = device->bytes_used;
7151 device->type = btrfs_device_type(leaf, dev_item);
7152 device->io_align = btrfs_device_io_align(leaf, dev_item);
7153 device->io_width = btrfs_device_io_width(leaf, dev_item);
7154 device->sector_size = btrfs_device_sector_size(leaf, dev_item);
7155 WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID);
7156 clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
7157
7158 ptr = btrfs_device_uuid(dev_item);
7159 read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
7160}
7161
7162static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info,
7163 u8 *fsid)
7164{
7165 struct btrfs_fs_devices *fs_devices;
7166 int ret;
7167
7168 lockdep_assert_held(&uuid_mutex);
7169 ASSERT(fsid);
7170
7171
7172 list_for_each_entry(fs_devices, &fs_info->fs_devices->seed_list, seed_list)
7173 if (!memcmp(fs_devices->fsid, fsid, BTRFS_FSID_SIZE))
7174 return fs_devices;
7175
7176
7177 fs_devices = find_fsid(fsid, NULL);
7178 if (!fs_devices) {
7179 if (!btrfs_test_opt(fs_info, DEGRADED))
7180 return ERR_PTR(-ENOENT);
7181
7182 fs_devices = alloc_fs_devices(fsid, NULL);
7183 if (IS_ERR(fs_devices))
7184 return fs_devices;
7185
7186 fs_devices->seeding = true;
7187 fs_devices->opened = 1;
7188 return fs_devices;
7189 }
7190
7191
7192
7193
7194
7195 fs_devices = clone_fs_devices(fs_devices);
7196 if (IS_ERR(fs_devices))
7197 return fs_devices;
7198
7199 ret = open_fs_devices(fs_devices, FMODE_READ, fs_info->bdev_holder);
7200 if (ret) {
7201 free_fs_devices(fs_devices);
7202 return ERR_PTR(ret);
7203 }
7204
7205 if (!fs_devices->seeding) {
7206 close_fs_devices(fs_devices);
7207 free_fs_devices(fs_devices);
7208 return ERR_PTR(-EINVAL);
7209 }
7210
7211 list_add(&fs_devices->seed_list, &fs_info->fs_devices->seed_list);
7212
7213 return fs_devices;
7214}
7215
7216static int read_one_dev(struct extent_buffer *leaf,
7217 struct btrfs_dev_item *dev_item)
7218{
7219 struct btrfs_fs_info *fs_info = leaf->fs_info;
7220 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
7221 struct btrfs_device *device;
7222 u64 devid;
7223 int ret;
7224 u8 fs_uuid[BTRFS_FSID_SIZE];
7225 u8 dev_uuid[BTRFS_UUID_SIZE];
7226
7227 devid = btrfs_device_id(leaf, dev_item);
7228 read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
7229 BTRFS_UUID_SIZE);
7230 read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
7231 BTRFS_FSID_SIZE);
7232
7233 if (memcmp(fs_uuid, fs_devices->metadata_uuid, BTRFS_FSID_SIZE)) {
7234 fs_devices = open_seed_devices(fs_info, fs_uuid);
7235 if (IS_ERR(fs_devices))
7236 return PTR_ERR(fs_devices);
7237 }
7238
7239 device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
7240 fs_uuid);
7241 if (!device) {
7242 if (!btrfs_test_opt(fs_info, DEGRADED)) {
7243 btrfs_report_missing_device(fs_info, devid,
7244 dev_uuid, true);
7245 return -ENOENT;
7246 }
7247
7248 device = add_missing_dev(fs_devices, devid, dev_uuid);
7249 if (IS_ERR(device)) {
7250 btrfs_err(fs_info,
7251 "failed to add missing dev %llu: %ld",
7252 devid, PTR_ERR(device));
7253 return PTR_ERR(device);
7254 }
7255 btrfs_report_missing_device(fs_info, devid, dev_uuid, false);
7256 } else {
7257 if (!device->bdev) {
7258 if (!btrfs_test_opt(fs_info, DEGRADED)) {
7259 btrfs_report_missing_device(fs_info,
7260 devid, dev_uuid, true);
7261 return -ENOENT;
7262 }
7263 btrfs_report_missing_device(fs_info, devid,
7264 dev_uuid, false);
7265 }
7266
7267 if (!device->bdev &&
7268 !test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) {
7269
7270
7271
7272
7273
7274
7275 device->fs_devices->missing_devices++;
7276 set_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
7277 }
7278
7279
7280 if (device->fs_devices != fs_devices) {
7281 ASSERT(test_bit(BTRFS_DEV_STATE_MISSING,
7282 &device->dev_state));
7283
7284 list_move(&device->dev_list, &fs_devices->devices);
7285 device->fs_devices->num_devices--;
7286 fs_devices->num_devices++;
7287
7288 device->fs_devices->missing_devices--;
7289 fs_devices->missing_devices++;
7290
7291 device->fs_devices = fs_devices;
7292 }
7293 }
7294
7295 if (device->fs_devices != fs_info->fs_devices) {
7296 BUG_ON(test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state));
7297 if (device->generation !=
7298 btrfs_device_generation(leaf, dev_item))
7299 return -EINVAL;
7300 }
7301
7302 fill_device_from_item(leaf, dev_item, device);
7303 if (device->bdev) {
7304 u64 max_total_bytes = i_size_read(device->bdev->bd_inode);
7305
7306 if (device->total_bytes > max_total_bytes) {
7307 btrfs_err(fs_info,
7308 "device total_bytes should be at most %llu but found %llu",
7309 max_total_bytes, device->total_bytes);
7310 return -EINVAL;
7311 }
7312 }
7313 set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
7314 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
7315 !test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
7316 device->fs_devices->total_rw_bytes += device->total_bytes;
7317 atomic64_add(device->total_bytes - device->bytes_used,
7318 &fs_info->free_chunk_space);
7319 }
7320 ret = 0;
7321 return ret;
7322}
7323
7324int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
7325{
7326 struct btrfs_root *root = fs_info->tree_root;
7327 struct btrfs_super_block *super_copy = fs_info->super_copy;
7328 struct extent_buffer *sb;
7329 struct btrfs_disk_key *disk_key;
7330 struct btrfs_chunk *chunk;
7331 u8 *array_ptr;
7332 unsigned long sb_array_offset;
7333 int ret = 0;
7334 u32 num_stripes;
7335 u32 array_size;
7336 u32 len = 0;
7337 u32 cur_offset;
7338 u64 type;
7339 struct btrfs_key key;
7340
7341 ASSERT(BTRFS_SUPER_INFO_SIZE <= fs_info->nodesize);
7342
7343
7344
7345
7346
7347 sb = btrfs_find_create_tree_block(fs_info, BTRFS_SUPER_INFO_OFFSET,
7348 root->root_key.objectid, 0);
7349 if (IS_ERR(sb))
7350 return PTR_ERR(sb);
7351 set_extent_buffer_uptodate(sb);
7352
7353
7354
7355
7356
7357
7358
7359
7360
7361
7362
7363
7364 if (PAGE_SIZE > BTRFS_SUPER_INFO_SIZE)
7365 SetPageUptodate(sb->pages[0]);
7366
7367 write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
7368 array_size = btrfs_super_sys_array_size(super_copy);
7369
7370 array_ptr = super_copy->sys_chunk_array;
7371 sb_array_offset = offsetof(struct btrfs_super_block, sys_chunk_array);
7372 cur_offset = 0;
7373
7374 while (cur_offset < array_size) {
7375 disk_key = (struct btrfs_disk_key *)array_ptr;
7376 len = sizeof(*disk_key);
7377 if (cur_offset + len > array_size)
7378 goto out_short_read;
7379
7380 btrfs_disk_key_to_cpu(&key, disk_key);
7381
7382 array_ptr += len;
7383 sb_array_offset += len;
7384 cur_offset += len;
7385
7386 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
7387 btrfs_err(fs_info,
7388 "unexpected item type %u in sys_array at offset %u",
7389 (u32)key.type, cur_offset);
7390 ret = -EIO;
7391 break;
7392 }
7393
7394 chunk = (struct btrfs_chunk *)sb_array_offset;
7395
7396
7397
7398
7399 len = btrfs_chunk_item_size(1);
7400 if (cur_offset + len > array_size)
7401 goto out_short_read;
7402
7403 num_stripes = btrfs_chunk_num_stripes(sb, chunk);
7404 if (!num_stripes) {
7405 btrfs_err(fs_info,
7406 "invalid number of stripes %u in sys_array at offset %u",
7407 num_stripes, cur_offset);
7408 ret = -EIO;
7409 break;
7410 }
7411
7412 type = btrfs_chunk_type(sb, chunk);
7413 if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) {
7414 btrfs_err(fs_info,
7415 "invalid chunk type %llu in sys_array at offset %u",
7416 type, cur_offset);
7417 ret = -EIO;
7418 break;
7419 }
7420
7421 len = btrfs_chunk_item_size(num_stripes);
7422 if (cur_offset + len > array_size)
7423 goto out_short_read;
7424
7425 ret = read_one_chunk(&key, sb, chunk);
7426 if (ret)
7427 break;
7428
7429 array_ptr += len;
7430 sb_array_offset += len;
7431 cur_offset += len;
7432 }
7433 clear_extent_buffer_uptodate(sb);
7434 free_extent_buffer_stale(sb);
7435 return ret;
7436
7437out_short_read:
7438 btrfs_err(fs_info, "sys_array too short to read %u bytes at offset %u",
7439 len, cur_offset);
7440 clear_extent_buffer_uptodate(sb);
7441 free_extent_buffer_stale(sb);
7442 return -EIO;
7443}
7444
7445
7446
7447
7448
7449
7450
7451
7452
7453bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
7454 struct btrfs_device *failing_dev)
7455{
7456 struct extent_map_tree *map_tree = &fs_info->mapping_tree;
7457 struct extent_map *em;
7458 u64 next_start = 0;
7459 bool ret = true;
7460
7461 read_lock(&map_tree->lock);
7462 em = lookup_extent_mapping(map_tree, 0, (u64)-1);
7463 read_unlock(&map_tree->lock);
7464
7465 if (!em) {
7466 ret = false;
7467 goto out;
7468 }
7469 while (em) {
7470 struct map_lookup *map;
7471 int missing = 0;
7472 int max_tolerated;
7473 int i;
7474
7475 map = em->map_lookup;
7476 max_tolerated =
7477 btrfs_get_num_tolerated_disk_barrier_failures(
7478 map->type);
7479 for (i = 0; i < map->num_stripes; i++) {
7480 struct btrfs_device *dev = map->stripes[i].dev;
7481
7482 if (!dev || !dev->bdev ||
7483 test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
7484 dev->last_flush_error)
7485 missing++;
7486 else if (failing_dev && failing_dev == dev)
7487 missing++;
7488 }
7489 if (missing > max_tolerated) {
7490 if (!failing_dev)
7491 btrfs_warn(fs_info,
7492 "chunk %llu missing %d devices, max tolerance is %d for writable mount",
7493 em->start, missing, max_tolerated);
7494 free_extent_map(em);
7495 ret = false;
7496 goto out;
7497 }
7498 next_start = extent_map_end(em);
7499 free_extent_map(em);
7500
7501 read_lock(&map_tree->lock);
7502 em = lookup_extent_mapping(map_tree, next_start,
7503 (u64)(-1) - next_start);
7504 read_unlock(&map_tree->lock);
7505 }
7506out:
7507 return ret;
7508}
7509
/*
 * Call btrfs_readahead_node_child() for every item slot of @node, in slot
 * order.  The item count is read once from the node header.
 */
static void readahead_tree_node_children(struct extent_buffer *node)
{
	const int nr_items = btrfs_header_nritems(node);
	int slot = 0;

	while (slot < nr_items) {
		btrfs_readahead_node_child(node, slot);
		slot++;
	}
}
7518
7519int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
7520{
7521 struct btrfs_root *root = fs_info->chunk_root;
7522 struct btrfs_path *path;
7523 struct extent_buffer *leaf;
7524 struct btrfs_key key;
7525 struct btrfs_key found_key;
7526 int ret;
7527 int slot;
7528 u64 total_dev = 0;
7529 u64 last_ra_node = 0;
7530
7531 path = btrfs_alloc_path();
7532 if (!path)
7533 return -ENOMEM;
7534
7535
7536
7537
7538
7539 mutex_lock(&uuid_mutex);
7540
7541
7542
7543
7544
7545
7546
7547 fs_info->fs_devices->total_rw_bytes = 0;
7548
7549
7550
7551
7552
7553
7554
7555 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
7556 key.offset = 0;
7557 key.type = 0;
7558 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7559 if (ret < 0)
7560 goto error;
7561 while (1) {
7562 struct extent_buffer *node;
7563
7564 leaf = path->nodes[0];
7565 slot = path->slots[0];
7566 if (slot >= btrfs_header_nritems(leaf)) {
7567 ret = btrfs_next_leaf(root, path);
7568 if (ret == 0)
7569 continue;
7570 if (ret < 0)
7571 goto error;
7572 break;
7573 }
7574
7575
7576
7577
7578 node = path->nodes[1];
7579 if (node) {
7580 if (last_ra_node != node->start) {
7581 readahead_tree_node_children(node);
7582 last_ra_node = node->start;
7583 }
7584 }
7585 btrfs_item_key_to_cpu(leaf, &found_key, slot);
7586 if (found_key.type == BTRFS_DEV_ITEM_KEY) {
7587 struct btrfs_dev_item *dev_item;
7588 dev_item = btrfs_item_ptr(leaf, slot,
7589 struct btrfs_dev_item);
7590 ret = read_one_dev(leaf, dev_item);
7591 if (ret)
7592 goto error;
7593 total_dev++;
7594 } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
7595 struct btrfs_chunk *chunk;
7596
7597
7598
7599
7600
7601
7602
7603
7604
7605 ASSERT(!test_bit(BTRFS_FS_OPEN, &fs_info->flags));
7606 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7607 ret = read_one_chunk(&found_key, leaf, chunk);
7608 if (ret)
7609 goto error;
7610 }
7611 path->slots[0]++;
7612 }
7613
7614
7615
7616
7617
7618 if (total_dev != fs_info->fs_devices->total_devices) {
7619 btrfs_err(fs_info,
7620 "super_num_devices %llu mismatch with num_devices %llu found here",
7621 btrfs_super_num_devices(fs_info->super_copy),
7622 total_dev);
7623 ret = -EINVAL;
7624 goto error;
7625 }
7626 if (btrfs_super_total_bytes(fs_info->super_copy) <
7627 fs_info->fs_devices->total_rw_bytes) {
7628 btrfs_err(fs_info,
7629 "super_total_bytes %llu mismatch with fs_devices total_rw_bytes %llu",
7630 btrfs_super_total_bytes(fs_info->super_copy),
7631 fs_info->fs_devices->total_rw_bytes);
7632 ret = -EINVAL;
7633 goto error;
7634 }
7635 ret = 0;
7636error:
7637 mutex_unlock(&uuid_mutex);
7638
7639 btrfs_free_path(path);
7640 return ret;
7641}
7642
7643void btrfs_init_devices_late(struct btrfs_fs_info *fs_info)
7644{
7645 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices, *seed_devs;
7646 struct btrfs_device *device;
7647
7648 fs_devices->fs_info = fs_info;
7649
7650 mutex_lock(&fs_devices->device_list_mutex);
7651 list_for_each_entry(device, &fs_devices->devices, dev_list)
7652 device->fs_info = fs_info;
7653
7654 list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list) {
7655 list_for_each_entry(device, &seed_devs->devices, dev_list)
7656 device->fs_info = fs_info;
7657
7658 seed_devs->fs_info = fs_info;
7659 }
7660 mutex_unlock(&fs_devices->device_list_mutex);
7661}
7662
7663static u64 btrfs_dev_stats_value(const struct extent_buffer *eb,
7664 const struct btrfs_dev_stats_item *ptr,
7665 int index)
7666{
7667 u64 val;
7668
7669 read_extent_buffer(eb, &val,
7670 offsetof(struct btrfs_dev_stats_item, values) +
7671 ((unsigned long)ptr) + (index * sizeof(u64)),
7672 sizeof(val));
7673 return val;
7674}
7675
7676static void btrfs_set_dev_stats_value(struct extent_buffer *eb,
7677 struct btrfs_dev_stats_item *ptr,
7678 int index, u64 val)
7679{
7680 write_extent_buffer(eb, &val,
7681 offsetof(struct btrfs_dev_stats_item, values) +
7682 ((unsigned long)ptr) + (index * sizeof(u64)),
7683 sizeof(val));
7684}
7685
7686static int btrfs_device_init_dev_stats(struct btrfs_device *device,
7687 struct btrfs_path *path)
7688{
7689 struct btrfs_dev_stats_item *ptr;
7690 struct extent_buffer *eb;
7691 struct btrfs_key key;
7692 int item_size;
7693 int i, ret, slot;
7694
7695 if (!device->fs_info->dev_root)
7696 return 0;
7697
7698 key.objectid = BTRFS_DEV_STATS_OBJECTID;
7699 key.type = BTRFS_PERSISTENT_ITEM_KEY;
7700 key.offset = device->devid;
7701 ret = btrfs_search_slot(NULL, device->fs_info->dev_root, &key, path, 0, 0);
7702 if (ret) {
7703 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
7704 btrfs_dev_stat_set(device, i, 0);
7705 device->dev_stats_valid = 1;
7706 btrfs_release_path(path);
7707 return ret < 0 ? ret : 0;
7708 }
7709 slot = path->slots[0];
7710 eb = path->nodes[0];
7711 item_size = btrfs_item_size_nr(eb, slot);
7712
7713 ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_stats_item);
7714
7715 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
7716 if (item_size >= (1 + i) * sizeof(__le64))
7717 btrfs_dev_stat_set(device, i,
7718 btrfs_dev_stats_value(eb, ptr, i));
7719 else
7720 btrfs_dev_stat_set(device, i, 0);
7721 }
7722
7723 device->dev_stats_valid = 1;
7724 btrfs_dev_stat_print_on_load(device);
7725 btrfs_release_path(path);
7726
7727 return 0;
7728}
7729
7730int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info)
7731{
7732 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices, *seed_devs;
7733 struct btrfs_device *device;
7734 struct btrfs_path *path = NULL;
7735 int ret = 0;
7736
7737 path = btrfs_alloc_path();
7738 if (!path)
7739 return -ENOMEM;
7740
7741 mutex_lock(&fs_devices->device_list_mutex);
7742 list_for_each_entry(device, &fs_devices->devices, dev_list) {
7743 ret = btrfs_device_init_dev_stats(device, path);
7744 if (ret)
7745 goto out;
7746 }
7747 list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list) {
7748 list_for_each_entry(device, &seed_devs->devices, dev_list) {
7749 ret = btrfs_device_init_dev_stats(device, path);
7750 if (ret)
7751 goto out;
7752 }
7753 }
7754out:
7755 mutex_unlock(&fs_devices->device_list_mutex);
7756
7757 btrfs_free_path(path);
7758 return ret;
7759}
7760
7761static int update_dev_stat_item(struct btrfs_trans_handle *trans,
7762 struct btrfs_device *device)
7763{
7764 struct btrfs_fs_info *fs_info = trans->fs_info;
7765 struct btrfs_root *dev_root = fs_info->dev_root;
7766 struct btrfs_path *path;
7767 struct btrfs_key key;
7768 struct extent_buffer *eb;
7769 struct btrfs_dev_stats_item *ptr;
7770 int ret;
7771 int i;
7772
7773 key.objectid = BTRFS_DEV_STATS_OBJECTID;
7774 key.type = BTRFS_PERSISTENT_ITEM_KEY;
7775 key.offset = device->devid;
7776
7777 path = btrfs_alloc_path();
7778 if (!path)
7779 return -ENOMEM;
7780 ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
7781 if (ret < 0) {
7782 btrfs_warn_in_rcu(fs_info,
7783 "error %d while searching for dev_stats item for device %s",
7784 ret, rcu_str_deref(device->name));
7785 goto out;
7786 }
7787
7788 if (ret == 0 &&
7789 btrfs_item_size_nr(path->nodes[0], path->slots[0]) < sizeof(*ptr)) {
7790
7791 ret = btrfs_del_item(trans, dev_root, path);
7792 if (ret != 0) {
7793 btrfs_warn_in_rcu(fs_info,
7794 "delete too small dev_stats item for device %s failed %d",
7795 rcu_str_deref(device->name), ret);
7796 goto out;
7797 }
7798 ret = 1;
7799 }
7800
7801 if (ret == 1) {
7802
7803 btrfs_release_path(path);
7804 ret = btrfs_insert_empty_item(trans, dev_root, path,
7805 &key, sizeof(*ptr));
7806 if (ret < 0) {
7807 btrfs_warn_in_rcu(fs_info,
7808 "insert dev_stats item for device %s failed %d",
7809 rcu_str_deref(device->name), ret);
7810 goto out;
7811 }
7812 }
7813
7814 eb = path->nodes[0];
7815 ptr = btrfs_item_ptr(eb, path->slots[0], struct btrfs_dev_stats_item);
7816 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
7817 btrfs_set_dev_stats_value(eb, ptr, i,
7818 btrfs_dev_stat_read(device, i));
7819 btrfs_mark_buffer_dirty(eb);
7820
7821out:
7822 btrfs_free_path(path);
7823 return ret;
7824}
7825
7826
7827
7828
7829int btrfs_run_dev_stats(struct btrfs_trans_handle *trans)
7830{
7831 struct btrfs_fs_info *fs_info = trans->fs_info;
7832 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
7833 struct btrfs_device *device;
7834 int stats_cnt;
7835 int ret = 0;
7836
7837 mutex_lock(&fs_devices->device_list_mutex);
7838 list_for_each_entry(device, &fs_devices->devices, dev_list) {
7839 stats_cnt = atomic_read(&device->dev_stats_ccnt);
7840 if (!device->dev_stats_valid || stats_cnt == 0)
7841 continue;
7842
7843
7844
7845
7846
7847
7848
7849
7850
7851
7852
7853
7854
7855 smp_rmb();
7856
7857 ret = update_dev_stat_item(trans, device);
7858 if (!ret)
7859 atomic_sub(stats_cnt, &device->dev_stats_ccnt);
7860 }
7861 mutex_unlock(&fs_devices->device_list_mutex);
7862
7863 return ret;
7864}
7865
/*
 * Increment the statistics counter @index of @dev and then log the current
 * error counters of the device.
 */
void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index)
{
	btrfs_dev_stat_inc(dev, index);

	/* Prints nothing while the device statistics are not yet valid. */
	btrfs_dev_stat_print_on_error(dev);
}
7871
7872static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
7873{
7874 if (!dev->dev_stats_valid)
7875 return;
7876 btrfs_err_rl_in_rcu(dev->fs_info,
7877 "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u",
7878 rcu_str_deref(dev->name),
7879 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
7880 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
7881 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
7882 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS),
7883 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS));
7884}
7885
7886static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
7887{
7888 int i;
7889
7890 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
7891 if (btrfs_dev_stat_read(dev, i) != 0)
7892 break;
7893 if (i == BTRFS_DEV_STAT_VALUES_MAX)
7894 return;
7895
7896 btrfs_info_in_rcu(dev->fs_info,
7897 "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u",
7898 rcu_str_deref(dev->name),
7899 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
7900 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
7901 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
7902 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS),
7903 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS));
7904}
7905
7906int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
7907 struct btrfs_ioctl_get_dev_stats *stats)
7908{
7909 struct btrfs_device *dev;
7910 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
7911 int i;
7912
7913 mutex_lock(&fs_devices->device_list_mutex);
7914 dev = btrfs_find_device(fs_info->fs_devices, stats->devid, NULL, NULL);
7915 mutex_unlock(&fs_devices->device_list_mutex);
7916
7917 if (!dev) {
7918 btrfs_warn(fs_info, "get dev_stats failed, device not found");
7919 return -ENODEV;
7920 } else if (!dev->dev_stats_valid) {
7921 btrfs_warn(fs_info, "get dev_stats failed, not yet valid");
7922 return -ENODEV;
7923 } else if (stats->flags & BTRFS_DEV_STATS_RESET) {
7924 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
7925 if (stats->nr_items > i)
7926 stats->values[i] =
7927 btrfs_dev_stat_read_and_reset(dev, i);
7928 else
7929 btrfs_dev_stat_set(dev, i, 0);
7930 }
7931 btrfs_info(fs_info, "device stats zeroed by %s (%d)",
7932 current->comm, task_pid_nr(current));
7933 } else {
7934 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
7935 if (stats->nr_items > i)
7936 stats->values[i] = btrfs_dev_stat_read(dev, i);
7937 }
7938 if (stats->nr_items > BTRFS_DEV_STAT_VALUES_MAX)
7939 stats->nr_items = BTRFS_DEV_STAT_VALUES_MAX;
7940 return 0;
7941}
7942
7943
7944
7945
7946
7947
7948
7949
7950void btrfs_commit_device_sizes(struct btrfs_transaction *trans)
7951{
7952 struct btrfs_device *curr, *next;
7953
7954 ASSERT(trans->state == TRANS_STATE_COMMIT_DOING);
7955
7956 if (list_empty(&trans->dev_update_list))
7957 return;
7958
7959
7960
7961
7962
7963
7964 mutex_lock(&trans->fs_info->chunk_mutex);
7965 list_for_each_entry_safe(curr, next, &trans->dev_update_list,
7966 post_commit_list) {
7967 list_del_init(&curr->post_commit_list);
7968 curr->commit_total_bytes = curr->disk_total_bytes;
7969 curr->commit_bytes_used = curr->bytes_used;
7970 }
7971 mutex_unlock(&trans->fs_info->chunk_mutex);
7972}
7973
7974
7975
7976
7977int btrfs_bg_type_to_factor(u64 flags)
7978{
7979 const int index = btrfs_bg_flags_to_raid_index(flags);
7980
7981 return btrfs_raid_array[index].ncopies;
7982}
7983
7984
7985
7986static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
7987 u64 chunk_offset, u64 devid,
7988 u64 physical_offset, u64 physical_len)
7989{
7990 struct extent_map_tree *em_tree = &fs_info->mapping_tree;
7991 struct extent_map *em;
7992 struct map_lookup *map;
7993 struct btrfs_device *dev;
7994 u64 stripe_len;
7995 bool found = false;
7996 int ret = 0;
7997 int i;
7998
7999 read_lock(&em_tree->lock);
8000 em = lookup_extent_mapping(em_tree, chunk_offset, 1);
8001 read_unlock(&em_tree->lock);
8002
8003 if (!em) {
8004 btrfs_err(fs_info,
8005"dev extent physical offset %llu on devid %llu doesn't have corresponding chunk",
8006 physical_offset, devid);
8007 ret = -EUCLEAN;
8008 goto out;
8009 }
8010
8011 map = em->map_lookup;
8012 stripe_len = calc_stripe_length(map->type, em->len, map->num_stripes);
8013 if (physical_len != stripe_len) {
8014 btrfs_err(fs_info,
8015"dev extent physical offset %llu on devid %llu length doesn't match chunk %llu, have %llu expect %llu",
8016 physical_offset, devid, em->start, physical_len,
8017 stripe_len);
8018 ret = -EUCLEAN;
8019 goto out;
8020 }
8021
8022 for (i = 0; i < map->num_stripes; i++) {
8023 if (map->stripes[i].dev->devid == devid &&
8024 map->stripes[i].physical == physical_offset) {
8025 found = true;
8026 if (map->verified_stripes >= map->num_stripes) {
8027 btrfs_err(fs_info,
8028 "too many dev extents for chunk %llu found",
8029 em->start);
8030 ret = -EUCLEAN;
8031 goto out;
8032 }
8033 map->verified_stripes++;
8034 break;
8035 }
8036 }
8037 if (!found) {
8038 btrfs_err(fs_info,
8039 "dev extent physical offset %llu devid %llu has no corresponding chunk",
8040 physical_offset, devid);
8041 ret = -EUCLEAN;
8042 }
8043
8044
8045 dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL);
8046 if (!dev) {
8047 btrfs_err(fs_info, "failed to find devid %llu", devid);
8048 ret = -EUCLEAN;
8049 goto out;
8050 }
8051
8052 if (physical_offset + physical_len > dev->disk_total_bytes) {
8053 btrfs_err(fs_info,
8054"dev extent devid %llu physical offset %llu len %llu is beyond device boundary %llu",
8055 devid, physical_offset, physical_len,
8056 dev->disk_total_bytes);
8057 ret = -EUCLEAN;
8058 goto out;
8059 }
8060
8061 if (dev->zone_info) {
8062 u64 zone_size = dev->zone_info->zone_size;
8063
8064 if (!IS_ALIGNED(physical_offset, zone_size) ||
8065 !IS_ALIGNED(physical_len, zone_size)) {
8066 btrfs_err(fs_info,
8067"zoned: dev extent devid %llu physical offset %llu len %llu is not aligned to device zone",
8068 devid, physical_offset, physical_len);
8069 ret = -EUCLEAN;
8070 goto out;
8071 }
8072 }
8073
8074out:
8075 free_extent_map(em);
8076 return ret;
8077}
8078
8079static int verify_chunk_dev_extent_mapping(struct btrfs_fs_info *fs_info)
8080{
8081 struct extent_map_tree *em_tree = &fs_info->mapping_tree;
8082 struct extent_map *em;
8083 struct rb_node *node;
8084 int ret = 0;
8085
8086 read_lock(&em_tree->lock);
8087 for (node = rb_first_cached(&em_tree->map); node; node = rb_next(node)) {
8088 em = rb_entry(node, struct extent_map, rb_node);
8089 if (em->map_lookup->num_stripes !=
8090 em->map_lookup->verified_stripes) {
8091 btrfs_err(fs_info,
8092 "chunk %llu has missing dev extent, have %d expect %d",
8093 em->start, em->map_lookup->verified_stripes,
8094 em->map_lookup->num_stripes);
8095 ret = -EUCLEAN;
8096 goto out;
8097 }
8098 }
8099out:
8100 read_unlock(&em_tree->lock);
8101 return ret;
8102}
8103
8104
8105
8106
8107
8108
8109
8110
8111int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
8112{
8113 struct btrfs_path *path;
8114 struct btrfs_root *root = fs_info->dev_root;
8115 struct btrfs_key key;
8116 u64 prev_devid = 0;
8117 u64 prev_dev_ext_end = 0;
8118 int ret = 0;
8119
8120
8121
8122
8123
8124
8125
8126
8127
8128
8129
8130 if (btrfs_test_opt(fs_info, IGNOREBADROOTS))
8131 return 0;
8132
8133 key.objectid = 1;
8134 key.type = BTRFS_DEV_EXTENT_KEY;
8135 key.offset = 0;
8136
8137 path = btrfs_alloc_path();
8138 if (!path)
8139 return -ENOMEM;
8140
8141 path->reada = READA_FORWARD;
8142 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8143 if (ret < 0)
8144 goto out;
8145
8146 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
8147 ret = btrfs_next_item(root, path);
8148 if (ret < 0)
8149 goto out;
8150
8151 if (ret > 0) {
8152 ret = -EUCLEAN;
8153 goto out;
8154 }
8155 }
8156 while (1) {
8157 struct extent_buffer *leaf = path->nodes[0];
8158 struct btrfs_dev_extent *dext;
8159 int slot = path->slots[0];
8160 u64 chunk_offset;
8161 u64 physical_offset;
8162 u64 physical_len;
8163 u64 devid;
8164
8165 btrfs_item_key_to_cpu(leaf, &key, slot);
8166 if (key.type != BTRFS_DEV_EXTENT_KEY)
8167 break;
8168 devid = key.objectid;
8169 physical_offset = key.offset;
8170
8171 dext = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
8172 chunk_offset = btrfs_dev_extent_chunk_offset(leaf, dext);
8173 physical_len = btrfs_dev_extent_length(leaf, dext);
8174
8175
8176 if (devid == prev_devid && physical_offset < prev_dev_ext_end) {
8177 btrfs_err(fs_info,
8178"dev extent devid %llu physical offset %llu overlap with previous dev extent end %llu",
8179 devid, physical_offset, prev_dev_ext_end);
8180 ret = -EUCLEAN;
8181 goto out;
8182 }
8183
8184 ret = verify_one_dev_extent(fs_info, chunk_offset, devid,
8185 physical_offset, physical_len);
8186 if (ret < 0)
8187 goto out;
8188 prev_devid = devid;
8189 prev_dev_ext_end = physical_offset + physical_len;
8190
8191 ret = btrfs_next_item(root, path);
8192 if (ret < 0)
8193 goto out;
8194 if (ret > 0) {
8195 ret = 0;
8196 break;
8197 }
8198 }
8199
8200
8201 ret = verify_chunk_dev_extent_mapping(fs_info);
8202out:
8203 btrfs_free_path(path);
8204 return ret;
8205}
8206
8207
8208
8209
8210
8211bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr)
8212{
8213 struct btrfs_swapfile_pin *sp;
8214 struct rb_node *node;
8215
8216 spin_lock(&fs_info->swapfile_pins_lock);
8217 node = fs_info->swapfile_pins.rb_node;
8218 while (node) {
8219 sp = rb_entry(node, struct btrfs_swapfile_pin, node);
8220 if (ptr < sp->ptr)
8221 node = node->rb_left;
8222 else if (ptr > sp->ptr)
8223 node = node->rb_right;
8224 else
8225 break;
8226 }
8227 spin_unlock(&fs_info->swapfile_pins_lock);
8228 return node != NULL;
8229}
8230
8231static int relocating_repair_kthread(void *data)
8232{
8233 struct btrfs_block_group *cache = (struct btrfs_block_group *)data;
8234 struct btrfs_fs_info *fs_info = cache->fs_info;
8235 u64 target;
8236 int ret = 0;
8237
8238 target = cache->start;
8239 btrfs_put_block_group(cache);
8240
8241 if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
8242 btrfs_info(fs_info,
8243 "zoned: skip relocating block group %llu to repair: EBUSY",
8244 target);
8245 return -EBUSY;
8246 }
8247
8248 mutex_lock(&fs_info->reclaim_bgs_lock);
8249
8250
8251 cache = btrfs_lookup_block_group(fs_info, target);
8252 if (!cache)
8253 goto out;
8254
8255 if (!cache->relocating_repair)
8256 goto out;
8257
8258 ret = btrfs_may_alloc_data_chunk(fs_info, target);
8259 if (ret < 0)
8260 goto out;
8261
8262 btrfs_info(fs_info,
8263 "zoned: relocating block group %llu to repair IO failure",
8264 target);
8265 ret = btrfs_relocate_chunk(fs_info, target);
8266
8267out:
8268 if (cache)
8269 btrfs_put_block_group(cache);
8270 mutex_unlock(&fs_info->reclaim_bgs_lock);
8271 btrfs_exclop_finish(fs_info);
8272
8273 return ret;
8274}
8275
8276int btrfs_repair_one_zone(struct btrfs_fs_info *fs_info, u64 logical)
8277{
8278 struct btrfs_block_group *cache;
8279
8280
8281 if (btrfs_test_opt(fs_info, DEGRADED))
8282 return 0;
8283
8284 cache = btrfs_lookup_block_group(fs_info, logical);
8285 if (!cache)
8286 return 0;
8287
8288 spin_lock(&cache->lock);
8289 if (cache->relocating_repair) {
8290 spin_unlock(&cache->lock);
8291 btrfs_put_block_group(cache);
8292 return 0;
8293 }
8294 cache->relocating_repair = 1;
8295 spin_unlock(&cache->lock);
8296
8297 kthread_run(relocating_repair_kthread, cache,
8298 "btrfs-relocating-repair");
8299
8300 return 0;
8301}
8302