1
2
3
4
5
6#include <linux/sched.h>
7#include <linux/bio.h>
8#include <linux/slab.h>
9#include <linux/blkdev.h>
10#include <linux/ratelimit.h>
11#include <linux/kthread.h>
12#include <linux/raid/pq.h>
13#include <linux/semaphore.h>
14#include <linux/uuid.h>
15#include <linux/list_sort.h>
16#include "misc.h"
17#include "ctree.h"
18#include "extent_map.h"
19#include "disk-io.h"
20#include "transaction.h"
21#include "print-tree.h"
22#include "volumes.h"
23#include "raid56.h"
24#include "async-thread.h"
25#include "check-integrity.h"
26#include "rcu-string.h"
27#include "dev-replace.h"
28#include "sysfs.h"
29#include "tree-checker.h"
30#include "space-info.h"
31#include "block-group.h"
32#include "discard.h"
33
34const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
35 [BTRFS_RAID_RAID10] = {
36 .sub_stripes = 2,
37 .dev_stripes = 1,
38 .devs_max = 0,
39 .devs_min = 4,
40 .tolerated_failures = 1,
41 .devs_increment = 2,
42 .ncopies = 2,
43 .nparity = 0,
44 .raid_name = "raid10",
45 .bg_flag = BTRFS_BLOCK_GROUP_RAID10,
46 .mindev_error = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET,
47 },
48 [BTRFS_RAID_RAID1] = {
49 .sub_stripes = 1,
50 .dev_stripes = 1,
51 .devs_max = 2,
52 .devs_min = 2,
53 .tolerated_failures = 1,
54 .devs_increment = 2,
55 .ncopies = 2,
56 .nparity = 0,
57 .raid_name = "raid1",
58 .bg_flag = BTRFS_BLOCK_GROUP_RAID1,
59 .mindev_error = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET,
60 },
61 [BTRFS_RAID_RAID1C3] = {
62 .sub_stripes = 1,
63 .dev_stripes = 1,
64 .devs_max = 3,
65 .devs_min = 3,
66 .tolerated_failures = 2,
67 .devs_increment = 3,
68 .ncopies = 3,
69 .nparity = 0,
70 .raid_name = "raid1c3",
71 .bg_flag = BTRFS_BLOCK_GROUP_RAID1C3,
72 .mindev_error = BTRFS_ERROR_DEV_RAID1C3_MIN_NOT_MET,
73 },
74 [BTRFS_RAID_RAID1C4] = {
75 .sub_stripes = 1,
76 .dev_stripes = 1,
77 .devs_max = 4,
78 .devs_min = 4,
79 .tolerated_failures = 3,
80 .devs_increment = 4,
81 .ncopies = 4,
82 .nparity = 0,
83 .raid_name = "raid1c4",
84 .bg_flag = BTRFS_BLOCK_GROUP_RAID1C4,
85 .mindev_error = BTRFS_ERROR_DEV_RAID1C4_MIN_NOT_MET,
86 },
87 [BTRFS_RAID_DUP] = {
88 .sub_stripes = 1,
89 .dev_stripes = 2,
90 .devs_max = 1,
91 .devs_min = 1,
92 .tolerated_failures = 0,
93 .devs_increment = 1,
94 .ncopies = 2,
95 .nparity = 0,
96 .raid_name = "dup",
97 .bg_flag = BTRFS_BLOCK_GROUP_DUP,
98 .mindev_error = 0,
99 },
100 [BTRFS_RAID_RAID0] = {
101 .sub_stripes = 1,
102 .dev_stripes = 1,
103 .devs_max = 0,
104 .devs_min = 2,
105 .tolerated_failures = 0,
106 .devs_increment = 1,
107 .ncopies = 1,
108 .nparity = 0,
109 .raid_name = "raid0",
110 .bg_flag = BTRFS_BLOCK_GROUP_RAID0,
111 .mindev_error = 0,
112 },
113 [BTRFS_RAID_SINGLE] = {
114 .sub_stripes = 1,
115 .dev_stripes = 1,
116 .devs_max = 1,
117 .devs_min = 1,
118 .tolerated_failures = 0,
119 .devs_increment = 1,
120 .ncopies = 1,
121 .nparity = 0,
122 .raid_name = "single",
123 .bg_flag = 0,
124 .mindev_error = 0,
125 },
126 [BTRFS_RAID_RAID5] = {
127 .sub_stripes = 1,
128 .dev_stripes = 1,
129 .devs_max = 0,
130 .devs_min = 2,
131 .tolerated_failures = 1,
132 .devs_increment = 1,
133 .ncopies = 1,
134 .nparity = 1,
135 .raid_name = "raid5",
136 .bg_flag = BTRFS_BLOCK_GROUP_RAID5,
137 .mindev_error = BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET,
138 },
139 [BTRFS_RAID_RAID6] = {
140 .sub_stripes = 1,
141 .dev_stripes = 1,
142 .devs_max = 0,
143 .devs_min = 3,
144 .tolerated_failures = 2,
145 .devs_increment = 1,
146 .ncopies = 1,
147 .nparity = 2,
148 .raid_name = "raid6",
149 .bg_flag = BTRFS_BLOCK_GROUP_RAID6,
150 .mindev_error = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,
151 },
152};
153
154const char *btrfs_bg_type_to_raid_name(u64 flags)
155{
156 const int index = btrfs_bg_flags_to_raid_index(flags);
157
158 if (index >= BTRFS_NR_RAID_TYPES)
159 return NULL;
160
161 return btrfs_raid_array[index].raid_name;
162}
163
164
165
166
167
168void btrfs_describe_block_groups(u64 bg_flags, char *buf, u32 size_buf)
169{
170 int i;
171 int ret;
172 char *bp = buf;
173 u64 flags = bg_flags;
174 u32 size_bp = size_buf;
175
176 if (!flags) {
177 strcpy(bp, "NONE");
178 return;
179 }
180
181#define DESCRIBE_FLAG(flag, desc) \
182 do { \
183 if (flags & (flag)) { \
184 ret = snprintf(bp, size_bp, "%s|", (desc)); \
185 if (ret < 0 || ret >= size_bp) \
186 goto out_overflow; \
187 size_bp -= ret; \
188 bp += ret; \
189 flags &= ~(flag); \
190 } \
191 } while (0)
192
193 DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_DATA, "data");
194 DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_SYSTEM, "system");
195 DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_METADATA, "metadata");
196
197 DESCRIBE_FLAG(BTRFS_AVAIL_ALLOC_BIT_SINGLE, "single");
198 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
199 DESCRIBE_FLAG(btrfs_raid_array[i].bg_flag,
200 btrfs_raid_array[i].raid_name);
201#undef DESCRIBE_FLAG
202
203 if (flags) {
204 ret = snprintf(bp, size_bp, "0x%llx|", flags);
205 size_bp -= ret;
206 }
207
208 if (size_bp < size_buf)
209 buf[size_buf - size_bp - 1] = '\0';
210
211
212
213
214
215out_overflow:;
216}
217
218static int init_first_rw_device(struct btrfs_trans_handle *trans);
219static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info);
220static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
221static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
222static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
223 enum btrfs_map_op op,
224 u64 logical, u64 *length,
225 struct btrfs_bio **bbio_ret,
226 int mirror_num, int need_raid_map);
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323DEFINE_MUTEX(uuid_mutex);
324static LIST_HEAD(fs_uuids);
325struct list_head * __attribute_const__ btrfs_get_fs_uuids(void)
326{
327 return &fs_uuids;
328}
329
330
331
332
333
334
335
336
337
338
339static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid,
340 const u8 *metadata_fsid)
341{
342 struct btrfs_fs_devices *fs_devs;
343
344 fs_devs = kzalloc(sizeof(*fs_devs), GFP_KERNEL);
345 if (!fs_devs)
346 return ERR_PTR(-ENOMEM);
347
348 mutex_init(&fs_devs->device_list_mutex);
349
350 INIT_LIST_HEAD(&fs_devs->devices);
351 INIT_LIST_HEAD(&fs_devs->alloc_list);
352 INIT_LIST_HEAD(&fs_devs->fs_list);
353 if (fsid)
354 memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE);
355
356 if (metadata_fsid)
357 memcpy(fs_devs->metadata_uuid, metadata_fsid, BTRFS_FSID_SIZE);
358 else if (fsid)
359 memcpy(fs_devs->metadata_uuid, fsid, BTRFS_FSID_SIZE);
360
361 return fs_devs;
362}
363
364void btrfs_free_device(struct btrfs_device *device)
365{
366 WARN_ON(!list_empty(&device->post_commit_list));
367 rcu_string_free(device->name);
368 extent_io_tree_release(&device->alloc_state);
369 bio_put(device->flush_bio);
370 kfree(device);
371}
372
373static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
374{
375 struct btrfs_device *device;
376 WARN_ON(fs_devices->opened);
377 while (!list_empty(&fs_devices->devices)) {
378 device = list_entry(fs_devices->devices.next,
379 struct btrfs_device, dev_list);
380 list_del(&device->dev_list);
381 btrfs_free_device(device);
382 }
383 kfree(fs_devices);
384}
385
386void __exit btrfs_cleanup_fs_uuids(void)
387{
388 struct btrfs_fs_devices *fs_devices;
389
390 while (!list_empty(&fs_uuids)) {
391 fs_devices = list_entry(fs_uuids.next,
392 struct btrfs_fs_devices, fs_list);
393 list_del(&fs_devices->fs_list);
394 free_fs_devices(fs_devices);
395 }
396}
397
398
399
400
401
402
403static struct btrfs_device *__alloc_device(void)
404{
405 struct btrfs_device *dev;
406
407 dev = kzalloc(sizeof(*dev), GFP_KERNEL);
408 if (!dev)
409 return ERR_PTR(-ENOMEM);
410
411
412
413
414
415 dev->flush_bio = bio_alloc_bioset(GFP_KERNEL, 0, NULL);
416 if (!dev->flush_bio) {
417 kfree(dev);
418 return ERR_PTR(-ENOMEM);
419 }
420
421 INIT_LIST_HEAD(&dev->dev_list);
422 INIT_LIST_HEAD(&dev->dev_alloc_list);
423 INIT_LIST_HEAD(&dev->post_commit_list);
424
425 atomic_set(&dev->reada_in_flight, 0);
426 atomic_set(&dev->dev_stats_ccnt, 0);
427 btrfs_device_data_ordered_init(dev);
428 INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
429 INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
430 extent_io_tree_init(NULL, &dev->alloc_state, 0, NULL);
431
432 return dev;
433}
434
435static noinline struct btrfs_fs_devices *find_fsid(
436 const u8 *fsid, const u8 *metadata_fsid)
437{
438 struct btrfs_fs_devices *fs_devices;
439
440 ASSERT(fsid);
441
442
443 list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
444 if (metadata_fsid) {
445 if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0
446 && memcmp(metadata_fsid, fs_devices->metadata_uuid,
447 BTRFS_FSID_SIZE) == 0)
448 return fs_devices;
449 } else {
450 if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
451 return fs_devices;
452 }
453 }
454 return NULL;
455}
456
457static struct btrfs_fs_devices *find_fsid_with_metadata_uuid(
458 struct btrfs_super_block *disk_super)
459{
460
461 struct btrfs_fs_devices *fs_devices;
462
463
464
465
466
467
468
469 list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
470 if (fs_devices->fsid_change &&
471 memcmp(disk_super->metadata_uuid, fs_devices->fsid,
472 BTRFS_FSID_SIZE) == 0 &&
473 memcmp(fs_devices->fsid, fs_devices->metadata_uuid,
474 BTRFS_FSID_SIZE) == 0) {
475 return fs_devices;
476 }
477 }
478
479
480
481
482
483
484 list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
485 if (fs_devices->fsid_change &&
486 memcmp(fs_devices->metadata_uuid,
487 fs_devices->fsid, BTRFS_FSID_SIZE) != 0 &&
488 memcmp(disk_super->metadata_uuid, fs_devices->metadata_uuid,
489 BTRFS_FSID_SIZE) == 0) {
490 return fs_devices;
491 }
492 }
493
494 return find_fsid(disk_super->fsid, disk_super->metadata_uuid);
495}
496
497
498static int
499btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
500 int flush, struct block_device **bdev,
501 struct btrfs_super_block **disk_super)
502{
503 int ret;
504
505 *bdev = blkdev_get_by_path(device_path, flags, holder);
506
507 if (IS_ERR(*bdev)) {
508 ret = PTR_ERR(*bdev);
509 goto error;
510 }
511
512 if (flush)
513 filemap_write_and_wait((*bdev)->bd_inode->i_mapping);
514 ret = set_blocksize(*bdev, BTRFS_BDEV_BLOCKSIZE);
515 if (ret) {
516 blkdev_put(*bdev, flags);
517 goto error;
518 }
519 invalidate_bdev(*bdev);
520 *disk_super = btrfs_read_dev_super(*bdev);
521 if (IS_ERR(*disk_super)) {
522 ret = PTR_ERR(*disk_super);
523 blkdev_put(*bdev, flags);
524 goto error;
525 }
526
527 return 0;
528
529error:
530 *bdev = NULL;
531 return ret;
532}
533
534static bool device_path_matched(const char *path, struct btrfs_device *device)
535{
536 int found;
537
538 rcu_read_lock();
539 found = strcmp(rcu_str_deref(device->name), path);
540 rcu_read_unlock();
541
542 return found == 0;
543}
544
545
546
547
548
549
550
551
552
553
554
555
556static int btrfs_free_stale_devices(const char *path,
557 struct btrfs_device *skip_device)
558{
559 struct btrfs_fs_devices *fs_devices, *tmp_fs_devices;
560 struct btrfs_device *device, *tmp_device;
561 int ret = 0;
562
563 if (path)
564 ret = -ENOENT;
565
566 list_for_each_entry_safe(fs_devices, tmp_fs_devices, &fs_uuids, fs_list) {
567
568 mutex_lock(&fs_devices->device_list_mutex);
569 list_for_each_entry_safe(device, tmp_device,
570 &fs_devices->devices, dev_list) {
571 if (skip_device && skip_device == device)
572 continue;
573 if (path && !device->name)
574 continue;
575 if (path && !device_path_matched(path, device))
576 continue;
577 if (fs_devices->opened) {
578
579 if (path && ret != 0)
580 ret = -EBUSY;
581 break;
582 }
583
584
585 fs_devices->num_devices--;
586 list_del(&device->dev_list);
587 btrfs_free_device(device);
588
589 ret = 0;
590 if (fs_devices->num_devices == 0)
591 break;
592 }
593 mutex_unlock(&fs_devices->device_list_mutex);
594
595 if (fs_devices->num_devices == 0) {
596 btrfs_sysfs_remove_fsid(fs_devices);
597 list_del(&fs_devices->fs_list);
598 free_fs_devices(fs_devices);
599 }
600 }
601
602 return ret;
603}
604
605static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
606 struct btrfs_device *device, fmode_t flags,
607 void *holder)
608{
609 struct request_queue *q;
610 struct block_device *bdev;
611 struct btrfs_super_block *disk_super;
612 u64 devid;
613 int ret;
614
615 if (device->bdev)
616 return -EINVAL;
617 if (!device->name)
618 return -EINVAL;
619
620 ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1,
621 &bdev, &disk_super);
622 if (ret)
623 return ret;
624
625 devid = btrfs_stack_device_id(&disk_super->dev_item);
626 if (devid != device->devid)
627 goto error_free_page;
628
629 if (memcmp(device->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE))
630 goto error_free_page;
631
632 device->generation = btrfs_super_generation(disk_super);
633
634 if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) {
635 if (btrfs_super_incompat_flags(disk_super) &
636 BTRFS_FEATURE_INCOMPAT_METADATA_UUID) {
637 pr_err(
638 "BTRFS: Invalid seeding and uuid-changed device detected\n");
639 goto error_free_page;
640 }
641
642 clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
643 fs_devices->seeding = true;
644 } else {
645 if (bdev_read_only(bdev))
646 clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
647 else
648 set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
649 }
650
651 q = bdev_get_queue(bdev);
652 if (!blk_queue_nonrot(q))
653 fs_devices->rotating = true;
654
655 device->bdev = bdev;
656 clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
657 device->mode = flags;
658
659 fs_devices->open_devices++;
660 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
661 device->devid != BTRFS_DEV_REPLACE_DEVID) {
662 fs_devices->rw_devices++;
663 list_add_tail(&device->dev_alloc_list, &fs_devices->alloc_list);
664 }
665 btrfs_release_disk_super(disk_super);
666
667 return 0;
668
669error_free_page:
670 btrfs_release_disk_super(disk_super);
671 blkdev_put(bdev, flags);
672
673 return -EINVAL;
674}
675
676
677
678
679
680
681
682static struct btrfs_fs_devices *find_fsid_inprogress(
683 struct btrfs_super_block *disk_super)
684{
685 struct btrfs_fs_devices *fs_devices;
686
687 list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
688 if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
689 BTRFS_FSID_SIZE) != 0 &&
690 memcmp(fs_devices->metadata_uuid, disk_super->fsid,
691 BTRFS_FSID_SIZE) == 0 && !fs_devices->fsid_change) {
692 return fs_devices;
693 }
694 }
695
696 return find_fsid(disk_super->fsid, NULL);
697}
698
699
700static struct btrfs_fs_devices *find_fsid_changed(
701 struct btrfs_super_block *disk_super)
702{
703 struct btrfs_fs_devices *fs_devices;
704
705
706
707
708
709
710
711
712
713
714 list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
715
716 if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
717 BTRFS_FSID_SIZE) != 0 &&
718 memcmp(fs_devices->metadata_uuid, disk_super->metadata_uuid,
719 BTRFS_FSID_SIZE) == 0 &&
720 memcmp(fs_devices->fsid, disk_super->fsid,
721 BTRFS_FSID_SIZE) != 0)
722 return fs_devices;
723
724
725 if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
726 BTRFS_FSID_SIZE) == 0 &&
727 memcmp(fs_devices->fsid, disk_super->metadata_uuid,
728 BTRFS_FSID_SIZE) == 0)
729 return fs_devices;
730 }
731
732 return NULL;
733}
734
735static struct btrfs_fs_devices *find_fsid_reverted_metadata(
736 struct btrfs_super_block *disk_super)
737{
738 struct btrfs_fs_devices *fs_devices;
739
740
741
742
743
744
745
746
747
748
749 list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
750 if (memcmp(fs_devices->fsid, fs_devices->metadata_uuid,
751 BTRFS_FSID_SIZE) != 0 &&
752 memcmp(fs_devices->metadata_uuid, disk_super->fsid,
753 BTRFS_FSID_SIZE) == 0 &&
754 fs_devices->fsid_change)
755 return fs_devices;
756 }
757
758 return NULL;
759}
760
761
762
763
764
765
766
767static noinline struct btrfs_device *device_list_add(const char *path,
768 struct btrfs_super_block *disk_super,
769 bool *new_device_added)
770{
771 struct btrfs_device *device;
772 struct btrfs_fs_devices *fs_devices = NULL;
773 struct rcu_string *name;
774 u64 found_transid = btrfs_super_generation(disk_super);
775 u64 devid = btrfs_stack_device_id(&disk_super->dev_item);
776 bool has_metadata_uuid = (btrfs_super_incompat_flags(disk_super) &
777 BTRFS_FEATURE_INCOMPAT_METADATA_UUID);
778 bool fsid_change_in_progress = (btrfs_super_flags(disk_super) &
779 BTRFS_SUPER_FLAG_CHANGING_FSID_V2);
780
781 if (fsid_change_in_progress) {
782 if (!has_metadata_uuid)
783 fs_devices = find_fsid_inprogress(disk_super);
784 else
785 fs_devices = find_fsid_changed(disk_super);
786 } else if (has_metadata_uuid) {
787 fs_devices = find_fsid_with_metadata_uuid(disk_super);
788 } else {
789 fs_devices = find_fsid_reverted_metadata(disk_super);
790 if (!fs_devices)
791 fs_devices = find_fsid(disk_super->fsid, NULL);
792 }
793
794
795 if (!fs_devices) {
796 if (has_metadata_uuid)
797 fs_devices = alloc_fs_devices(disk_super->fsid,
798 disk_super->metadata_uuid);
799 else
800 fs_devices = alloc_fs_devices(disk_super->fsid, NULL);
801
802 if (IS_ERR(fs_devices))
803 return ERR_CAST(fs_devices);
804
805 fs_devices->fsid_change = fsid_change_in_progress;
806
807 mutex_lock(&fs_devices->device_list_mutex);
808 list_add(&fs_devices->fs_list, &fs_uuids);
809
810 device = NULL;
811 } else {
812 mutex_lock(&fs_devices->device_list_mutex);
813 device = btrfs_find_device(fs_devices, devid,
814 disk_super->dev_item.uuid, NULL, false);
815
816
817
818
819
820
821 if (fs_devices->fsid_change &&
822 found_transid > fs_devices->latest_generation) {
823 memcpy(fs_devices->fsid, disk_super->fsid,
824 BTRFS_FSID_SIZE);
825
826 if (has_metadata_uuid)
827 memcpy(fs_devices->metadata_uuid,
828 disk_super->metadata_uuid,
829 BTRFS_FSID_SIZE);
830 else
831 memcpy(fs_devices->metadata_uuid,
832 disk_super->fsid, BTRFS_FSID_SIZE);
833
834 fs_devices->fsid_change = false;
835 }
836 }
837
838 if (!device) {
839 if (fs_devices->opened) {
840 mutex_unlock(&fs_devices->device_list_mutex);
841 return ERR_PTR(-EBUSY);
842 }
843
844 device = btrfs_alloc_device(NULL, &devid,
845 disk_super->dev_item.uuid);
846 if (IS_ERR(device)) {
847 mutex_unlock(&fs_devices->device_list_mutex);
848
849 return device;
850 }
851
852 name = rcu_string_strdup(path, GFP_NOFS);
853 if (!name) {
854 btrfs_free_device(device);
855 mutex_unlock(&fs_devices->device_list_mutex);
856 return ERR_PTR(-ENOMEM);
857 }
858 rcu_assign_pointer(device->name, name);
859
860 list_add_rcu(&device->dev_list, &fs_devices->devices);
861 fs_devices->num_devices++;
862
863 device->fs_devices = fs_devices;
864 *new_device_added = true;
865
866 if (disk_super->label[0])
867 pr_info(
868 "BTRFS: device label %s devid %llu transid %llu %s scanned by %s (%d)\n",
869 disk_super->label, devid, found_transid, path,
870 current->comm, task_pid_nr(current));
871 else
872 pr_info(
873 "BTRFS: device fsid %pU devid %llu transid %llu %s scanned by %s (%d)\n",
874 disk_super->fsid, devid, found_transid, path,
875 current->comm, task_pid_nr(current));
876
877 } else if (!device->name || strcmp(device->name->str, path)) {
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904 if (!fs_devices->opened && found_transid < device->generation) {
905
906
907
908
909
910
911
912 mutex_unlock(&fs_devices->device_list_mutex);
913 return ERR_PTR(-EEXIST);
914 }
915
916
917
918
919
920 if (device->bdev) {
921 struct block_device *path_bdev;
922
923 path_bdev = lookup_bdev(path);
924 if (IS_ERR(path_bdev)) {
925 mutex_unlock(&fs_devices->device_list_mutex);
926 return ERR_CAST(path_bdev);
927 }
928
929 if (device->bdev != path_bdev) {
930 bdput(path_bdev);
931 mutex_unlock(&fs_devices->device_list_mutex);
932 btrfs_warn_in_rcu(device->fs_info,
933 "duplicate device fsid:devid for %pU:%llu old:%s new:%s",
934 disk_super->fsid, devid,
935 rcu_str_deref(device->name), path);
936 return ERR_PTR(-EEXIST);
937 }
938 bdput(path_bdev);
939 btrfs_info_in_rcu(device->fs_info,
940 "device fsid %pU devid %llu moved old:%s new:%s",
941 disk_super->fsid, devid,
942 rcu_str_deref(device->name), path);
943 }
944
945 name = rcu_string_strdup(path, GFP_NOFS);
946 if (!name) {
947 mutex_unlock(&fs_devices->device_list_mutex);
948 return ERR_PTR(-ENOMEM);
949 }
950 rcu_string_free(device->name);
951 rcu_assign_pointer(device->name, name);
952 if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) {
953 fs_devices->missing_devices--;
954 clear_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
955 }
956 }
957
958
959
960
961
962
963
964 if (!fs_devices->opened) {
965 device->generation = found_transid;
966 fs_devices->latest_generation = max_t(u64, found_transid,
967 fs_devices->latest_generation);
968 }
969
970 fs_devices->total_devices = btrfs_super_num_devices(disk_super);
971
972 mutex_unlock(&fs_devices->device_list_mutex);
973 return device;
974}
975
976static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
977{
978 struct btrfs_fs_devices *fs_devices;
979 struct btrfs_device *device;
980 struct btrfs_device *orig_dev;
981 int ret = 0;
982
983 fs_devices = alloc_fs_devices(orig->fsid, NULL);
984 if (IS_ERR(fs_devices))
985 return fs_devices;
986
987 mutex_lock(&orig->device_list_mutex);
988 fs_devices->total_devices = orig->total_devices;
989
990 list_for_each_entry(orig_dev, &orig->devices, dev_list) {
991 struct rcu_string *name;
992
993 device = btrfs_alloc_device(NULL, &orig_dev->devid,
994 orig_dev->uuid);
995 if (IS_ERR(device)) {
996 ret = PTR_ERR(device);
997 goto error;
998 }
999
1000
1001
1002
1003
1004 if (orig_dev->name) {
1005 name = rcu_string_strdup(orig_dev->name->str,
1006 GFP_KERNEL);
1007 if (!name) {
1008 btrfs_free_device(device);
1009 ret = -ENOMEM;
1010 goto error;
1011 }
1012 rcu_assign_pointer(device->name, name);
1013 }
1014
1015 list_add(&device->dev_list, &fs_devices->devices);
1016 device->fs_devices = fs_devices;
1017 fs_devices->num_devices++;
1018 }
1019 mutex_unlock(&orig->device_list_mutex);
1020 return fs_devices;
1021error:
1022 mutex_unlock(&orig->device_list_mutex);
1023 free_fs_devices(fs_devices);
1024 return ERR_PTR(ret);
1025}
1026
1027
1028
1029
1030
1031void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step)
1032{
1033 struct btrfs_device *device, *next;
1034 struct btrfs_device *latest_dev = NULL;
1035
1036 mutex_lock(&uuid_mutex);
1037again:
1038
1039 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
1040 if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
1041 &device->dev_state)) {
1042 if (!test_bit(BTRFS_DEV_STATE_REPLACE_TGT,
1043 &device->dev_state) &&
1044 !test_bit(BTRFS_DEV_STATE_MISSING,
1045 &device->dev_state) &&
1046 (!latest_dev ||
1047 device->generation > latest_dev->generation)) {
1048 latest_dev = device;
1049 }
1050 continue;
1051 }
1052
1053 if (device->devid == BTRFS_DEV_REPLACE_DEVID) {
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064 if (step == 0 || test_bit(BTRFS_DEV_STATE_REPLACE_TGT,
1065 &device->dev_state)) {
1066 continue;
1067 }
1068 }
1069 if (device->bdev) {
1070 blkdev_put(device->bdev, device->mode);
1071 device->bdev = NULL;
1072 fs_devices->open_devices--;
1073 }
1074 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
1075 list_del_init(&device->dev_alloc_list);
1076 clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
1077 if (!test_bit(BTRFS_DEV_STATE_REPLACE_TGT,
1078 &device->dev_state))
1079 fs_devices->rw_devices--;
1080 }
1081 list_del_init(&device->dev_list);
1082 fs_devices->num_devices--;
1083 btrfs_free_device(device);
1084 }
1085
1086 if (fs_devices->seed) {
1087 fs_devices = fs_devices->seed;
1088 goto again;
1089 }
1090
1091 fs_devices->latest_bdev = latest_dev->bdev;
1092
1093 mutex_unlock(&uuid_mutex);
1094}
1095
1096static void btrfs_close_bdev(struct btrfs_device *device)
1097{
1098 if (!device->bdev)
1099 return;
1100
1101 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
1102 sync_blockdev(device->bdev);
1103 invalidate_bdev(device->bdev);
1104 }
1105
1106 blkdev_put(device->bdev, device->mode);
1107}
1108
1109static void btrfs_close_one_device(struct btrfs_device *device)
1110{
1111 struct btrfs_fs_devices *fs_devices = device->fs_devices;
1112
1113 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
1114 device->devid != BTRFS_DEV_REPLACE_DEVID) {
1115 list_del_init(&device->dev_alloc_list);
1116 fs_devices->rw_devices--;
1117 }
1118
1119 if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
1120 fs_devices->missing_devices--;
1121
1122 btrfs_close_bdev(device);
1123 if (device->bdev) {
1124 fs_devices->open_devices--;
1125 device->bdev = NULL;
1126 }
1127 clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
1128
1129 device->fs_info = NULL;
1130 atomic_set(&device->dev_stats_ccnt, 0);
1131 extent_io_tree_release(&device->alloc_state);
1132
1133
1134 ASSERT(!test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state));
1135 ASSERT(!test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state));
1136 ASSERT(list_empty(&device->dev_alloc_list));
1137 ASSERT(list_empty(&device->post_commit_list));
1138 ASSERT(atomic_read(&device->reada_in_flight) == 0);
1139}
1140
1141static int close_fs_devices(struct btrfs_fs_devices *fs_devices)
1142{
1143 struct btrfs_device *device, *tmp;
1144
1145 if (--fs_devices->opened > 0)
1146 return 0;
1147
1148 mutex_lock(&fs_devices->device_list_mutex);
1149 list_for_each_entry_safe(device, tmp, &fs_devices->devices, dev_list) {
1150 btrfs_close_one_device(device);
1151 }
1152 mutex_unlock(&fs_devices->device_list_mutex);
1153
1154 WARN_ON(fs_devices->open_devices);
1155 WARN_ON(fs_devices->rw_devices);
1156 fs_devices->opened = 0;
1157 fs_devices->seeding = false;
1158
1159 return 0;
1160}
1161
1162int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
1163{
1164 struct btrfs_fs_devices *seed_devices = NULL;
1165 int ret;
1166
1167 mutex_lock(&uuid_mutex);
1168 ret = close_fs_devices(fs_devices);
1169 if (!fs_devices->opened) {
1170 seed_devices = fs_devices->seed;
1171 fs_devices->seed = NULL;
1172 }
1173 mutex_unlock(&uuid_mutex);
1174
1175 while (seed_devices) {
1176 fs_devices = seed_devices;
1177 seed_devices = fs_devices->seed;
1178 close_fs_devices(fs_devices);
1179 free_fs_devices(fs_devices);
1180 }
1181 return ret;
1182}
1183
1184static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
1185 fmode_t flags, void *holder)
1186{
1187 struct btrfs_device *device;
1188 struct btrfs_device *latest_dev = NULL;
1189
1190 flags |= FMODE_EXCL;
1191
1192 list_for_each_entry(device, &fs_devices->devices, dev_list) {
1193
1194 if (btrfs_open_one_device(fs_devices, device, flags, holder))
1195 continue;
1196
1197 if (!latest_dev ||
1198 device->generation > latest_dev->generation)
1199 latest_dev = device;
1200 }
1201 if (fs_devices->open_devices == 0)
1202 return -EINVAL;
1203
1204 fs_devices->opened = 1;
1205 fs_devices->latest_bdev = latest_dev->bdev;
1206 fs_devices->total_rw_bytes = 0;
1207 fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_REGULAR;
1208
1209 return 0;
1210}
1211
1212static int devid_cmp(void *priv, struct list_head *a, struct list_head *b)
1213{
1214 struct btrfs_device *dev1, *dev2;
1215
1216 dev1 = list_entry(a, struct btrfs_device, dev_list);
1217 dev2 = list_entry(b, struct btrfs_device, dev_list);
1218
1219 if (dev1->devid < dev2->devid)
1220 return -1;
1221 else if (dev1->devid > dev2->devid)
1222 return 1;
1223 return 0;
1224}
1225
1226int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
1227 fmode_t flags, void *holder)
1228{
1229 int ret;
1230
1231 lockdep_assert_held(&uuid_mutex);
1232
1233 mutex_lock(&fs_devices->device_list_mutex);
1234 if (fs_devices->opened) {
1235 fs_devices->opened++;
1236 ret = 0;
1237 } else {
1238 list_sort(NULL, &fs_devices->devices, devid_cmp);
1239 ret = open_fs_devices(fs_devices, flags, holder);
1240 }
1241 mutex_unlock(&fs_devices->device_list_mutex);
1242
1243 return ret;
1244}
1245
1246void btrfs_release_disk_super(struct btrfs_super_block *super)
1247{
1248 struct page *page = virt_to_page(super);
1249
1250 put_page(page);
1251}
1252
1253static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev,
1254 u64 bytenr)
1255{
1256 struct btrfs_super_block *disk_super;
1257 struct page *page;
1258 void *p;
1259 pgoff_t index;
1260
1261
1262 if (bytenr + PAGE_SIZE >= i_size_read(bdev->bd_inode))
1263 return ERR_PTR(-EINVAL);
1264
1265
1266 if (sizeof(*disk_super) > PAGE_SIZE)
1267 return ERR_PTR(-EINVAL);
1268
1269
1270 index = bytenr >> PAGE_SHIFT;
1271 if ((bytenr + sizeof(*disk_super) - 1) >> PAGE_SHIFT != index)
1272 return ERR_PTR(-EINVAL);
1273
1274
1275 page = read_cache_page_gfp(bdev->bd_inode->i_mapping, index, GFP_KERNEL);
1276
1277 if (IS_ERR(page))
1278 return ERR_CAST(page);
1279
1280 p = page_address(page);
1281
1282
1283 disk_super = p + offset_in_page(bytenr);
1284
1285 if (btrfs_super_bytenr(disk_super) != bytenr ||
1286 btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
1287 btrfs_release_disk_super(p);
1288 return ERR_PTR(-EINVAL);
1289 }
1290
1291 if (disk_super->label[0] && disk_super->label[BTRFS_LABEL_SIZE - 1])
1292 disk_super->label[BTRFS_LABEL_SIZE - 1] = 0;
1293
1294 return disk_super;
1295}
1296
1297int btrfs_forget_devices(const char *path)
1298{
1299 int ret;
1300
1301 mutex_lock(&uuid_mutex);
1302 ret = btrfs_free_stale_devices(strlen(path) ? path : NULL, NULL);
1303 mutex_unlock(&uuid_mutex);
1304
1305 return ret;
1306}
1307
1308
1309
1310
1311
1312
1313struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
1314 void *holder)
1315{
1316 struct btrfs_super_block *disk_super;
1317 bool new_device_added = false;
1318 struct btrfs_device *device = NULL;
1319 struct block_device *bdev;
1320 u64 bytenr;
1321
1322 lockdep_assert_held(&uuid_mutex);
1323
1324
1325
1326
1327
1328
1329
1330 bytenr = btrfs_sb_offset(0);
1331 flags |= FMODE_EXCL;
1332
1333 bdev = blkdev_get_by_path(path, flags, holder);
1334 if (IS_ERR(bdev))
1335 return ERR_CAST(bdev);
1336
1337 disk_super = btrfs_read_disk_super(bdev, bytenr);
1338 if (IS_ERR(disk_super)) {
1339 device = ERR_CAST(disk_super);
1340 goto error_bdev_put;
1341 }
1342
1343 device = device_list_add(path, disk_super, &new_device_added);
1344 if (!IS_ERR(device)) {
1345 if (new_device_added)
1346 btrfs_free_stale_devices(path, device);
1347 }
1348
1349 btrfs_release_disk_super(disk_super);
1350
1351error_bdev_put:
1352 blkdev_put(bdev, flags);
1353
1354 return device;
1355}
1356
1357
1358
1359
1360
1361static bool contains_pending_extent(struct btrfs_device *device, u64 *start,
1362 u64 len)
1363{
1364 u64 physical_start, physical_end;
1365
1366 lockdep_assert_held(&device->fs_info->chunk_mutex);
1367
1368 if (!find_first_extent_bit(&device->alloc_state, *start,
1369 &physical_start, &physical_end,
1370 CHUNK_ALLOCATED, NULL)) {
1371
1372 if (in_range(physical_start, *start, len) ||
1373 in_range(*start, physical_start,
1374 physical_end - physical_start)) {
1375 *start = physical_end + 1;
1376 return true;
1377 }
1378 }
1379 return false;
1380}
1381
1382static u64 dev_extent_search_start(struct btrfs_device *device, u64 start)
1383{
1384 switch (device->fs_devices->chunk_alloc_policy) {
1385 case BTRFS_CHUNK_ALLOC_REGULAR:
1386
1387
1388
1389
1390
1391 return max_t(u64, start, SZ_1M);
1392 default:
1393 BUG();
1394 }
1395}
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407static bool dev_extent_hole_check(struct btrfs_device *device, u64 *hole_start,
1408 u64 *hole_size, u64 num_bytes)
1409{
1410 bool changed = false;
1411 u64 hole_end = *hole_start + *hole_size;
1412
1413
1414
1415
1416
1417 if (contains_pending_extent(device, hole_start, *hole_size)) {
1418 if (hole_end >= *hole_start)
1419 *hole_size = hole_end - *hole_start;
1420 else
1421 *hole_size = 0;
1422 changed = true;
1423 }
1424
1425 switch (device->fs_devices->chunk_alloc_policy) {
1426 case BTRFS_CHUNK_ALLOC_REGULAR:
1427
1428 break;
1429 default:
1430 BUG();
1431 }
1432
1433 return changed;
1434}
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463static int find_free_dev_extent_start(struct btrfs_device *device,
1464 u64 num_bytes, u64 search_start, u64 *start,
1465 u64 *len)
1466{
1467 struct btrfs_fs_info *fs_info = device->fs_info;
1468 struct btrfs_root *root = fs_info->dev_root;
1469 struct btrfs_key key;
1470 struct btrfs_dev_extent *dev_extent;
1471 struct btrfs_path *path;
1472 u64 hole_size;
1473 u64 max_hole_start;
1474 u64 max_hole_size;
1475 u64 extent_end;
1476 u64 search_end = device->total_bytes;
1477 int ret;
1478 int slot;
1479 struct extent_buffer *l;
1480
1481 search_start = dev_extent_search_start(device, search_start);
1482
1483 path = btrfs_alloc_path();
1484 if (!path)
1485 return -ENOMEM;
1486
1487 max_hole_start = search_start;
1488 max_hole_size = 0;
1489
1490again:
1491 if (search_start >= search_end ||
1492 test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
1493 ret = -ENOSPC;
1494 goto out;
1495 }
1496
1497 path->reada = READA_FORWARD;
1498 path->search_commit_root = 1;
1499 path->skip_locking = 1;
1500
1501 key.objectid = device->devid;
1502 key.offset = search_start;
1503 key.type = BTRFS_DEV_EXTENT_KEY;
1504
1505 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1506 if (ret < 0)
1507 goto out;
1508 if (ret > 0) {
1509 ret = btrfs_previous_item(root, path, key.objectid, key.type);
1510 if (ret < 0)
1511 goto out;
1512 }
1513
1514 while (1) {
1515 l = path->nodes[0];
1516 slot = path->slots[0];
1517 if (slot >= btrfs_header_nritems(l)) {
1518 ret = btrfs_next_leaf(root, path);
1519 if (ret == 0)
1520 continue;
1521 if (ret < 0)
1522 goto out;
1523
1524 break;
1525 }
1526 btrfs_item_key_to_cpu(l, &key, slot);
1527
1528 if (key.objectid < device->devid)
1529 goto next;
1530
1531 if (key.objectid > device->devid)
1532 break;
1533
1534 if (key.type != BTRFS_DEV_EXTENT_KEY)
1535 goto next;
1536
1537 if (key.offset > search_start) {
1538 hole_size = key.offset - search_start;
1539 dev_extent_hole_check(device, &search_start, &hole_size,
1540 num_bytes);
1541
1542 if (hole_size > max_hole_size) {
1543 max_hole_start = search_start;
1544 max_hole_size = hole_size;
1545 }
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556 if (hole_size >= num_bytes) {
1557 ret = 0;
1558 goto out;
1559 }
1560 }
1561
1562 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
1563 extent_end = key.offset + btrfs_dev_extent_length(l,
1564 dev_extent);
1565 if (extent_end > search_start)
1566 search_start = extent_end;
1567next:
1568 path->slots[0]++;
1569 cond_resched();
1570 }
1571
1572
1573
1574
1575
1576
1577 if (search_end > search_start) {
1578 hole_size = search_end - search_start;
1579 if (dev_extent_hole_check(device, &search_start, &hole_size,
1580 num_bytes)) {
1581 btrfs_release_path(path);
1582 goto again;
1583 }
1584
1585 if (hole_size > max_hole_size) {
1586 max_hole_start = search_start;
1587 max_hole_size = hole_size;
1588 }
1589 }
1590
1591
1592 if (max_hole_size < num_bytes)
1593 ret = -ENOSPC;
1594 else
1595 ret = 0;
1596
1597out:
1598 btrfs_free_path(path);
1599 *start = max_hole_start;
1600 if (len)
1601 *len = max_hole_size;
1602 return ret;
1603}
1604
1605int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
1606 u64 *start, u64 *len)
1607{
1608
1609 return find_free_dev_extent_start(device, num_bytes, 0, start, len);
1610}
1611
1612static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
1613 struct btrfs_device *device,
1614 u64 start, u64 *dev_extent_len)
1615{
1616 struct btrfs_fs_info *fs_info = device->fs_info;
1617 struct btrfs_root *root = fs_info->dev_root;
1618 int ret;
1619 struct btrfs_path *path;
1620 struct btrfs_key key;
1621 struct btrfs_key found_key;
1622 struct extent_buffer *leaf = NULL;
1623 struct btrfs_dev_extent *extent = NULL;
1624
1625 path = btrfs_alloc_path();
1626 if (!path)
1627 return -ENOMEM;
1628
1629 key.objectid = device->devid;
1630 key.offset = start;
1631 key.type = BTRFS_DEV_EXTENT_KEY;
1632again:
1633 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1634 if (ret > 0) {
1635 ret = btrfs_previous_item(root, path, key.objectid,
1636 BTRFS_DEV_EXTENT_KEY);
1637 if (ret)
1638 goto out;
1639 leaf = path->nodes[0];
1640 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1641 extent = btrfs_item_ptr(leaf, path->slots[0],
1642 struct btrfs_dev_extent);
1643 BUG_ON(found_key.offset > start || found_key.offset +
1644 btrfs_dev_extent_length(leaf, extent) < start);
1645 key = found_key;
1646 btrfs_release_path(path);
1647 goto again;
1648 } else if (ret == 0) {
1649 leaf = path->nodes[0];
1650 extent = btrfs_item_ptr(leaf, path->slots[0],
1651 struct btrfs_dev_extent);
1652 } else {
1653 btrfs_handle_fs_error(fs_info, ret, "Slot search failed");
1654 goto out;
1655 }
1656
1657 *dev_extent_len = btrfs_dev_extent_length(leaf, extent);
1658
1659 ret = btrfs_del_item(trans, root, path);
1660 if (ret) {
1661 btrfs_handle_fs_error(fs_info, ret,
1662 "Failed to remove dev extent item");
1663 } else {
1664 set_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags);
1665 }
1666out:
1667 btrfs_free_path(path);
1668 return ret;
1669}
1670
1671static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
1672 struct btrfs_device *device,
1673 u64 chunk_offset, u64 start, u64 num_bytes)
1674{
1675 int ret;
1676 struct btrfs_path *path;
1677 struct btrfs_fs_info *fs_info = device->fs_info;
1678 struct btrfs_root *root = fs_info->dev_root;
1679 struct btrfs_dev_extent *extent;
1680 struct extent_buffer *leaf;
1681 struct btrfs_key key;
1682
1683 WARN_ON(!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state));
1684 WARN_ON(test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state));
1685 path = btrfs_alloc_path();
1686 if (!path)
1687 return -ENOMEM;
1688
1689 key.objectid = device->devid;
1690 key.offset = start;
1691 key.type = BTRFS_DEV_EXTENT_KEY;
1692 ret = btrfs_insert_empty_item(trans, root, path, &key,
1693 sizeof(*extent));
1694 if (ret)
1695 goto out;
1696
1697 leaf = path->nodes[0];
1698 extent = btrfs_item_ptr(leaf, path->slots[0],
1699 struct btrfs_dev_extent);
1700 btrfs_set_dev_extent_chunk_tree(leaf, extent,
1701 BTRFS_CHUNK_TREE_OBJECTID);
1702 btrfs_set_dev_extent_chunk_objectid(leaf, extent,
1703 BTRFS_FIRST_CHUNK_TREE_OBJECTID);
1704 btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
1705
1706 btrfs_set_dev_extent_length(leaf, extent, num_bytes);
1707 btrfs_mark_buffer_dirty(leaf);
1708out:
1709 btrfs_free_path(path);
1710 return ret;
1711}
1712
1713static u64 find_next_chunk(struct btrfs_fs_info *fs_info)
1714{
1715 struct extent_map_tree *em_tree;
1716 struct extent_map *em;
1717 struct rb_node *n;
1718 u64 ret = 0;
1719
1720 em_tree = &fs_info->mapping_tree;
1721 read_lock(&em_tree->lock);
1722 n = rb_last(&em_tree->map.rb_root);
1723 if (n) {
1724 em = rb_entry(n, struct extent_map, rb_node);
1725 ret = em->start + em->len;
1726 }
1727 read_unlock(&em_tree->lock);
1728
1729 return ret;
1730}
1731
1732static noinline int find_next_devid(struct btrfs_fs_info *fs_info,
1733 u64 *devid_ret)
1734{
1735 int ret;
1736 struct btrfs_key key;
1737 struct btrfs_key found_key;
1738 struct btrfs_path *path;
1739
1740 path = btrfs_alloc_path();
1741 if (!path)
1742 return -ENOMEM;
1743
1744 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
1745 key.type = BTRFS_DEV_ITEM_KEY;
1746 key.offset = (u64)-1;
1747
1748 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
1749 if (ret < 0)
1750 goto error;
1751
1752 if (ret == 0) {
1753
1754 btrfs_err(fs_info, "corrupted chunk tree devid -1 matched");
1755 ret = -EUCLEAN;
1756 goto error;
1757 }
1758
1759 ret = btrfs_previous_item(fs_info->chunk_root, path,
1760 BTRFS_DEV_ITEMS_OBJECTID,
1761 BTRFS_DEV_ITEM_KEY);
1762 if (ret) {
1763 *devid_ret = 1;
1764 } else {
1765 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1766 path->slots[0]);
1767 *devid_ret = found_key.offset + 1;
1768 }
1769 ret = 0;
1770error:
1771 btrfs_free_path(path);
1772 return ret;
1773}
1774
1775
1776
1777
1778
1779static int btrfs_add_dev_item(struct btrfs_trans_handle *trans,
1780 struct btrfs_device *device)
1781{
1782 int ret;
1783 struct btrfs_path *path;
1784 struct btrfs_dev_item *dev_item;
1785 struct extent_buffer *leaf;
1786 struct btrfs_key key;
1787 unsigned long ptr;
1788
1789 path = btrfs_alloc_path();
1790 if (!path)
1791 return -ENOMEM;
1792
1793 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
1794 key.type = BTRFS_DEV_ITEM_KEY;
1795 key.offset = device->devid;
1796
1797 ret = btrfs_insert_empty_item(trans, trans->fs_info->chunk_root, path,
1798 &key, sizeof(*dev_item));
1799 if (ret)
1800 goto out;
1801
1802 leaf = path->nodes[0];
1803 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
1804
1805 btrfs_set_device_id(leaf, dev_item, device->devid);
1806 btrfs_set_device_generation(leaf, dev_item, 0);
1807 btrfs_set_device_type(leaf, dev_item, device->type);
1808 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
1809 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
1810 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
1811 btrfs_set_device_total_bytes(leaf, dev_item,
1812 btrfs_device_get_disk_total_bytes(device));
1813 btrfs_set_device_bytes_used(leaf, dev_item,
1814 btrfs_device_get_bytes_used(device));
1815 btrfs_set_device_group(leaf, dev_item, 0);
1816 btrfs_set_device_seek_speed(leaf, dev_item, 0);
1817 btrfs_set_device_bandwidth(leaf, dev_item, 0);
1818 btrfs_set_device_start_offset(leaf, dev_item, 0);
1819
1820 ptr = btrfs_device_uuid(dev_item);
1821 write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
1822 ptr = btrfs_device_fsid(dev_item);
1823 write_extent_buffer(leaf, trans->fs_info->fs_devices->metadata_uuid,
1824 ptr, BTRFS_FSID_SIZE);
1825 btrfs_mark_buffer_dirty(leaf);
1826
1827 ret = 0;
1828out:
1829 btrfs_free_path(path);
1830 return ret;
1831}
1832
1833
1834
1835
1836
1837static void update_dev_time(const char *path_name)
1838{
1839 struct file *filp;
1840
1841 filp = filp_open(path_name, O_RDWR, 0);
1842 if (IS_ERR(filp))
1843 return;
1844 file_update_time(filp);
1845 filp_close(filp, NULL);
1846}
1847
1848static int btrfs_rm_dev_item(struct btrfs_device *device)
1849{
1850 struct btrfs_root *root = device->fs_info->chunk_root;
1851 int ret;
1852 struct btrfs_path *path;
1853 struct btrfs_key key;
1854 struct btrfs_trans_handle *trans;
1855
1856 path = btrfs_alloc_path();
1857 if (!path)
1858 return -ENOMEM;
1859
1860 trans = btrfs_start_transaction(root, 0);
1861 if (IS_ERR(trans)) {
1862 btrfs_free_path(path);
1863 return PTR_ERR(trans);
1864 }
1865 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
1866 key.type = BTRFS_DEV_ITEM_KEY;
1867 key.offset = device->devid;
1868
1869 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1870 if (ret) {
1871 if (ret > 0)
1872 ret = -ENOENT;
1873 btrfs_abort_transaction(trans, ret);
1874 btrfs_end_transaction(trans);
1875 goto out;
1876 }
1877
1878 ret = btrfs_del_item(trans, root, path);
1879 if (ret) {
1880 btrfs_abort_transaction(trans, ret);
1881 btrfs_end_transaction(trans);
1882 }
1883
1884out:
1885 btrfs_free_path(path);
1886 if (!ret)
1887 ret = btrfs_commit_transaction(trans);
1888 return ret;
1889}
1890
1891
1892
1893
1894
1895
1896static int btrfs_check_raid_min_devices(struct btrfs_fs_info *fs_info,
1897 u64 num_devices)
1898{
1899 u64 all_avail;
1900 unsigned seq;
1901 int i;
1902
1903 do {
1904 seq = read_seqbegin(&fs_info->profiles_lock);
1905
1906 all_avail = fs_info->avail_data_alloc_bits |
1907 fs_info->avail_system_alloc_bits |
1908 fs_info->avail_metadata_alloc_bits;
1909 } while (read_seqretry(&fs_info->profiles_lock, seq));
1910
1911 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
1912 if (!(all_avail & btrfs_raid_array[i].bg_flag))
1913 continue;
1914
1915 if (num_devices < btrfs_raid_array[i].devs_min) {
1916 int ret = btrfs_raid_array[i].mindev_error;
1917
1918 if (ret)
1919 return ret;
1920 }
1921 }
1922
1923 return 0;
1924}
1925
1926static struct btrfs_device * btrfs_find_next_active_device(
1927 struct btrfs_fs_devices *fs_devs, struct btrfs_device *device)
1928{
1929 struct btrfs_device *next_device;
1930
1931 list_for_each_entry(next_device, &fs_devs->devices, dev_list) {
1932 if (next_device != device &&
1933 !test_bit(BTRFS_DEV_STATE_MISSING, &next_device->dev_state)
1934 && next_device->bdev)
1935 return next_device;
1936 }
1937
1938 return NULL;
1939}
1940
1941
1942
1943
1944
1945
1946
1947void __cold btrfs_assign_next_active_device(struct btrfs_device *device,
1948 struct btrfs_device *this_dev)
1949{
1950 struct btrfs_fs_info *fs_info = device->fs_info;
1951 struct btrfs_device *next_device;
1952
1953 if (this_dev)
1954 next_device = this_dev;
1955 else
1956 next_device = btrfs_find_next_active_device(fs_info->fs_devices,
1957 device);
1958 ASSERT(next_device);
1959
1960 if (fs_info->sb->s_bdev &&
1961 (fs_info->sb->s_bdev == device->bdev))
1962 fs_info->sb->s_bdev = next_device->bdev;
1963
1964 if (fs_info->fs_devices->latest_bdev == device->bdev)
1965 fs_info->fs_devices->latest_bdev = next_device->bdev;
1966}
1967
1968
1969
1970
1971
1972static u64 btrfs_num_devices(struct btrfs_fs_info *fs_info)
1973{
1974 u64 num_devices = fs_info->fs_devices->num_devices;
1975
1976 down_read(&fs_info->dev_replace.rwsem);
1977 if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
1978 ASSERT(num_devices > 1);
1979 num_devices--;
1980 }
1981 up_read(&fs_info->dev_replace.rwsem);
1982
1983 return num_devices;
1984}
1985
1986static void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
1987 struct block_device *bdev,
1988 const char *device_path)
1989{
1990 struct btrfs_super_block *disk_super;
1991 int copy_num;
1992
1993 if (!bdev)
1994 return;
1995
1996 for (copy_num = 0; copy_num < BTRFS_SUPER_MIRROR_MAX; copy_num++) {
1997 struct page *page;
1998 int ret;
1999
2000 disk_super = btrfs_read_dev_one_super(bdev, copy_num);
2001 if (IS_ERR(disk_super))
2002 continue;
2003
2004 memset(&disk_super->magic, 0, sizeof(disk_super->magic));
2005
2006 page = virt_to_page(disk_super);
2007 set_page_dirty(page);
2008 lock_page(page);
2009
2010 ret = write_one_page(page);
2011 if (ret)
2012 btrfs_warn(fs_info,
2013 "error clearing superblock number %d (%d)",
2014 copy_num, ret);
2015 btrfs_release_disk_super(disk_super);
2016
2017 }
2018
2019
2020 btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
2021
2022
2023 update_dev_time(device_path);
2024}
2025
2026int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
2027 u64 devid)
2028{
2029 struct btrfs_device *device;
2030 struct btrfs_fs_devices *cur_devices;
2031 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
2032 u64 num_devices;
2033 int ret = 0;
2034
2035 mutex_lock(&uuid_mutex);
2036
2037 num_devices = btrfs_num_devices(fs_info);
2038
2039 ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1);
2040 if (ret)
2041 goto out;
2042
2043 device = btrfs_find_device_by_devspec(fs_info, devid, device_path);
2044
2045 if (IS_ERR(device)) {
2046 if (PTR_ERR(device) == -ENOENT &&
2047 strcmp(device_path, "missing") == 0)
2048 ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
2049 else
2050 ret = PTR_ERR(device);
2051 goto out;
2052 }
2053
2054 if (btrfs_pinned_by_swapfile(fs_info, device)) {
2055 btrfs_warn_in_rcu(fs_info,
2056 "cannot remove device %s (devid %llu) due to active swapfile",
2057 rcu_str_deref(device->name), device->devid);
2058 ret = -ETXTBSY;
2059 goto out;
2060 }
2061
2062 if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
2063 ret = BTRFS_ERROR_DEV_TGT_REPLACE;
2064 goto out;
2065 }
2066
2067 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
2068 fs_info->fs_devices->rw_devices == 1) {
2069 ret = BTRFS_ERROR_DEV_ONLY_WRITABLE;
2070 goto out;
2071 }
2072
2073 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
2074 mutex_lock(&fs_info->chunk_mutex);
2075 list_del_init(&device->dev_alloc_list);
2076 device->fs_devices->rw_devices--;
2077 mutex_unlock(&fs_info->chunk_mutex);
2078 }
2079
2080 mutex_unlock(&uuid_mutex);
2081 ret = btrfs_shrink_device(device, 0);
2082 mutex_lock(&uuid_mutex);
2083 if (ret)
2084 goto error_undo;
2085
2086
2087
2088
2089
2090
2091 ret = btrfs_rm_dev_item(device);
2092 if (ret)
2093 goto error_undo;
2094
2095 clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
2096 btrfs_scrub_cancel_dev(device);
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113 cur_devices = device->fs_devices;
2114 mutex_lock(&fs_devices->device_list_mutex);
2115 list_del_rcu(&device->dev_list);
2116
2117 cur_devices->num_devices--;
2118 cur_devices->total_devices--;
2119
2120 if (cur_devices != fs_devices)
2121 fs_devices->total_devices--;
2122
2123 if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
2124 cur_devices->missing_devices--;
2125
2126 btrfs_assign_next_active_device(device, NULL);
2127
2128 if (device->bdev) {
2129 cur_devices->open_devices--;
2130
2131 btrfs_sysfs_remove_devices_dir(fs_devices, device);
2132 }
2133
2134 num_devices = btrfs_super_num_devices(fs_info->super_copy) - 1;
2135 btrfs_set_super_num_devices(fs_info->super_copy, num_devices);
2136 mutex_unlock(&fs_devices->device_list_mutex);
2137
2138
2139
2140
2141
2142
2143 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
2144 btrfs_scratch_superblocks(fs_info, device->bdev,
2145 device->name->str);
2146
2147 btrfs_close_bdev(device);
2148 synchronize_rcu();
2149 btrfs_free_device(device);
2150
2151 if (cur_devices->open_devices == 0) {
2152 while (fs_devices) {
2153 if (fs_devices->seed == cur_devices) {
2154 fs_devices->seed = cur_devices->seed;
2155 break;
2156 }
2157 fs_devices = fs_devices->seed;
2158 }
2159 cur_devices->seed = NULL;
2160 close_fs_devices(cur_devices);
2161 free_fs_devices(cur_devices);
2162 }
2163
2164out:
2165 mutex_unlock(&uuid_mutex);
2166 return ret;
2167
2168error_undo:
2169 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
2170 mutex_lock(&fs_info->chunk_mutex);
2171 list_add(&device->dev_alloc_list,
2172 &fs_devices->alloc_list);
2173 device->fs_devices->rw_devices++;
2174 mutex_unlock(&fs_info->chunk_mutex);
2175 }
2176 goto out;
2177}
2178
2179void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev)
2180{
2181 struct btrfs_fs_devices *fs_devices;
2182
2183 lockdep_assert_held(&srcdev->fs_info->fs_devices->device_list_mutex);
2184
2185
2186
2187
2188
2189
2190
2191 fs_devices = srcdev->fs_devices;
2192
2193 list_del_rcu(&srcdev->dev_list);
2194 list_del(&srcdev->dev_alloc_list);
2195 fs_devices->num_devices--;
2196 if (test_bit(BTRFS_DEV_STATE_MISSING, &srcdev->dev_state))
2197 fs_devices->missing_devices--;
2198
2199 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &srcdev->dev_state))
2200 fs_devices->rw_devices--;
2201
2202 if (srcdev->bdev)
2203 fs_devices->open_devices--;
2204}
2205
2206void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev)
2207{
2208 struct btrfs_fs_info *fs_info = srcdev->fs_info;
2209 struct btrfs_fs_devices *fs_devices = srcdev->fs_devices;
2210
2211 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &srcdev->dev_state)) {
2212
2213 btrfs_scratch_superblocks(fs_info, srcdev->bdev,
2214 srcdev->name->str);
2215 }
2216
2217 btrfs_close_bdev(srcdev);
2218 synchronize_rcu();
2219 btrfs_free_device(srcdev);
2220
2221
2222 if (!fs_devices->num_devices) {
2223 struct btrfs_fs_devices *tmp_fs_devices;
2224
2225
2226
2227
2228
2229
2230
2231 ASSERT(fs_devices->seeding);
2232
2233 tmp_fs_devices = fs_info->fs_devices;
2234 while (tmp_fs_devices) {
2235 if (tmp_fs_devices->seed == fs_devices) {
2236 tmp_fs_devices->seed = fs_devices->seed;
2237 break;
2238 }
2239 tmp_fs_devices = tmp_fs_devices->seed;
2240 }
2241 fs_devices->seed = NULL;
2242 close_fs_devices(fs_devices);
2243 free_fs_devices(fs_devices);
2244 }
2245}
2246
2247void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev)
2248{
2249 struct btrfs_fs_devices *fs_devices = tgtdev->fs_info->fs_devices;
2250
2251 mutex_lock(&fs_devices->device_list_mutex);
2252
2253 btrfs_sysfs_remove_devices_dir(fs_devices, tgtdev);
2254
2255 if (tgtdev->bdev)
2256 fs_devices->open_devices--;
2257
2258 fs_devices->num_devices--;
2259
2260 btrfs_assign_next_active_device(tgtdev, NULL);
2261
2262 list_del_rcu(&tgtdev->dev_list);
2263
2264 mutex_unlock(&fs_devices->device_list_mutex);
2265
2266
2267
2268
2269
2270
2271
2272
2273 btrfs_scratch_superblocks(tgtdev->fs_info, tgtdev->bdev,
2274 tgtdev->name->str);
2275
2276 btrfs_close_bdev(tgtdev);
2277 synchronize_rcu();
2278 btrfs_free_device(tgtdev);
2279}
2280
2281static struct btrfs_device *btrfs_find_device_by_path(
2282 struct btrfs_fs_info *fs_info, const char *device_path)
2283{
2284 int ret = 0;
2285 struct btrfs_super_block *disk_super;
2286 u64 devid;
2287 u8 *dev_uuid;
2288 struct block_device *bdev;
2289 struct btrfs_device *device;
2290
2291 ret = btrfs_get_bdev_and_sb(device_path, FMODE_READ,
2292 fs_info->bdev_holder, 0, &bdev, &disk_super);
2293 if (ret)
2294 return ERR_PTR(ret);
2295
2296 devid = btrfs_stack_device_id(&disk_super->dev_item);
2297 dev_uuid = disk_super->dev_item.uuid;
2298 if (btrfs_fs_incompat(fs_info, METADATA_UUID))
2299 device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
2300 disk_super->metadata_uuid, true);
2301 else
2302 device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
2303 disk_super->fsid, true);
2304
2305 btrfs_release_disk_super(disk_super);
2306 if (!device)
2307 device = ERR_PTR(-ENOENT);
2308 blkdev_put(bdev, FMODE_READ);
2309 return device;
2310}
2311
2312
2313
2314
2315struct btrfs_device *btrfs_find_device_by_devspec(
2316 struct btrfs_fs_info *fs_info, u64 devid,
2317 const char *device_path)
2318{
2319 struct btrfs_device *device;
2320
2321 if (devid) {
2322 device = btrfs_find_device(fs_info->fs_devices, devid, NULL,
2323 NULL, true);
2324 if (!device)
2325 return ERR_PTR(-ENOENT);
2326 return device;
2327 }
2328
2329 if (!device_path || !device_path[0])
2330 return ERR_PTR(-EINVAL);
2331
2332 if (strcmp(device_path, "missing") == 0) {
2333
2334 list_for_each_entry(device, &fs_info->fs_devices->devices,
2335 dev_list) {
2336 if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
2337 &device->dev_state) && !device->bdev)
2338 return device;
2339 }
2340 return ERR_PTR(-ENOENT);
2341 }
2342
2343 return btrfs_find_device_by_path(fs_info, device_path);
2344}
2345
2346
2347
2348
2349static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
2350{
2351 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
2352 struct btrfs_fs_devices *old_devices;
2353 struct btrfs_fs_devices *seed_devices;
2354 struct btrfs_super_block *disk_super = fs_info->super_copy;
2355 struct btrfs_device *device;
2356 u64 super_flags;
2357
2358 lockdep_assert_held(&uuid_mutex);
2359 if (!fs_devices->seeding)
2360 return -EINVAL;
2361
2362 seed_devices = alloc_fs_devices(NULL, NULL);
2363 if (IS_ERR(seed_devices))
2364 return PTR_ERR(seed_devices);
2365
2366 old_devices = clone_fs_devices(fs_devices);
2367 if (IS_ERR(old_devices)) {
2368 kfree(seed_devices);
2369 return PTR_ERR(old_devices);
2370 }
2371
2372 list_add(&old_devices->fs_list, &fs_uuids);
2373
2374 memcpy(seed_devices, fs_devices, sizeof(*seed_devices));
2375 seed_devices->opened = 1;
2376 INIT_LIST_HEAD(&seed_devices->devices);
2377 INIT_LIST_HEAD(&seed_devices->alloc_list);
2378 mutex_init(&seed_devices->device_list_mutex);
2379
2380 mutex_lock(&fs_devices->device_list_mutex);
2381 list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
2382 synchronize_rcu);
2383 list_for_each_entry(device, &seed_devices->devices, dev_list)
2384 device->fs_devices = seed_devices;
2385
2386 mutex_lock(&fs_info->chunk_mutex);
2387 list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list);
2388 mutex_unlock(&fs_info->chunk_mutex);
2389
2390 fs_devices->seeding = false;
2391 fs_devices->num_devices = 0;
2392 fs_devices->open_devices = 0;
2393 fs_devices->missing_devices = 0;
2394 fs_devices->rotating = false;
2395 fs_devices->seed = seed_devices;
2396
2397 generate_random_uuid(fs_devices->fsid);
2398 memcpy(fs_devices->metadata_uuid, fs_devices->fsid, BTRFS_FSID_SIZE);
2399 memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
2400 mutex_unlock(&fs_devices->device_list_mutex);
2401
2402 super_flags = btrfs_super_flags(disk_super) &
2403 ~BTRFS_SUPER_FLAG_SEEDING;
2404 btrfs_set_super_flags(disk_super, super_flags);
2405
2406 return 0;
2407}
2408
2409
2410
2411
2412static int btrfs_finish_sprout(struct btrfs_trans_handle *trans)
2413{
2414 struct btrfs_fs_info *fs_info = trans->fs_info;
2415 struct btrfs_root *root = fs_info->chunk_root;
2416 struct btrfs_path *path;
2417 struct extent_buffer *leaf;
2418 struct btrfs_dev_item *dev_item;
2419 struct btrfs_device *device;
2420 struct btrfs_key key;
2421 u8 fs_uuid[BTRFS_FSID_SIZE];
2422 u8 dev_uuid[BTRFS_UUID_SIZE];
2423 u64 devid;
2424 int ret;
2425
2426 path = btrfs_alloc_path();
2427 if (!path)
2428 return -ENOMEM;
2429
2430 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2431 key.offset = 0;
2432 key.type = BTRFS_DEV_ITEM_KEY;
2433
2434 while (1) {
2435 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2436 if (ret < 0)
2437 goto error;
2438
2439 leaf = path->nodes[0];
2440next_slot:
2441 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
2442 ret = btrfs_next_leaf(root, path);
2443 if (ret > 0)
2444 break;
2445 if (ret < 0)
2446 goto error;
2447 leaf = path->nodes[0];
2448 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2449 btrfs_release_path(path);
2450 continue;
2451 }
2452
2453 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2454 if (key.objectid != BTRFS_DEV_ITEMS_OBJECTID ||
2455 key.type != BTRFS_DEV_ITEM_KEY)
2456 break;
2457
2458 dev_item = btrfs_item_ptr(leaf, path->slots[0],
2459 struct btrfs_dev_item);
2460 devid = btrfs_device_id(leaf, dev_item);
2461 read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
2462 BTRFS_UUID_SIZE);
2463 read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
2464 BTRFS_FSID_SIZE);
2465 device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
2466 fs_uuid, true);
2467 BUG_ON(!device);
2468
2469 if (device->fs_devices->seeding) {
2470 btrfs_set_device_generation(leaf, dev_item,
2471 device->generation);
2472 btrfs_mark_buffer_dirty(leaf);
2473 }
2474
2475 path->slots[0]++;
2476 goto next_slot;
2477 }
2478 ret = 0;
2479error:
2480 btrfs_free_path(path);
2481 return ret;
2482}
2483
2484int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path)
2485{
2486 struct btrfs_root *root = fs_info->dev_root;
2487 struct request_queue *q;
2488 struct btrfs_trans_handle *trans;
2489 struct btrfs_device *device;
2490 struct block_device *bdev;
2491 struct super_block *sb = fs_info->sb;
2492 struct rcu_string *name;
2493 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
2494 u64 orig_super_total_bytes;
2495 u64 orig_super_num_devices;
2496 int seeding_dev = 0;
2497 int ret = 0;
2498 bool unlocked = false;
2499
2500 if (sb_rdonly(sb) && !fs_devices->seeding)
2501 return -EROFS;
2502
2503 bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
2504 fs_info->bdev_holder);
2505 if (IS_ERR(bdev))
2506 return PTR_ERR(bdev);
2507
2508 if (fs_devices->seeding) {
2509 seeding_dev = 1;
2510 down_write(&sb->s_umount);
2511 mutex_lock(&uuid_mutex);
2512 }
2513
2514 filemap_write_and_wait(bdev->bd_inode->i_mapping);
2515
2516 mutex_lock(&fs_devices->device_list_mutex);
2517 list_for_each_entry(device, &fs_devices->devices, dev_list) {
2518 if (device->bdev == bdev) {
2519 ret = -EEXIST;
2520 mutex_unlock(
2521 &fs_devices->device_list_mutex);
2522 goto error;
2523 }
2524 }
2525 mutex_unlock(&fs_devices->device_list_mutex);
2526
2527 device = btrfs_alloc_device(fs_info, NULL, NULL);
2528 if (IS_ERR(device)) {
2529
2530 ret = PTR_ERR(device);
2531 goto error;
2532 }
2533
2534 name = rcu_string_strdup(device_path, GFP_KERNEL);
2535 if (!name) {
2536 ret = -ENOMEM;
2537 goto error_free_device;
2538 }
2539 rcu_assign_pointer(device->name, name);
2540
2541 trans = btrfs_start_transaction(root, 0);
2542 if (IS_ERR(trans)) {
2543 ret = PTR_ERR(trans);
2544 goto error_free_device;
2545 }
2546
2547 q = bdev_get_queue(bdev);
2548 set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
2549 device->generation = trans->transid;
2550 device->io_width = fs_info->sectorsize;
2551 device->io_align = fs_info->sectorsize;
2552 device->sector_size = fs_info->sectorsize;
2553 device->total_bytes = round_down(i_size_read(bdev->bd_inode),
2554 fs_info->sectorsize);
2555 device->disk_total_bytes = device->total_bytes;
2556 device->commit_total_bytes = device->total_bytes;
2557 device->fs_info = fs_info;
2558 device->bdev = bdev;
2559 set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
2560 clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
2561 device->mode = FMODE_EXCL;
2562 device->dev_stats_valid = 1;
2563 set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
2564
2565 if (seeding_dev) {
2566 sb->s_flags &= ~SB_RDONLY;
2567 ret = btrfs_prepare_sprout(fs_info);
2568 if (ret) {
2569 btrfs_abort_transaction(trans, ret);
2570 goto error_trans;
2571 }
2572 }
2573
2574 device->fs_devices = fs_devices;
2575
2576 mutex_lock(&fs_devices->device_list_mutex);
2577 mutex_lock(&fs_info->chunk_mutex);
2578 list_add_rcu(&device->dev_list, &fs_devices->devices);
2579 list_add(&device->dev_alloc_list, &fs_devices->alloc_list);
2580 fs_devices->num_devices++;
2581 fs_devices->open_devices++;
2582 fs_devices->rw_devices++;
2583 fs_devices->total_devices++;
2584 fs_devices->total_rw_bytes += device->total_bytes;
2585
2586 atomic64_add(device->total_bytes, &fs_info->free_chunk_space);
2587
2588 if (!blk_queue_nonrot(q))
2589 fs_devices->rotating = true;
2590
2591 orig_super_total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
2592 btrfs_set_super_total_bytes(fs_info->super_copy,
2593 round_down(orig_super_total_bytes + device->total_bytes,
2594 fs_info->sectorsize));
2595
2596 orig_super_num_devices = btrfs_super_num_devices(fs_info->super_copy);
2597 btrfs_set_super_num_devices(fs_info->super_copy,
2598 orig_super_num_devices + 1);
2599
2600
2601 btrfs_sysfs_add_devices_dir(fs_devices, device);
2602
2603
2604
2605
2606
2607 btrfs_clear_space_info_full(fs_info);
2608
2609 mutex_unlock(&fs_info->chunk_mutex);
2610 mutex_unlock(&fs_devices->device_list_mutex);
2611
2612 if (seeding_dev) {
2613 mutex_lock(&fs_info->chunk_mutex);
2614 ret = init_first_rw_device(trans);
2615 mutex_unlock(&fs_info->chunk_mutex);
2616 if (ret) {
2617 btrfs_abort_transaction(trans, ret);
2618 goto error_sysfs;
2619 }
2620 }
2621
2622 ret = btrfs_add_dev_item(trans, device);
2623 if (ret) {
2624 btrfs_abort_transaction(trans, ret);
2625 goto error_sysfs;
2626 }
2627
2628 if (seeding_dev) {
2629 ret = btrfs_finish_sprout(trans);
2630 if (ret) {
2631 btrfs_abort_transaction(trans, ret);
2632 goto error_sysfs;
2633 }
2634
2635 btrfs_sysfs_update_sprout_fsid(fs_devices,
2636 fs_info->fs_devices->fsid);
2637 }
2638
2639 ret = btrfs_commit_transaction(trans);
2640
2641 if (seeding_dev) {
2642 mutex_unlock(&uuid_mutex);
2643 up_write(&sb->s_umount);
2644 unlocked = true;
2645
2646 if (ret)
2647 return ret;
2648
2649 ret = btrfs_relocate_sys_chunks(fs_info);
2650 if (ret < 0)
2651 btrfs_handle_fs_error(fs_info, ret,
2652 "Failed to relocate sys chunks after device initialization. This can be fixed using the \"btrfs balance\" command.");
2653 trans = btrfs_attach_transaction(root);
2654 if (IS_ERR(trans)) {
2655 if (PTR_ERR(trans) == -ENOENT)
2656 return 0;
2657 ret = PTR_ERR(trans);
2658 trans = NULL;
2659 goto error_sysfs;
2660 }
2661 ret = btrfs_commit_transaction(trans);
2662 }
2663
2664
2665
2666
2667
2668
2669
2670
2671 btrfs_forget_devices(device_path);
2672
2673
2674 update_dev_time(device_path);
2675
2676 return ret;
2677
2678error_sysfs:
2679 btrfs_sysfs_remove_devices_dir(fs_devices, device);
2680 mutex_lock(&fs_info->fs_devices->device_list_mutex);
2681 mutex_lock(&fs_info->chunk_mutex);
2682 list_del_rcu(&device->dev_list);
2683 list_del(&device->dev_alloc_list);
2684 fs_info->fs_devices->num_devices--;
2685 fs_info->fs_devices->open_devices--;
2686 fs_info->fs_devices->rw_devices--;
2687 fs_info->fs_devices->total_devices--;
2688 fs_info->fs_devices->total_rw_bytes -= device->total_bytes;
2689 atomic64_sub(device->total_bytes, &fs_info->free_chunk_space);
2690 btrfs_set_super_total_bytes(fs_info->super_copy,
2691 orig_super_total_bytes);
2692 btrfs_set_super_num_devices(fs_info->super_copy,
2693 orig_super_num_devices);
2694 mutex_unlock(&fs_info->chunk_mutex);
2695 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2696error_trans:
2697 if (seeding_dev)
2698 sb->s_flags |= SB_RDONLY;
2699 if (trans)
2700 btrfs_end_transaction(trans);
2701error_free_device:
2702 btrfs_free_device(device);
2703error:
2704 blkdev_put(bdev, FMODE_EXCL);
2705 if (seeding_dev && !unlocked) {
2706 mutex_unlock(&uuid_mutex);
2707 up_write(&sb->s_umount);
2708 }
2709 return ret;
2710}
2711
2712static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
2713 struct btrfs_device *device)
2714{
2715 int ret;
2716 struct btrfs_path *path;
2717 struct btrfs_root *root = device->fs_info->chunk_root;
2718 struct btrfs_dev_item *dev_item;
2719 struct extent_buffer *leaf;
2720 struct btrfs_key key;
2721
2722 path = btrfs_alloc_path();
2723 if (!path)
2724 return -ENOMEM;
2725
2726 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2727 key.type = BTRFS_DEV_ITEM_KEY;
2728 key.offset = device->devid;
2729
2730 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2731 if (ret < 0)
2732 goto out;
2733
2734 if (ret > 0) {
2735 ret = -ENOENT;
2736 goto out;
2737 }
2738
2739 leaf = path->nodes[0];
2740 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
2741
2742 btrfs_set_device_id(leaf, dev_item, device->devid);
2743 btrfs_set_device_type(leaf, dev_item, device->type);
2744 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
2745 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
2746 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
2747 btrfs_set_device_total_bytes(leaf, dev_item,
2748 btrfs_device_get_disk_total_bytes(device));
2749 btrfs_set_device_bytes_used(leaf, dev_item,
2750 btrfs_device_get_bytes_used(device));
2751 btrfs_mark_buffer_dirty(leaf);
2752
2753out:
2754 btrfs_free_path(path);
2755 return ret;
2756}
2757
2758int btrfs_grow_device(struct btrfs_trans_handle *trans,
2759 struct btrfs_device *device, u64 new_size)
2760{
2761 struct btrfs_fs_info *fs_info = device->fs_info;
2762 struct btrfs_super_block *super_copy = fs_info->super_copy;
2763 u64 old_total;
2764 u64 diff;
2765
2766 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
2767 return -EACCES;
2768
2769 new_size = round_down(new_size, fs_info->sectorsize);
2770
2771 mutex_lock(&fs_info->chunk_mutex);
2772 old_total = btrfs_super_total_bytes(super_copy);
2773 diff = round_down(new_size - device->total_bytes, fs_info->sectorsize);
2774
2775 if (new_size <= device->total_bytes ||
2776 test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
2777 mutex_unlock(&fs_info->chunk_mutex);
2778 return -EINVAL;
2779 }
2780
2781 btrfs_set_super_total_bytes(super_copy,
2782 round_down(old_total + diff, fs_info->sectorsize));
2783 device->fs_devices->total_rw_bytes += diff;
2784
2785 btrfs_device_set_total_bytes(device, new_size);
2786 btrfs_device_set_disk_total_bytes(device, new_size);
2787 btrfs_clear_space_info_full(device->fs_info);
2788 if (list_empty(&device->post_commit_list))
2789 list_add_tail(&device->post_commit_list,
2790 &trans->transaction->dev_update_list);
2791 mutex_unlock(&fs_info->chunk_mutex);
2792
2793 return btrfs_update_device(trans, device);
2794}
2795
2796static int btrfs_free_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
2797{
2798 struct btrfs_fs_info *fs_info = trans->fs_info;
2799 struct btrfs_root *root = fs_info->chunk_root;
2800 int ret;
2801 struct btrfs_path *path;
2802 struct btrfs_key key;
2803
2804 path = btrfs_alloc_path();
2805 if (!path)
2806 return -ENOMEM;
2807
2808 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
2809 key.offset = chunk_offset;
2810 key.type = BTRFS_CHUNK_ITEM_KEY;
2811
2812 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2813 if (ret < 0)
2814 goto out;
2815 else if (ret > 0) {
2816 btrfs_handle_fs_error(fs_info, -ENOENT,
2817 "Failed lookup while freeing chunk.");
2818 ret = -ENOENT;
2819 goto out;
2820 }
2821
2822 ret = btrfs_del_item(trans, root, path);
2823 if (ret < 0)
2824 btrfs_handle_fs_error(fs_info, ret,
2825 "Failed to delete chunk item.");
2826out:
2827 btrfs_free_path(path);
2828 return ret;
2829}
2830
2831static int btrfs_del_sys_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
2832{
2833 struct btrfs_super_block *super_copy = fs_info->super_copy;
2834 struct btrfs_disk_key *disk_key;
2835 struct btrfs_chunk *chunk;
2836 u8 *ptr;
2837 int ret = 0;
2838 u32 num_stripes;
2839 u32 array_size;
2840 u32 len = 0;
2841 u32 cur;
2842 struct btrfs_key key;
2843
2844 mutex_lock(&fs_info->chunk_mutex);
2845 array_size = btrfs_super_sys_array_size(super_copy);
2846
2847 ptr = super_copy->sys_chunk_array;
2848 cur = 0;
2849
2850 while (cur < array_size) {
2851 disk_key = (struct btrfs_disk_key *)ptr;
2852 btrfs_disk_key_to_cpu(&key, disk_key);
2853
2854 len = sizeof(*disk_key);
2855
2856 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
2857 chunk = (struct btrfs_chunk *)(ptr + len);
2858 num_stripes = btrfs_stack_chunk_num_stripes(chunk);
2859 len += btrfs_chunk_item_size(num_stripes);
2860 } else {
2861 ret = -EIO;
2862 break;
2863 }
2864 if (key.objectid == BTRFS_FIRST_CHUNK_TREE_OBJECTID &&
2865 key.offset == chunk_offset) {
2866 memmove(ptr, ptr + len, array_size - (cur + len));
2867 array_size -= len;
2868 btrfs_set_super_sys_array_size(super_copy, array_size);
2869 } else {
2870 ptr += len;
2871 cur += len;
2872 }
2873 }
2874 mutex_unlock(&fs_info->chunk_mutex);
2875 return ret;
2876}
2877
2878
2879
2880
2881
2882
2883
2884
2885struct extent_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info,
2886 u64 logical, u64 length)
2887{
2888 struct extent_map_tree *em_tree;
2889 struct extent_map *em;
2890
2891 em_tree = &fs_info->mapping_tree;
2892 read_lock(&em_tree->lock);
2893 em = lookup_extent_mapping(em_tree, logical, length);
2894 read_unlock(&em_tree->lock);
2895
2896 if (!em) {
2897 btrfs_crit(fs_info, "unable to find logical %llu length %llu",
2898 logical, length);
2899 return ERR_PTR(-EINVAL);
2900 }
2901
2902 if (em->start > logical || em->start + em->len < logical) {
2903 btrfs_crit(fs_info,
2904 "found a bad mapping, wanted %llu-%llu, found %llu-%llu",
2905 logical, length, em->start, em->start + em->len);
2906 free_extent_map(em);
2907 return ERR_PTR(-EINVAL);
2908 }
2909
2910
2911 return em;
2912}
2913
2914int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
2915{
2916 struct btrfs_fs_info *fs_info = trans->fs_info;
2917 struct extent_map *em;
2918 struct map_lookup *map;
2919 u64 dev_extent_len = 0;
2920 int i, ret = 0;
2921 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
2922
2923 em = btrfs_get_chunk_map(fs_info, chunk_offset, 1);
2924 if (IS_ERR(em)) {
2925
2926
2927
2928
2929
2930 ASSERT(0);
2931 return PTR_ERR(em);
2932 }
2933 map = em->map_lookup;
2934 mutex_lock(&fs_info->chunk_mutex);
2935 check_system_chunk(trans, map->type);
2936 mutex_unlock(&fs_info->chunk_mutex);
2937
2938
2939
2940
2941
2942
2943 mutex_lock(&fs_devices->device_list_mutex);
2944 for (i = 0; i < map->num_stripes; i++) {
2945 struct btrfs_device *device = map->stripes[i].dev;
2946 ret = btrfs_free_dev_extent(trans, device,
2947 map->stripes[i].physical,
2948 &dev_extent_len);
2949 if (ret) {
2950 mutex_unlock(&fs_devices->device_list_mutex);
2951 btrfs_abort_transaction(trans, ret);
2952 goto out;
2953 }
2954
2955 if (device->bytes_used > 0) {
2956 mutex_lock(&fs_info->chunk_mutex);
2957 btrfs_device_set_bytes_used(device,
2958 device->bytes_used - dev_extent_len);
2959 atomic64_add(dev_extent_len, &fs_info->free_chunk_space);
2960 btrfs_clear_space_info_full(fs_info);
2961 mutex_unlock(&fs_info->chunk_mutex);
2962 }
2963
2964 ret = btrfs_update_device(trans, device);
2965 if (ret) {
2966 mutex_unlock(&fs_devices->device_list_mutex);
2967 btrfs_abort_transaction(trans, ret);
2968 goto out;
2969 }
2970 }
2971 mutex_unlock(&fs_devices->device_list_mutex);
2972
2973 ret = btrfs_free_chunk(trans, chunk_offset);
2974 if (ret) {
2975 btrfs_abort_transaction(trans, ret);
2976 goto out;
2977 }
2978
2979 trace_btrfs_chunk_free(fs_info, map, chunk_offset, em->len);
2980
2981 if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
2982 ret = btrfs_del_sys_chunk(fs_info, chunk_offset);
2983 if (ret) {
2984 btrfs_abort_transaction(trans, ret);
2985 goto out;
2986 }
2987 }
2988
2989 ret = btrfs_remove_block_group(trans, chunk_offset, em);
2990 if (ret) {
2991 btrfs_abort_transaction(trans, ret);
2992 goto out;
2993 }
2994
2995out:
2996
2997 free_extent_map(em);
2998 return ret;
2999}
3000
3001static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
3002{
3003 struct btrfs_root *root = fs_info->chunk_root;
3004 struct btrfs_trans_handle *trans;
3005 struct btrfs_block_group *block_group;
3006 int ret;
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020 lockdep_assert_held(&fs_info->delete_unused_bgs_mutex);
3021
3022
3023 btrfs_scrub_pause(fs_info);
3024 ret = btrfs_relocate_block_group(fs_info, chunk_offset);
3025 btrfs_scrub_continue(fs_info);
3026 if (ret)
3027 return ret;
3028
3029 block_group = btrfs_lookup_block_group(fs_info, chunk_offset);
3030 if (!block_group)
3031 return -ENOENT;
3032 btrfs_discard_cancel_work(&fs_info->discard_ctl, block_group);
3033 btrfs_put_block_group(block_group);
3034
3035 trans = btrfs_start_trans_remove_block_group(root->fs_info,
3036 chunk_offset);
3037 if (IS_ERR(trans)) {
3038 ret = PTR_ERR(trans);
3039 btrfs_handle_fs_error(root->fs_info, ret, NULL);
3040 return ret;
3041 }
3042
3043
3044
3045
3046
3047 ret = btrfs_remove_chunk(trans, chunk_offset);
3048 btrfs_end_transaction(trans);
3049 return ret;
3050}
3051
3052static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info)
3053{
3054 struct btrfs_root *chunk_root = fs_info->chunk_root;
3055 struct btrfs_path *path;
3056 struct extent_buffer *leaf;
3057 struct btrfs_chunk *chunk;
3058 struct btrfs_key key;
3059 struct btrfs_key found_key;
3060 u64 chunk_type;
3061 bool retried = false;
3062 int failed = 0;
3063 int ret;
3064
3065 path = btrfs_alloc_path();
3066 if (!path)
3067 return -ENOMEM;
3068
3069again:
3070 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
3071 key.offset = (u64)-1;
3072 key.type = BTRFS_CHUNK_ITEM_KEY;
3073
3074 while (1) {
3075 mutex_lock(&fs_info->delete_unused_bgs_mutex);
3076 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
3077 if (ret < 0) {
3078 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3079 goto error;
3080 }
3081 BUG_ON(ret == 0);
3082
3083 ret = btrfs_previous_item(chunk_root, path, key.objectid,
3084 key.type);
3085 if (ret)
3086 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3087 if (ret < 0)
3088 goto error;
3089 if (ret > 0)
3090 break;
3091
3092 leaf = path->nodes[0];
3093 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3094
3095 chunk = btrfs_item_ptr(leaf, path->slots[0],
3096 struct btrfs_chunk);
3097 chunk_type = btrfs_chunk_type(leaf, chunk);
3098 btrfs_release_path(path);
3099
3100 if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) {
3101 ret = btrfs_relocate_chunk(fs_info, found_key.offset);
3102 if (ret == -ENOSPC)
3103 failed++;
3104 else
3105 BUG_ON(ret);
3106 }
3107 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3108
3109 if (found_key.offset == 0)
3110 break;
3111 key.offset = found_key.offset - 1;
3112 }
3113 ret = 0;
3114 if (failed && !retried) {
3115 failed = 0;
3116 retried = true;
3117 goto again;
3118 } else if (WARN_ON(failed && retried)) {
3119 ret = -ENOSPC;
3120 }
3121error:
3122 btrfs_free_path(path);
3123 return ret;
3124}
3125
3126
3127
3128
3129
3130
3131static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info,
3132 u64 chunk_offset)
3133{
3134 struct btrfs_block_group *cache;
3135 u64 bytes_used;
3136 u64 chunk_type;
3137
3138 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
3139 ASSERT(cache);
3140 chunk_type = cache->flags;
3141 btrfs_put_block_group(cache);
3142
3143 if (!(chunk_type & BTRFS_BLOCK_GROUP_DATA))
3144 return 0;
3145
3146 spin_lock(&fs_info->data_sinfo->lock);
3147 bytes_used = fs_info->data_sinfo->bytes_used;
3148 spin_unlock(&fs_info->data_sinfo->lock);
3149
3150 if (!bytes_used) {
3151 struct btrfs_trans_handle *trans;
3152 int ret;
3153
3154 trans = btrfs_join_transaction(fs_info->tree_root);
3155 if (IS_ERR(trans))
3156 return PTR_ERR(trans);
3157
3158 ret = btrfs_force_chunk_alloc(trans, BTRFS_BLOCK_GROUP_DATA);
3159 btrfs_end_transaction(trans);
3160 if (ret < 0)
3161 return ret;
3162 return 1;
3163 }
3164
3165 return 0;
3166}
3167
3168static int insert_balance_item(struct btrfs_fs_info *fs_info,
3169 struct btrfs_balance_control *bctl)
3170{
3171 struct btrfs_root *root = fs_info->tree_root;
3172 struct btrfs_trans_handle *trans;
3173 struct btrfs_balance_item *item;
3174 struct btrfs_disk_balance_args disk_bargs;
3175 struct btrfs_path *path;
3176 struct extent_buffer *leaf;
3177 struct btrfs_key key;
3178 int ret, err;
3179
3180 path = btrfs_alloc_path();
3181 if (!path)
3182 return -ENOMEM;
3183
3184 trans = btrfs_start_transaction(root, 0);
3185 if (IS_ERR(trans)) {
3186 btrfs_free_path(path);
3187 return PTR_ERR(trans);
3188 }
3189
3190 key.objectid = BTRFS_BALANCE_OBJECTID;
3191 key.type = BTRFS_TEMPORARY_ITEM_KEY;
3192 key.offset = 0;
3193
3194 ret = btrfs_insert_empty_item(trans, root, path, &key,
3195 sizeof(*item));
3196 if (ret)
3197 goto out;
3198
3199 leaf = path->nodes[0];
3200 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
3201
3202 memzero_extent_buffer(leaf, (unsigned long)item, sizeof(*item));
3203
3204 btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->data);
3205 btrfs_set_balance_data(leaf, item, &disk_bargs);
3206 btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->meta);
3207 btrfs_set_balance_meta(leaf, item, &disk_bargs);
3208 btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->sys);
3209 btrfs_set_balance_sys(leaf, item, &disk_bargs);
3210
3211 btrfs_set_balance_flags(leaf, item, bctl->flags);
3212
3213 btrfs_mark_buffer_dirty(leaf);
3214out:
3215 btrfs_free_path(path);
3216 err = btrfs_commit_transaction(trans);
3217 if (err && !ret)
3218 ret = err;
3219 return ret;
3220}
3221
3222static int del_balance_item(struct btrfs_fs_info *fs_info)
3223{
3224 struct btrfs_root *root = fs_info->tree_root;
3225 struct btrfs_trans_handle *trans;
3226 struct btrfs_path *path;
3227 struct btrfs_key key;
3228 int ret, err;
3229
3230 path = btrfs_alloc_path();
3231 if (!path)
3232 return -ENOMEM;
3233
3234 trans = btrfs_start_transaction(root, 0);
3235 if (IS_ERR(trans)) {
3236 btrfs_free_path(path);
3237 return PTR_ERR(trans);
3238 }
3239
3240 key.objectid = BTRFS_BALANCE_OBJECTID;
3241 key.type = BTRFS_TEMPORARY_ITEM_KEY;
3242 key.offset = 0;
3243
3244 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3245 if (ret < 0)
3246 goto out;
3247 if (ret > 0) {
3248 ret = -ENOENT;
3249 goto out;
3250 }
3251
3252 ret = btrfs_del_item(trans, root, path);
3253out:
3254 btrfs_free_path(path);
3255 err = btrfs_commit_transaction(trans);
3256 if (err && !ret)
3257 ret = err;
3258 return ret;
3259}
3260
3261
3262
3263
3264
3265static void update_balance_args(struct btrfs_balance_control *bctl)
3266{
3267
3268
3269
3270 if (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)
3271 bctl->data.flags |= BTRFS_BALANCE_ARGS_SOFT;
3272 if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)
3273 bctl->sys.flags |= BTRFS_BALANCE_ARGS_SOFT;
3274 if (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)
3275 bctl->meta.flags |= BTRFS_BALANCE_ARGS_SOFT;
3276
3277
3278
3279
3280
3281
3282
3283
3284 if (!(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE) &&
3285 !(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
3286 !(bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
3287 bctl->data.flags |= BTRFS_BALANCE_ARGS_USAGE;
3288 bctl->data.usage = 90;
3289 }
3290 if (!(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE) &&
3291 !(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
3292 !(bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
3293 bctl->sys.flags |= BTRFS_BALANCE_ARGS_USAGE;
3294 bctl->sys.usage = 90;
3295 }
3296 if (!(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE) &&
3297 !(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
3298 !(bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
3299 bctl->meta.flags |= BTRFS_BALANCE_ARGS_USAGE;
3300 bctl->meta.usage = 90;
3301 }
3302}
3303
3304
3305
3306
3307static void reset_balance_state(struct btrfs_fs_info *fs_info)
3308{
3309 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3310 int ret;
3311
3312 BUG_ON(!fs_info->balance_ctl);
3313
3314 spin_lock(&fs_info->balance_lock);
3315 fs_info->balance_ctl = NULL;
3316 spin_unlock(&fs_info->balance_lock);
3317
3318 kfree(bctl);
3319 ret = del_balance_item(fs_info);
3320 if (ret)
3321 btrfs_handle_fs_error(fs_info, ret, NULL);
3322}
3323
3324
3325
3326
3327
3328static int chunk_profiles_filter(u64 chunk_type,
3329 struct btrfs_balance_args *bargs)
3330{
3331 chunk_type = chunk_to_extended(chunk_type) &
3332 BTRFS_EXTENDED_PROFILE_MASK;
3333
3334 if (bargs->profiles & chunk_type)
3335 return 0;
3336
3337 return 1;
3338}
3339
3340static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
3341 struct btrfs_balance_args *bargs)
3342{
3343 struct btrfs_block_group *cache;
3344 u64 chunk_used;
3345 u64 user_thresh_min;
3346 u64 user_thresh_max;
3347 int ret = 1;
3348
3349 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
3350 chunk_used = cache->used;
3351
3352 if (bargs->usage_min == 0)
3353 user_thresh_min = 0;
3354 else
3355 user_thresh_min = div_factor_fine(cache->length,
3356 bargs->usage_min);
3357
3358 if (bargs->usage_max == 0)
3359 user_thresh_max = 1;
3360 else if (bargs->usage_max > 100)
3361 user_thresh_max = cache->length;
3362 else
3363 user_thresh_max = div_factor_fine(cache->length,
3364 bargs->usage_max);
3365
3366 if (user_thresh_min <= chunk_used && chunk_used < user_thresh_max)
3367 ret = 0;
3368
3369 btrfs_put_block_group(cache);
3370 return ret;
3371}
3372
3373static int chunk_usage_filter(struct btrfs_fs_info *fs_info,
3374 u64 chunk_offset, struct btrfs_balance_args *bargs)
3375{
3376 struct btrfs_block_group *cache;
3377 u64 chunk_used, user_thresh;
3378 int ret = 1;
3379
3380 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
3381 chunk_used = cache->used;
3382
3383 if (bargs->usage_min == 0)
3384 user_thresh = 1;
3385 else if (bargs->usage > 100)
3386 user_thresh = cache->length;
3387 else
3388 user_thresh = div_factor_fine(cache->length, bargs->usage);
3389
3390 if (chunk_used < user_thresh)
3391 ret = 0;
3392
3393 btrfs_put_block_group(cache);
3394 return ret;
3395}
3396
3397static int chunk_devid_filter(struct extent_buffer *leaf,
3398 struct btrfs_chunk *chunk,
3399 struct btrfs_balance_args *bargs)
3400{
3401 struct btrfs_stripe *stripe;
3402 int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
3403 int i;
3404
3405 for (i = 0; i < num_stripes; i++) {
3406 stripe = btrfs_stripe_nr(chunk, i);
3407 if (btrfs_stripe_devid(leaf, stripe) == bargs->devid)
3408 return 0;
3409 }
3410
3411 return 1;
3412}
3413
3414static u64 calc_data_stripes(u64 type, int num_stripes)
3415{
3416 const int index = btrfs_bg_flags_to_raid_index(type);
3417 const int ncopies = btrfs_raid_array[index].ncopies;
3418 const int nparity = btrfs_raid_array[index].nparity;
3419
3420 if (nparity)
3421 return num_stripes - nparity;
3422 else
3423 return num_stripes / ncopies;
3424}
3425
3426
3427static int chunk_drange_filter(struct extent_buffer *leaf,
3428 struct btrfs_chunk *chunk,
3429 struct btrfs_balance_args *bargs)
3430{
3431 struct btrfs_stripe *stripe;
3432 int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
3433 u64 stripe_offset;
3434 u64 stripe_length;
3435 u64 type;
3436 int factor;
3437 int i;
3438
3439 if (!(bargs->flags & BTRFS_BALANCE_ARGS_DEVID))
3440 return 0;
3441
3442 type = btrfs_chunk_type(leaf, chunk);
3443 factor = calc_data_stripes(type, num_stripes);
3444
3445 for (i = 0; i < num_stripes; i++) {
3446 stripe = btrfs_stripe_nr(chunk, i);
3447 if (btrfs_stripe_devid(leaf, stripe) != bargs->devid)
3448 continue;
3449
3450 stripe_offset = btrfs_stripe_offset(leaf, stripe);
3451 stripe_length = btrfs_chunk_length(leaf, chunk);
3452 stripe_length = div_u64(stripe_length, factor);
3453
3454 if (stripe_offset < bargs->pend &&
3455 stripe_offset + stripe_length > bargs->pstart)
3456 return 0;
3457 }
3458
3459 return 1;
3460}
3461
3462
3463static int chunk_vrange_filter(struct extent_buffer *leaf,
3464 struct btrfs_chunk *chunk,
3465 u64 chunk_offset,
3466 struct btrfs_balance_args *bargs)
3467{
3468 if (chunk_offset < bargs->vend &&
3469 chunk_offset + btrfs_chunk_length(leaf, chunk) > bargs->vstart)
3470
3471 return 0;
3472
3473 return 1;
3474}
3475
3476static int chunk_stripes_range_filter(struct extent_buffer *leaf,
3477 struct btrfs_chunk *chunk,
3478 struct btrfs_balance_args *bargs)
3479{
3480 int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
3481
3482 if (bargs->stripes_min <= num_stripes
3483 && num_stripes <= bargs->stripes_max)
3484 return 0;
3485
3486 return 1;
3487}
3488
3489static int chunk_soft_convert_filter(u64 chunk_type,
3490 struct btrfs_balance_args *bargs)
3491{
3492 if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT))
3493 return 0;
3494
3495 chunk_type = chunk_to_extended(chunk_type) &
3496 BTRFS_EXTENDED_PROFILE_MASK;
3497
3498 if (bargs->target == chunk_type)
3499 return 1;
3500
3501 return 0;
3502}
3503
3504static int should_balance_chunk(struct extent_buffer *leaf,
3505 struct btrfs_chunk *chunk, u64 chunk_offset)
3506{
3507 struct btrfs_fs_info *fs_info = leaf->fs_info;
3508 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3509 struct btrfs_balance_args *bargs = NULL;
3510 u64 chunk_type = btrfs_chunk_type(leaf, chunk);
3511
3512
3513 if (!((chunk_type & BTRFS_BLOCK_GROUP_TYPE_MASK) &
3514 (bctl->flags & BTRFS_BALANCE_TYPE_MASK))) {
3515 return 0;
3516 }
3517
3518 if (chunk_type & BTRFS_BLOCK_GROUP_DATA)
3519 bargs = &bctl->data;
3520 else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
3521 bargs = &bctl->sys;
3522 else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA)
3523 bargs = &bctl->meta;
3524
3525
3526 if ((bargs->flags & BTRFS_BALANCE_ARGS_PROFILES) &&
3527 chunk_profiles_filter(chunk_type, bargs)) {
3528 return 0;
3529 }
3530
3531
3532 if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE) &&
3533 chunk_usage_filter(fs_info, chunk_offset, bargs)) {
3534 return 0;
3535 } else if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
3536 chunk_usage_range_filter(fs_info, chunk_offset, bargs)) {
3537 return 0;
3538 }
3539
3540
3541 if ((bargs->flags & BTRFS_BALANCE_ARGS_DEVID) &&
3542 chunk_devid_filter(leaf, chunk, bargs)) {
3543 return 0;
3544 }
3545
3546
3547 if ((bargs->flags & BTRFS_BALANCE_ARGS_DRANGE) &&
3548 chunk_drange_filter(leaf, chunk, bargs)) {
3549 return 0;
3550 }
3551
3552
3553 if ((bargs->flags & BTRFS_BALANCE_ARGS_VRANGE) &&
3554 chunk_vrange_filter(leaf, chunk, chunk_offset, bargs)) {
3555 return 0;
3556 }
3557
3558
3559 if ((bargs->flags & BTRFS_BALANCE_ARGS_STRIPES_RANGE) &&
3560 chunk_stripes_range_filter(leaf, chunk, bargs)) {
3561 return 0;
3562 }
3563
3564
3565 if ((bargs->flags & BTRFS_BALANCE_ARGS_SOFT) &&
3566 chunk_soft_convert_filter(chunk_type, bargs)) {
3567 return 0;
3568 }
3569
3570
3571
3572
3573 if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT)) {
3574 if (bargs->limit == 0)
3575 return 0;
3576 else
3577 bargs->limit--;
3578 } else if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT_RANGE)) {
3579
3580
3581
3582
3583
3584 if (bargs->limit_max == 0)
3585 return 0;
3586 else
3587 bargs->limit_max--;
3588 }
3589
3590 return 1;
3591}
3592
3593static int __btrfs_balance(struct btrfs_fs_info *fs_info)
3594{
3595 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3596 struct btrfs_root *chunk_root = fs_info->chunk_root;
3597 u64 chunk_type;
3598 struct btrfs_chunk *chunk;
3599 struct btrfs_path *path = NULL;
3600 struct btrfs_key key;
3601 struct btrfs_key found_key;
3602 struct extent_buffer *leaf;
3603 int slot;
3604 int ret;
3605 int enospc_errors = 0;
3606 bool counting = true;
3607
3608 u64 limit_data = bctl->data.limit;
3609 u64 limit_meta = bctl->meta.limit;
3610 u64 limit_sys = bctl->sys.limit;
3611 u32 count_data = 0;
3612 u32 count_meta = 0;
3613 u32 count_sys = 0;
3614 int chunk_reserved = 0;
3615
3616 path = btrfs_alloc_path();
3617 if (!path) {
3618 ret = -ENOMEM;
3619 goto error;
3620 }
3621
3622
3623 spin_lock(&fs_info->balance_lock);
3624 memset(&bctl->stat, 0, sizeof(bctl->stat));
3625 spin_unlock(&fs_info->balance_lock);
3626again:
3627 if (!counting) {
3628
3629
3630
3631
3632 bctl->data.limit = limit_data;
3633 bctl->meta.limit = limit_meta;
3634 bctl->sys.limit = limit_sys;
3635 }
3636 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
3637 key.offset = (u64)-1;
3638 key.type = BTRFS_CHUNK_ITEM_KEY;
3639
3640 while (1) {
3641 if ((!counting && atomic_read(&fs_info->balance_pause_req)) ||
3642 atomic_read(&fs_info->balance_cancel_req)) {
3643 ret = -ECANCELED;
3644 goto error;
3645 }
3646
3647 mutex_lock(&fs_info->delete_unused_bgs_mutex);
3648 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
3649 if (ret < 0) {
3650 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3651 goto error;
3652 }
3653
3654
3655
3656
3657
3658 if (ret == 0)
3659 BUG();
3660
3661 ret = btrfs_previous_item(chunk_root, path, 0,
3662 BTRFS_CHUNK_ITEM_KEY);
3663 if (ret) {
3664 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3665 ret = 0;
3666 break;
3667 }
3668
3669 leaf = path->nodes[0];
3670 slot = path->slots[0];
3671 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3672
3673 if (found_key.objectid != key.objectid) {
3674 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3675 break;
3676 }
3677
3678 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
3679 chunk_type = btrfs_chunk_type(leaf, chunk);
3680
3681 if (!counting) {
3682 spin_lock(&fs_info->balance_lock);
3683 bctl->stat.considered++;
3684 spin_unlock(&fs_info->balance_lock);
3685 }
3686
3687 ret = should_balance_chunk(leaf, chunk, found_key.offset);
3688
3689 btrfs_release_path(path);
3690 if (!ret) {
3691 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3692 goto loop;
3693 }
3694
3695 if (counting) {
3696 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3697 spin_lock(&fs_info->balance_lock);
3698 bctl->stat.expected++;
3699 spin_unlock(&fs_info->balance_lock);
3700
3701 if (chunk_type & BTRFS_BLOCK_GROUP_DATA)
3702 count_data++;
3703 else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
3704 count_sys++;
3705 else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA)
3706 count_meta++;
3707
3708 goto loop;
3709 }
3710
3711
3712
3713
3714
3715 if (((chunk_type & BTRFS_BLOCK_GROUP_DATA) &&
3716 count_data < bctl->data.limit_min)
3717 || ((chunk_type & BTRFS_BLOCK_GROUP_METADATA) &&
3718 count_meta < bctl->meta.limit_min)
3719 || ((chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) &&
3720 count_sys < bctl->sys.limit_min)) {
3721 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3722 goto loop;
3723 }
3724
3725 if (!chunk_reserved) {
3726
3727
3728
3729
3730
3731
3732 ret = btrfs_may_alloc_data_chunk(fs_info,
3733 found_key.offset);
3734 if (ret < 0) {
3735 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3736 goto error;
3737 } else if (ret == 1) {
3738 chunk_reserved = 1;
3739 }
3740 }
3741
3742 ret = btrfs_relocate_chunk(fs_info, found_key.offset);
3743 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3744 if (ret == -ENOSPC) {
3745 enospc_errors++;
3746 } else if (ret == -ETXTBSY) {
3747 btrfs_info(fs_info,
3748 "skipping relocation of block group %llu due to active swapfile",
3749 found_key.offset);
3750 ret = 0;
3751 } else if (ret) {
3752 goto error;
3753 } else {
3754 spin_lock(&fs_info->balance_lock);
3755 bctl->stat.completed++;
3756 spin_unlock(&fs_info->balance_lock);
3757 }
3758loop:
3759 if (found_key.offset == 0)
3760 break;
3761 key.offset = found_key.offset - 1;
3762 }
3763
3764 if (counting) {
3765 btrfs_release_path(path);
3766 counting = false;
3767 goto again;
3768 }
3769error:
3770 btrfs_free_path(path);
3771 if (enospc_errors) {
3772 btrfs_info(fs_info, "%d enospc errors during balance",
3773 enospc_errors);
3774 if (!ret)
3775 ret = -ENOSPC;
3776 }
3777
3778 return ret;
3779}
3780
3781
3782
3783
3784
3785
3786static int alloc_profile_is_valid(u64 flags, int extended)
3787{
3788 u64 mask = (extended ? BTRFS_EXTENDED_PROFILE_MASK :
3789 BTRFS_BLOCK_GROUP_PROFILE_MASK);
3790
3791 flags &= ~BTRFS_BLOCK_GROUP_TYPE_MASK;
3792
3793
3794 if (flags & ~mask)
3795 return 0;
3796
3797
3798 if (flags == 0)
3799 return !extended;
3800
3801 return has_single_bit_set(flags);
3802}
3803
3804static inline int balance_need_close(struct btrfs_fs_info *fs_info)
3805{
3806
3807 return atomic_read(&fs_info->balance_cancel_req) ||
3808 (atomic_read(&fs_info->balance_pause_req) == 0 &&
3809 atomic_read(&fs_info->balance_cancel_req) == 0);
3810}
3811
3812
3813
3814
3815
3816static inline int validate_convert_profile(struct btrfs_fs_info *fs_info,
3817 const struct btrfs_balance_args *bargs,
3818 u64 allowed, const char *type)
3819{
3820 if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT))
3821 return true;
3822
3823
3824 if (alloc_profile_is_valid(bargs->target, 1) &&
3825 (bargs->target & ~allowed) == 0)
3826 return true;
3827
3828 btrfs_err(fs_info, "balance: invalid convert %s profile %s",
3829 type, btrfs_bg_type_to_raid_name(bargs->target));
3830 return false;
3831}
3832
3833
3834
3835
3836
3837
3838static void describe_balance_args(struct btrfs_balance_args *bargs, char *buf,
3839 u32 size_buf)
3840{
3841 int ret;
3842 u32 size_bp = size_buf;
3843 char *bp = buf;
3844 u64 flags = bargs->flags;
3845 char tmp_buf[128] = {'\0'};
3846
3847 if (!flags)
3848 return;
3849
3850#define CHECK_APPEND_NOARG(a) \
3851 do { \
3852 ret = snprintf(bp, size_bp, (a)); \
3853 if (ret < 0 || ret >= size_bp) \
3854 goto out_overflow; \
3855 size_bp -= ret; \
3856 bp += ret; \
3857 } while (0)
3858
3859#define CHECK_APPEND_1ARG(a, v1) \
3860 do { \
3861 ret = snprintf(bp, size_bp, (a), (v1)); \
3862 if (ret < 0 || ret >= size_bp) \
3863 goto out_overflow; \
3864 size_bp -= ret; \
3865 bp += ret; \
3866 } while (0)
3867
3868#define CHECK_APPEND_2ARG(a, v1, v2) \
3869 do { \
3870 ret = snprintf(bp, size_bp, (a), (v1), (v2)); \
3871 if (ret < 0 || ret >= size_bp) \
3872 goto out_overflow; \
3873 size_bp -= ret; \
3874 bp += ret; \
3875 } while (0)
3876
3877 if (flags & BTRFS_BALANCE_ARGS_CONVERT)
3878 CHECK_APPEND_1ARG("convert=%s,",
3879 btrfs_bg_type_to_raid_name(bargs->target));
3880
3881 if (flags & BTRFS_BALANCE_ARGS_SOFT)
3882 CHECK_APPEND_NOARG("soft,");
3883
3884 if (flags & BTRFS_BALANCE_ARGS_PROFILES) {
3885 btrfs_describe_block_groups(bargs->profiles, tmp_buf,
3886 sizeof(tmp_buf));
3887 CHECK_APPEND_1ARG("profiles=%s,", tmp_buf);
3888 }
3889
3890 if (flags & BTRFS_BALANCE_ARGS_USAGE)
3891 CHECK_APPEND_1ARG("usage=%llu,", bargs->usage);
3892
3893 if (flags & BTRFS_BALANCE_ARGS_USAGE_RANGE)
3894 CHECK_APPEND_2ARG("usage=%u..%u,",
3895 bargs->usage_min, bargs->usage_max);
3896
3897 if (flags & BTRFS_BALANCE_ARGS_DEVID)
3898 CHECK_APPEND_1ARG("devid=%llu,", bargs->devid);
3899
3900 if (flags & BTRFS_BALANCE_ARGS_DRANGE)
3901 CHECK_APPEND_2ARG("drange=%llu..%llu,",
3902 bargs->pstart, bargs->pend);
3903
3904 if (flags & BTRFS_BALANCE_ARGS_VRANGE)
3905 CHECK_APPEND_2ARG("vrange=%llu..%llu,",
3906 bargs->vstart, bargs->vend);
3907
3908 if (flags & BTRFS_BALANCE_ARGS_LIMIT)
3909 CHECK_APPEND_1ARG("limit=%llu,", bargs->limit);
3910
3911 if (flags & BTRFS_BALANCE_ARGS_LIMIT_RANGE)
3912 CHECK_APPEND_2ARG("limit=%u..%u,",
3913 bargs->limit_min, bargs->limit_max);
3914
3915 if (flags & BTRFS_BALANCE_ARGS_STRIPES_RANGE)
3916 CHECK_APPEND_2ARG("stripes=%u..%u,",
3917 bargs->stripes_min, bargs->stripes_max);
3918
3919#undef CHECK_APPEND_2ARG
3920#undef CHECK_APPEND_1ARG
3921#undef CHECK_APPEND_NOARG
3922
3923out_overflow:
3924
3925 if (size_bp < size_buf)
3926 buf[size_buf - size_bp - 1] = '\0';
3927 else
3928 buf[0] = '\0';
3929}
3930
3931static void describe_balance_start_or_resume(struct btrfs_fs_info *fs_info)
3932{
3933 u32 size_buf = 1024;
3934 char tmp_buf[192] = {'\0'};
3935 char *buf;
3936 char *bp;
3937 u32 size_bp = size_buf;
3938 int ret;
3939 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3940
3941 buf = kzalloc(size_buf, GFP_KERNEL);
3942 if (!buf)
3943 return;
3944
3945 bp = buf;
3946
3947#define CHECK_APPEND_1ARG(a, v1) \
3948 do { \
3949 ret = snprintf(bp, size_bp, (a), (v1)); \
3950 if (ret < 0 || ret >= size_bp) \
3951 goto out_overflow; \
3952 size_bp -= ret; \
3953 bp += ret; \
3954 } while (0)
3955
3956 if (bctl->flags & BTRFS_BALANCE_FORCE)
3957 CHECK_APPEND_1ARG("%s", "-f ");
3958
3959 if (bctl->flags & BTRFS_BALANCE_DATA) {
3960 describe_balance_args(&bctl->data, tmp_buf, sizeof(tmp_buf));
3961 CHECK_APPEND_1ARG("-d%s ", tmp_buf);
3962 }
3963
3964 if (bctl->flags & BTRFS_BALANCE_METADATA) {
3965 describe_balance_args(&bctl->meta, tmp_buf, sizeof(tmp_buf));
3966 CHECK_APPEND_1ARG("-m%s ", tmp_buf);
3967 }
3968
3969 if (bctl->flags & BTRFS_BALANCE_SYSTEM) {
3970 describe_balance_args(&bctl->sys, tmp_buf, sizeof(tmp_buf));
3971 CHECK_APPEND_1ARG("-s%s ", tmp_buf);
3972 }
3973
3974#undef CHECK_APPEND_1ARG
3975
3976out_overflow:
3977
3978 if (size_bp < size_buf)
3979 buf[size_buf - size_bp - 1] = '\0';
3980 btrfs_info(fs_info, "balance: %s %s",
3981 (bctl->flags & BTRFS_BALANCE_RESUME) ?
3982 "resume" : "start", buf);
3983
3984 kfree(buf);
3985}
3986
3987
3988
3989
3990int btrfs_balance(struct btrfs_fs_info *fs_info,
3991 struct btrfs_balance_control *bctl,
3992 struct btrfs_ioctl_balance_args *bargs)
3993{
3994 u64 meta_target, data_target;
3995 u64 allowed;
3996 int mixed = 0;
3997 int ret;
3998 u64 num_devices;
3999 unsigned seq;
4000 bool reducing_redundancy;
4001 int i;
4002
4003 if (btrfs_fs_closing(fs_info) ||
4004 atomic_read(&fs_info->balance_pause_req) ||
4005 btrfs_should_cancel_balance(fs_info)) {
4006 ret = -EINVAL;
4007 goto out;
4008 }
4009
4010 allowed = btrfs_super_incompat_flags(fs_info->super_copy);
4011 if (allowed & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
4012 mixed = 1;
4013
4014
4015
4016
4017
4018 allowed = BTRFS_BALANCE_DATA | BTRFS_BALANCE_METADATA;
4019 if (mixed && (bctl->flags & allowed)) {
4020 if (!(bctl->flags & BTRFS_BALANCE_DATA) ||
4021 !(bctl->flags & BTRFS_BALANCE_METADATA) ||
4022 memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) {
4023 btrfs_err(fs_info,
4024 "balance: mixed groups data and metadata options must be the same");
4025 ret = -EINVAL;
4026 goto out;
4027 }
4028 }
4029
4030
4031
4032
4033
4034 num_devices = fs_info->fs_devices->rw_devices;
4035
4036
4037
4038
4039
4040
4041 allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
4042 for (i = 0; i < ARRAY_SIZE(btrfs_raid_array); i++)
4043 if (num_devices >= btrfs_raid_array[i].devs_min)
4044 allowed |= btrfs_raid_array[i].bg_flag;
4045
4046 if (!validate_convert_profile(fs_info, &bctl->data, allowed, "data") ||
4047 !validate_convert_profile(fs_info, &bctl->meta, allowed, "metadata") ||
4048 !validate_convert_profile(fs_info, &bctl->sys, allowed, "system")) {
4049 ret = -EINVAL;
4050 goto out;
4051 }
4052
4053
4054
4055
4056
4057 allowed = 0;
4058 for (i = 0; i < ARRAY_SIZE(btrfs_raid_array); i++) {
4059 if (btrfs_raid_array[i].ncopies >= 2 ||
4060 btrfs_raid_array[i].tolerated_failures >= 1)
4061 allowed |= btrfs_raid_array[i].bg_flag;
4062 }
4063 do {
4064 seq = read_seqbegin(&fs_info->profiles_lock);
4065
4066 if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
4067 (fs_info->avail_system_alloc_bits & allowed) &&
4068 !(bctl->sys.target & allowed)) ||
4069 ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
4070 (fs_info->avail_metadata_alloc_bits & allowed) &&
4071 !(bctl->meta.target & allowed)))
4072 reducing_redundancy = true;
4073 else
4074 reducing_redundancy = false;
4075
4076
4077 meta_target = (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
4078 bctl->meta.target : fs_info->avail_metadata_alloc_bits;
4079 data_target = (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
4080 bctl->data.target : fs_info->avail_data_alloc_bits;
4081 } while (read_seqretry(&fs_info->profiles_lock, seq));
4082
4083 if (reducing_redundancy) {
4084 if (bctl->flags & BTRFS_BALANCE_FORCE) {
4085 btrfs_info(fs_info,
4086 "balance: force reducing metadata redundancy");
4087 } else {
4088 btrfs_err(fs_info,
4089 "balance: reduces metadata redundancy, use --force if you want this");
4090 ret = -EINVAL;
4091 goto out;
4092 }
4093 }
4094
4095 if (btrfs_get_num_tolerated_disk_barrier_failures(meta_target) <
4096 btrfs_get_num_tolerated_disk_barrier_failures(data_target)) {
4097 btrfs_warn(fs_info,
4098 "balance: metadata profile %s has lower redundancy than data profile %s",
4099 btrfs_bg_type_to_raid_name(meta_target),
4100 btrfs_bg_type_to_raid_name(data_target));
4101 }
4102
4103 if (fs_info->send_in_progress) {
4104 btrfs_warn_rl(fs_info,
4105"cannot run balance while send operations are in progress (%d in progress)",
4106 fs_info->send_in_progress);
4107 ret = -EAGAIN;
4108 goto out;
4109 }
4110
4111 ret = insert_balance_item(fs_info, bctl);
4112 if (ret && ret != -EEXIST)
4113 goto out;
4114
4115 if (!(bctl->flags & BTRFS_BALANCE_RESUME)) {
4116 BUG_ON(ret == -EEXIST);
4117 BUG_ON(fs_info->balance_ctl);
4118 spin_lock(&fs_info->balance_lock);
4119 fs_info->balance_ctl = bctl;
4120 spin_unlock(&fs_info->balance_lock);
4121 } else {
4122 BUG_ON(ret != -EEXIST);
4123 spin_lock(&fs_info->balance_lock);
4124 update_balance_args(bctl);
4125 spin_unlock(&fs_info->balance_lock);
4126 }
4127
4128 ASSERT(!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
4129 set_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);
4130 describe_balance_start_or_resume(fs_info);
4131 mutex_unlock(&fs_info->balance_mutex);
4132
4133 ret = __btrfs_balance(fs_info);
4134
4135 mutex_lock(&fs_info->balance_mutex);
4136 if (ret == -ECANCELED && atomic_read(&fs_info->balance_pause_req))
4137 btrfs_info(fs_info, "balance: paused");
4138 else if (ret == -ECANCELED && atomic_read(&fs_info->balance_cancel_req))
4139 btrfs_info(fs_info, "balance: canceled");
4140 else
4141 btrfs_info(fs_info, "balance: ended with status: %d", ret);
4142
4143 clear_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);
4144
4145 if (bargs) {
4146 memset(bargs, 0, sizeof(*bargs));
4147 btrfs_update_ioctl_balance_args(fs_info, bargs);
4148 }
4149
4150 if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
4151 balance_need_close(fs_info)) {
4152 reset_balance_state(fs_info);
4153 clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
4154 }
4155
4156 wake_up(&fs_info->balance_wait_q);
4157
4158 return ret;
4159out:
4160 if (bctl->flags & BTRFS_BALANCE_RESUME)
4161 reset_balance_state(fs_info);
4162 else
4163 kfree(bctl);
4164 clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
4165
4166 return ret;
4167}
4168
4169static int balance_kthread(void *data)
4170{
4171 struct btrfs_fs_info *fs_info = data;
4172 int ret = 0;
4173
4174 mutex_lock(&fs_info->balance_mutex);
4175 if (fs_info->balance_ctl)
4176 ret = btrfs_balance(fs_info, fs_info->balance_ctl, NULL);
4177 mutex_unlock(&fs_info->balance_mutex);
4178
4179 return ret;
4180}
4181
4182int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
4183{
4184 struct task_struct *tsk;
4185
4186 mutex_lock(&fs_info->balance_mutex);
4187 if (!fs_info->balance_ctl) {
4188 mutex_unlock(&fs_info->balance_mutex);
4189 return 0;
4190 }
4191 mutex_unlock(&fs_info->balance_mutex);
4192
4193 if (btrfs_test_opt(fs_info, SKIP_BALANCE)) {
4194 btrfs_info(fs_info, "balance: resume skipped");
4195 return 0;
4196 }
4197
4198
4199
4200
4201
4202
4203 spin_lock(&fs_info->balance_lock);
4204 fs_info->balance_ctl->flags |= BTRFS_BALANCE_RESUME;
4205 spin_unlock(&fs_info->balance_lock);
4206
4207 tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
4208 return PTR_ERR_OR_ZERO(tsk);
4209}
4210
4211int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
4212{
4213 struct btrfs_balance_control *bctl;
4214 struct btrfs_balance_item *item;
4215 struct btrfs_disk_balance_args disk_bargs;
4216 struct btrfs_path *path;
4217 struct extent_buffer *leaf;
4218 struct btrfs_key key;
4219 int ret;
4220
4221 path = btrfs_alloc_path();
4222 if (!path)
4223 return -ENOMEM;
4224
4225 key.objectid = BTRFS_BALANCE_OBJECTID;
4226 key.type = BTRFS_TEMPORARY_ITEM_KEY;
4227 key.offset = 0;
4228
4229 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
4230 if (ret < 0)
4231 goto out;
4232 if (ret > 0) {
4233 ret = 0;
4234 goto out;
4235 }
4236
4237 bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
4238 if (!bctl) {
4239 ret = -ENOMEM;
4240 goto out;
4241 }
4242
4243 leaf = path->nodes[0];
4244 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
4245
4246 bctl->flags = btrfs_balance_flags(leaf, item);
4247 bctl->flags |= BTRFS_BALANCE_RESUME;
4248
4249 btrfs_balance_data(leaf, item, &disk_bargs);
4250 btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs);
4251 btrfs_balance_meta(leaf, item, &disk_bargs);
4252 btrfs_disk_balance_args_to_cpu(&bctl->meta, &disk_bargs);
4253 btrfs_balance_sys(leaf, item, &disk_bargs);
4254 btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
4255
4256
4257
4258
4259
4260
4261
4262
4263
4264
4265
4266 if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags))
4267 btrfs_warn(fs_info,
4268 "balance: cannot set exclusive op status, resume manually");
4269
4270 mutex_lock(&fs_info->balance_mutex);
4271 BUG_ON(fs_info->balance_ctl);
4272 spin_lock(&fs_info->balance_lock);
4273 fs_info->balance_ctl = bctl;
4274 spin_unlock(&fs_info->balance_lock);
4275 mutex_unlock(&fs_info->balance_mutex);
4276out:
4277 btrfs_free_path(path);
4278 return ret;
4279}
4280
4281int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
4282{
4283 int ret = 0;
4284
4285 mutex_lock(&fs_info->balance_mutex);
4286 if (!fs_info->balance_ctl) {
4287 mutex_unlock(&fs_info->balance_mutex);
4288 return -ENOTCONN;
4289 }
4290
4291 if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
4292 atomic_inc(&fs_info->balance_pause_req);
4293 mutex_unlock(&fs_info->balance_mutex);
4294
4295 wait_event(fs_info->balance_wait_q,
4296 !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
4297
4298 mutex_lock(&fs_info->balance_mutex);
4299
4300 BUG_ON(test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
4301 atomic_dec(&fs_info->balance_pause_req);
4302 } else {
4303 ret = -ENOTCONN;
4304 }
4305
4306 mutex_unlock(&fs_info->balance_mutex);
4307 return ret;
4308}
4309
4310int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
4311{
4312 mutex_lock(&fs_info->balance_mutex);
4313 if (!fs_info->balance_ctl) {
4314 mutex_unlock(&fs_info->balance_mutex);
4315 return -ENOTCONN;
4316 }
4317
4318
4319
4320
4321
4322
4323 if (sb_rdonly(fs_info->sb)) {
4324 mutex_unlock(&fs_info->balance_mutex);
4325 return -EROFS;
4326 }
4327
4328 atomic_inc(&fs_info->balance_cancel_req);
4329
4330
4331
4332
4333 if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
4334 mutex_unlock(&fs_info->balance_mutex);
4335 wait_event(fs_info->balance_wait_q,
4336 !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
4337 mutex_lock(&fs_info->balance_mutex);
4338 } else {
4339 mutex_unlock(&fs_info->balance_mutex);
4340
4341
4342
4343
4344 mutex_lock(&fs_info->balance_mutex);
4345
4346 if (fs_info->balance_ctl) {
4347 reset_balance_state(fs_info);
4348 clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
4349 btrfs_info(fs_info, "balance: canceled");
4350 }
4351 }
4352
4353 BUG_ON(fs_info->balance_ctl ||
4354 test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
4355 atomic_dec(&fs_info->balance_cancel_req);
4356 mutex_unlock(&fs_info->balance_mutex);
4357 return 0;
4358}
4359
4360int btrfs_uuid_scan_kthread(void *data)
4361{
4362 struct btrfs_fs_info *fs_info = data;
4363 struct btrfs_root *root = fs_info->tree_root;
4364 struct btrfs_key key;
4365 struct btrfs_path *path = NULL;
4366 int ret = 0;
4367 struct extent_buffer *eb;
4368 int slot;
4369 struct btrfs_root_item root_item;
4370 u32 item_size;
4371 struct btrfs_trans_handle *trans = NULL;
4372 bool closing = false;
4373
4374 path = btrfs_alloc_path();
4375 if (!path) {
4376 ret = -ENOMEM;
4377 goto out;
4378 }
4379
4380 key.objectid = 0;
4381 key.type = BTRFS_ROOT_ITEM_KEY;
4382 key.offset = 0;
4383
4384 while (1) {
4385 if (btrfs_fs_closing(fs_info)) {
4386 closing = true;
4387 break;
4388 }
4389 ret = btrfs_search_forward(root, &key, path,
4390 BTRFS_OLDEST_GENERATION);
4391 if (ret) {
4392 if (ret > 0)
4393 ret = 0;
4394 break;
4395 }
4396
4397 if (key.type != BTRFS_ROOT_ITEM_KEY ||
4398 (key.objectid < BTRFS_FIRST_FREE_OBJECTID &&
4399 key.objectid != BTRFS_FS_TREE_OBJECTID) ||
4400 key.objectid > BTRFS_LAST_FREE_OBJECTID)
4401 goto skip;
4402
4403 eb = path->nodes[0];
4404 slot = path->slots[0];
4405 item_size = btrfs_item_size_nr(eb, slot);
4406 if (item_size < sizeof(root_item))
4407 goto skip;
4408
4409 read_extent_buffer(eb, &root_item,
4410 btrfs_item_ptr_offset(eb, slot),
4411 (int)sizeof(root_item));
4412 if (btrfs_root_refs(&root_item) == 0)
4413 goto skip;
4414
4415 if (!btrfs_is_empty_uuid(root_item.uuid) ||
4416 !btrfs_is_empty_uuid(root_item.received_uuid)) {
4417 if (trans)
4418 goto update_tree;
4419
4420 btrfs_release_path(path);
4421
4422
4423
4424
4425 trans = btrfs_start_transaction(fs_info->uuid_root, 2);
4426 if (IS_ERR(trans)) {
4427 ret = PTR_ERR(trans);
4428 break;
4429 }
4430 continue;
4431 } else {
4432 goto skip;
4433 }
4434update_tree:
4435 if (!btrfs_is_empty_uuid(root_item.uuid)) {
4436 ret = btrfs_uuid_tree_add(trans, root_item.uuid,
4437 BTRFS_UUID_KEY_SUBVOL,
4438 key.objectid);
4439 if (ret < 0) {
4440 btrfs_warn(fs_info, "uuid_tree_add failed %d",
4441 ret);
4442 break;
4443 }
4444 }
4445
4446 if (!btrfs_is_empty_uuid(root_item.received_uuid)) {
4447 ret = btrfs_uuid_tree_add(trans,
4448 root_item.received_uuid,
4449 BTRFS_UUID_KEY_RECEIVED_SUBVOL,
4450 key.objectid);
4451 if (ret < 0) {
4452 btrfs_warn(fs_info, "uuid_tree_add failed %d",
4453 ret);
4454 break;
4455 }
4456 }
4457
4458skip:
4459 if (trans) {
4460 ret = btrfs_end_transaction(trans);
4461 trans = NULL;
4462 if (ret)
4463 break;
4464 }
4465
4466 btrfs_release_path(path);
4467 if (key.offset < (u64)-1) {
4468 key.offset++;
4469 } else if (key.type < BTRFS_ROOT_ITEM_KEY) {
4470 key.offset = 0;
4471 key.type = BTRFS_ROOT_ITEM_KEY;
4472 } else if (key.objectid < (u64)-1) {
4473 key.offset = 0;
4474 key.type = BTRFS_ROOT_ITEM_KEY;
4475 key.objectid++;
4476 } else {
4477 break;
4478 }
4479 cond_resched();
4480 }
4481
4482out:
4483 btrfs_free_path(path);
4484 if (trans && !IS_ERR(trans))
4485 btrfs_end_transaction(trans);
4486 if (ret)
4487 btrfs_warn(fs_info, "btrfs_uuid_scan_kthread failed %d", ret);
4488 else if (!closing)
4489 set_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags);
4490 up(&fs_info->uuid_tree_rescan_sem);
4491 return 0;
4492}
4493
4494int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
4495{
4496 struct btrfs_trans_handle *trans;
4497 struct btrfs_root *tree_root = fs_info->tree_root;
4498 struct btrfs_root *uuid_root;
4499 struct task_struct *task;
4500 int ret;
4501
4502
4503
4504
4505
4506 trans = btrfs_start_transaction(tree_root, 2);
4507 if (IS_ERR(trans))
4508 return PTR_ERR(trans);
4509
4510 uuid_root = btrfs_create_tree(trans, BTRFS_UUID_TREE_OBJECTID);
4511 if (IS_ERR(uuid_root)) {
4512 ret = PTR_ERR(uuid_root);
4513 btrfs_abort_transaction(trans, ret);
4514 btrfs_end_transaction(trans);
4515 return ret;
4516 }
4517
4518 fs_info->uuid_root = uuid_root;
4519
4520 ret = btrfs_commit_transaction(trans);
4521 if (ret)
4522 return ret;
4523
4524 down(&fs_info->uuid_tree_rescan_sem);
4525 task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid");
4526 if (IS_ERR(task)) {
4527
4528 btrfs_warn(fs_info, "failed to start uuid_scan task");
4529 up(&fs_info->uuid_tree_rescan_sem);
4530 return PTR_ERR(task);
4531 }
4532
4533 return 0;
4534}
4535
4536
4537
4538
4539
4540
4541int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
4542{
4543 struct btrfs_fs_info *fs_info = device->fs_info;
4544 struct btrfs_root *root = fs_info->dev_root;
4545 struct btrfs_trans_handle *trans;
4546 struct btrfs_dev_extent *dev_extent = NULL;
4547 struct btrfs_path *path;
4548 u64 length;
4549 u64 chunk_offset;
4550 int ret;
4551 int slot;
4552 int failed = 0;
4553 bool retried = false;
4554 struct extent_buffer *l;
4555 struct btrfs_key key;
4556 struct btrfs_super_block *super_copy = fs_info->super_copy;
4557 u64 old_total = btrfs_super_total_bytes(super_copy);
4558 u64 old_size = btrfs_device_get_total_bytes(device);
4559 u64 diff;
4560 u64 start;
4561
4562 new_size = round_down(new_size, fs_info->sectorsize);
4563 start = new_size;
4564 diff = round_down(old_size - new_size, fs_info->sectorsize);
4565
4566 if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
4567 return -EINVAL;
4568
4569 path = btrfs_alloc_path();
4570 if (!path)
4571 return -ENOMEM;
4572
4573 path->reada = READA_BACK;
4574
4575 trans = btrfs_start_transaction(root, 0);
4576 if (IS_ERR(trans)) {
4577 btrfs_free_path(path);
4578 return PTR_ERR(trans);
4579 }
4580
4581 mutex_lock(&fs_info->chunk_mutex);
4582
4583 btrfs_device_set_total_bytes(device, new_size);
4584 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
4585 device->fs_devices->total_rw_bytes -= diff;
4586 atomic64_sub(diff, &fs_info->free_chunk_space);
4587 }
4588
4589
4590
4591
4592
4593
4594 if (contains_pending_extent(device, &start, diff)) {
4595 mutex_unlock(&fs_info->chunk_mutex);
4596 ret = btrfs_commit_transaction(trans);
4597 if (ret)
4598 goto done;
4599 } else {
4600 mutex_unlock(&fs_info->chunk_mutex);
4601 btrfs_end_transaction(trans);
4602 }
4603
4604again:
4605 key.objectid = device->devid;
4606 key.offset = (u64)-1;
4607 key.type = BTRFS_DEV_EXTENT_KEY;
4608
4609 do {
4610 mutex_lock(&fs_info->delete_unused_bgs_mutex);
4611 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4612 if (ret < 0) {
4613 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4614 goto done;
4615 }
4616
4617 ret = btrfs_previous_item(root, path, 0, key.type);
4618 if (ret)
4619 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4620 if (ret < 0)
4621 goto done;
4622 if (ret) {
4623 ret = 0;
4624 btrfs_release_path(path);
4625 break;
4626 }
4627
4628 l = path->nodes[0];
4629 slot = path->slots[0];
4630 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
4631
4632 if (key.objectid != device->devid) {
4633 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4634 btrfs_release_path(path);
4635 break;
4636 }
4637
4638 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
4639 length = btrfs_dev_extent_length(l, dev_extent);
4640
4641 if (key.offset + length <= new_size) {
4642 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4643 btrfs_release_path(path);
4644 break;
4645 }
4646
4647 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
4648 btrfs_release_path(path);
4649
4650
4651
4652
4653
4654
4655
4656 ret = btrfs_may_alloc_data_chunk(fs_info, chunk_offset);
4657 if (ret < 0) {
4658 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4659 goto done;
4660 }
4661
4662 ret = btrfs_relocate_chunk(fs_info, chunk_offset);
4663 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4664 if (ret == -ENOSPC) {
4665 failed++;
4666 } else if (ret) {
4667 if (ret == -ETXTBSY) {
4668 btrfs_warn(fs_info,
4669 "could not shrink block group %llu due to active swapfile",
4670 chunk_offset);
4671 }
4672 goto done;
4673 }
4674 } while (key.offset-- > 0);
4675
4676 if (failed && !retried) {
4677 failed = 0;
4678 retried = true;
4679 goto again;
4680 } else if (failed && retried) {
4681 ret = -ENOSPC;
4682 goto done;
4683 }
4684
4685
4686 trans = btrfs_start_transaction(root, 0);
4687 if (IS_ERR(trans)) {
4688 ret = PTR_ERR(trans);
4689 goto done;
4690 }
4691
4692 mutex_lock(&fs_info->chunk_mutex);
4693 btrfs_device_set_disk_total_bytes(device, new_size);
4694 if (list_empty(&device->post_commit_list))
4695 list_add_tail(&device->post_commit_list,
4696 &trans->transaction->dev_update_list);
4697
4698 WARN_ON(diff > old_total);
4699 btrfs_set_super_total_bytes(super_copy,
4700 round_down(old_total - diff, fs_info->sectorsize));
4701 mutex_unlock(&fs_info->chunk_mutex);
4702
4703
4704 ret = btrfs_update_device(trans, device);
4705 if (ret < 0) {
4706 btrfs_abort_transaction(trans, ret);
4707 btrfs_end_transaction(trans);
4708 } else {
4709 ret = btrfs_commit_transaction(trans);
4710 }
4711done:
4712 btrfs_free_path(path);
4713 if (ret) {
4714 mutex_lock(&fs_info->chunk_mutex);
4715 btrfs_device_set_total_bytes(device, old_size);
4716 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
4717 device->fs_devices->total_rw_bytes += diff;
4718 atomic64_add(diff, &fs_info->free_chunk_space);
4719 mutex_unlock(&fs_info->chunk_mutex);
4720 }
4721 return ret;
4722}
4723
4724static int btrfs_add_system_chunk(struct btrfs_fs_info *fs_info,
4725 struct btrfs_key *key,
4726 struct btrfs_chunk *chunk, int item_size)
4727{
4728 struct btrfs_super_block *super_copy = fs_info->super_copy;
4729 struct btrfs_disk_key disk_key;
4730 u32 array_size;
4731 u8 *ptr;
4732
4733 mutex_lock(&fs_info->chunk_mutex);
4734 array_size = btrfs_super_sys_array_size(super_copy);
4735 if (array_size + item_size + sizeof(disk_key)
4736 > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
4737 mutex_unlock(&fs_info->chunk_mutex);
4738 return -EFBIG;
4739 }
4740
4741 ptr = super_copy->sys_chunk_array + array_size;
4742 btrfs_cpu_key_to_disk(&disk_key, key);
4743 memcpy(ptr, &disk_key, sizeof(disk_key));
4744 ptr += sizeof(disk_key);
4745 memcpy(ptr, chunk, item_size);
4746 item_size += sizeof(disk_key);
4747 btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
4748 mutex_unlock(&fs_info->chunk_mutex);
4749
4750 return 0;
4751}
4752
4753
4754
4755
4756static int btrfs_cmp_device_info(const void *a, const void *b)
4757{
4758 const struct btrfs_device_info *di_a = a;
4759 const struct btrfs_device_info *di_b = b;
4760
4761 if (di_a->max_avail > di_b->max_avail)
4762 return -1;
4763 if (di_a->max_avail < di_b->max_avail)
4764 return 1;
4765 if (di_a->total_avail > di_b->total_avail)
4766 return -1;
4767 if (di_a->total_avail < di_b->total_avail)
4768 return 1;
4769 return 0;
4770}
4771
4772static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
4773{
4774 if (!(type & BTRFS_BLOCK_GROUP_RAID56_MASK))
4775 return;
4776
4777 btrfs_set_fs_incompat(info, RAID56);
4778}
4779
4780static void check_raid1c34_incompat_flag(struct btrfs_fs_info *info, u64 type)
4781{
4782 if (!(type & (BTRFS_BLOCK_GROUP_RAID1C3 | BTRFS_BLOCK_GROUP_RAID1C4)))
4783 return;
4784
4785 btrfs_set_fs_incompat(info, RAID1C34);
4786}
4787
4788
4789
4790
4791
4792struct alloc_chunk_ctl {
4793 u64 start;
4794 u64 type;
4795
4796 int num_stripes;
4797
4798 int sub_stripes;
4799
4800 int dev_stripes;
4801
4802 int devs_max;
4803
4804 int devs_min;
4805
4806 int devs_increment;
4807
4808 int ncopies;
4809
4810 int nparity;
4811 u64 max_stripe_size;
4812 u64 max_chunk_size;
4813 u64 dev_extent_min;
4814 u64 stripe_size;
4815 u64 chunk_size;
4816 int ndevs;
4817};
4818
4819static void init_alloc_chunk_ctl_policy_regular(
4820 struct btrfs_fs_devices *fs_devices,
4821 struct alloc_chunk_ctl *ctl)
4822{
4823 u64 type = ctl->type;
4824
4825 if (type & BTRFS_BLOCK_GROUP_DATA) {
4826 ctl->max_stripe_size = SZ_1G;
4827 ctl->max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE;
4828 } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
4829
4830 if (fs_devices->total_rw_bytes > 50ULL * SZ_1G)
4831 ctl->max_stripe_size = SZ_1G;
4832 else
4833 ctl->max_stripe_size = SZ_256M;
4834 ctl->max_chunk_size = ctl->max_stripe_size;
4835 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
4836 ctl->max_stripe_size = SZ_32M;
4837 ctl->max_chunk_size = 2 * ctl->max_stripe_size;
4838 ctl->devs_max = min_t(int, ctl->devs_max,
4839 BTRFS_MAX_DEVS_SYS_CHUNK);
4840 } else {
4841 BUG();
4842 }
4843
4844
4845 ctl->max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
4846 ctl->max_chunk_size);
4847 ctl->dev_extent_min = BTRFS_STRIPE_LEN * ctl->dev_stripes;
4848}
4849
4850static void init_alloc_chunk_ctl(struct btrfs_fs_devices *fs_devices,
4851 struct alloc_chunk_ctl *ctl)
4852{
4853 int index = btrfs_bg_flags_to_raid_index(ctl->type);
4854
4855 ctl->sub_stripes = btrfs_raid_array[index].sub_stripes;
4856 ctl->dev_stripes = btrfs_raid_array[index].dev_stripes;
4857 ctl->devs_max = btrfs_raid_array[index].devs_max;
4858 if (!ctl->devs_max)
4859 ctl->devs_max = BTRFS_MAX_DEVS(fs_devices->fs_info);
4860 ctl->devs_min = btrfs_raid_array[index].devs_min;
4861 ctl->devs_increment = btrfs_raid_array[index].devs_increment;
4862 ctl->ncopies = btrfs_raid_array[index].ncopies;
4863 ctl->nparity = btrfs_raid_array[index].nparity;
4864 ctl->ndevs = 0;
4865
4866 switch (fs_devices->chunk_alloc_policy) {
4867 case BTRFS_CHUNK_ALLOC_REGULAR:
4868 init_alloc_chunk_ctl_policy_regular(fs_devices, ctl);
4869 break;
4870 default:
4871 BUG();
4872 }
4873}
4874
4875static int gather_device_info(struct btrfs_fs_devices *fs_devices,
4876 struct alloc_chunk_ctl *ctl,
4877 struct btrfs_device_info *devices_info)
4878{
4879 struct btrfs_fs_info *info = fs_devices->fs_info;
4880 struct btrfs_device *device;
4881 u64 total_avail;
4882 u64 dev_extent_want = ctl->max_stripe_size * ctl->dev_stripes;
4883 int ret;
4884 int ndevs = 0;
4885 u64 max_avail;
4886 u64 dev_offset;
4887
4888
4889
4890
4891
4892 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
4893 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
4894 WARN(1, KERN_ERR
4895 "BTRFS: read-only device in alloc_list\n");
4896 continue;
4897 }
4898
4899 if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
4900 &device->dev_state) ||
4901 test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
4902 continue;
4903
4904 if (device->total_bytes > device->bytes_used)
4905 total_avail = device->total_bytes - device->bytes_used;
4906 else
4907 total_avail = 0;
4908
4909
4910 if (total_avail < ctl->dev_extent_min)
4911 continue;
4912
4913 ret = find_free_dev_extent(device, dev_extent_want, &dev_offset,
4914 &max_avail);
4915 if (ret && ret != -ENOSPC)
4916 return ret;
4917
4918 if (ret == 0)
4919 max_avail = dev_extent_want;
4920
4921 if (max_avail < ctl->dev_extent_min) {
4922 if (btrfs_test_opt(info, ENOSPC_DEBUG))
4923 btrfs_debug(info,
4924 "%s: devid %llu has no free space, have=%llu want=%llu",
4925 __func__, device->devid, max_avail,
4926 ctl->dev_extent_min);
4927 continue;
4928 }
4929
4930 if (ndevs == fs_devices->rw_devices) {
4931 WARN(1, "%s: found more than %llu devices\n",
4932 __func__, fs_devices->rw_devices);
4933 break;
4934 }
4935 devices_info[ndevs].dev_offset = dev_offset;
4936 devices_info[ndevs].max_avail = max_avail;
4937 devices_info[ndevs].total_avail = total_avail;
4938 devices_info[ndevs].dev = device;
4939 ++ndevs;
4940 }
4941 ctl->ndevs = ndevs;
4942
4943
4944
4945
4946 sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
4947 btrfs_cmp_device_info, NULL);
4948
4949 return 0;
4950}
4951
4952static int decide_stripe_size_regular(struct alloc_chunk_ctl *ctl,
4953 struct btrfs_device_info *devices_info)
4954{
4955
4956 int data_stripes;
4957
4958
4959
4960
4961
4962
4963
4964
4965 ctl->stripe_size = div_u64(devices_info[ctl->ndevs - 1].max_avail,
4966 ctl->dev_stripes);
4967 ctl->num_stripes = ctl->ndevs * ctl->dev_stripes;
4968
4969
4970 data_stripes = (ctl->num_stripes - ctl->nparity) / ctl->ncopies;
4971
4972
4973
4974
4975
4976
4977
4978 if (ctl->stripe_size * data_stripes > ctl->max_chunk_size) {
4979
4980
4981
4982
4983
4984 ctl->stripe_size = min(round_up(div_u64(ctl->max_chunk_size,
4985 data_stripes), SZ_16M),
4986 ctl->stripe_size);
4987 }
4988
4989
4990 ctl->stripe_size = round_down(ctl->stripe_size, BTRFS_STRIPE_LEN);
4991 ctl->chunk_size = ctl->stripe_size * data_stripes;
4992
4993 return 0;
4994}
4995
4996static int decide_stripe_size(struct btrfs_fs_devices *fs_devices,
4997 struct alloc_chunk_ctl *ctl,
4998 struct btrfs_device_info *devices_info)
4999{
5000 struct btrfs_fs_info *info = fs_devices->fs_info;
5001
5002
5003
5004
5005
5006
5007 ctl->ndevs = rounddown(ctl->ndevs, ctl->devs_increment);
5008
5009 if (ctl->ndevs < ctl->devs_min) {
5010 if (btrfs_test_opt(info, ENOSPC_DEBUG)) {
5011 btrfs_debug(info,
5012 "%s: not enough devices with free space: have=%d minimum required=%d",
5013 __func__, ctl->ndevs, ctl->devs_min);
5014 }
5015 return -ENOSPC;
5016 }
5017
5018 ctl->ndevs = min(ctl->ndevs, ctl->devs_max);
5019
5020 switch (fs_devices->chunk_alloc_policy) {
5021 case BTRFS_CHUNK_ALLOC_REGULAR:
5022 return decide_stripe_size_regular(ctl, devices_info);
5023 default:
5024 BUG();
5025 }
5026}
5027
5028static int create_chunk(struct btrfs_trans_handle *trans,
5029 struct alloc_chunk_ctl *ctl,
5030 struct btrfs_device_info *devices_info)
5031{
5032 struct btrfs_fs_info *info = trans->fs_info;
5033 struct map_lookup *map = NULL;
5034 struct extent_map_tree *em_tree;
5035 struct extent_map *em;
5036 u64 start = ctl->start;
5037 u64 type = ctl->type;
5038 int ret;
5039 int i;
5040 int j;
5041
5042 map = kmalloc(map_lookup_size(ctl->num_stripes), GFP_NOFS);
5043 if (!map)
5044 return -ENOMEM;
5045 map->num_stripes = ctl->num_stripes;
5046
5047 for (i = 0; i < ctl->ndevs; ++i) {
5048 for (j = 0; j < ctl->dev_stripes; ++j) {
5049 int s = i * ctl->dev_stripes + j;
5050 map->stripes[s].dev = devices_info[i].dev;
5051 map->stripes[s].physical = devices_info[i].dev_offset +
5052 j * ctl->stripe_size;
5053 }
5054 }
5055 map->stripe_len = BTRFS_STRIPE_LEN;
5056 map->io_align = BTRFS_STRIPE_LEN;
5057 map->io_width = BTRFS_STRIPE_LEN;
5058 map->type = type;
5059 map->sub_stripes = ctl->sub_stripes;
5060
5061 trace_btrfs_chunk_alloc(info, map, start, ctl->chunk_size);
5062
5063 em = alloc_extent_map();
5064 if (!em) {
5065 kfree(map);
5066 return -ENOMEM;
5067 }
5068 set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
5069 em->map_lookup = map;
5070 em->start = start;
5071 em->len = ctl->chunk_size;
5072 em->block_start = 0;
5073 em->block_len = em->len;
5074 em->orig_block_len = ctl->stripe_size;
5075
5076 em_tree = &info->mapping_tree;
5077 write_lock(&em_tree->lock);
5078 ret = add_extent_mapping(em_tree, em, 0);
5079 if (ret) {
5080 write_unlock(&em_tree->lock);
5081 free_extent_map(em);
5082 return ret;
5083 }
5084 write_unlock(&em_tree->lock);
5085
5086 ret = btrfs_make_block_group(trans, 0, type, start, ctl->chunk_size);
5087 if (ret)
5088 goto error_del_extent;
5089
5090 for (i = 0; i < map->num_stripes; i++) {
5091 struct btrfs_device *dev = map->stripes[i].dev;
5092
5093 btrfs_device_set_bytes_used(dev,
5094 dev->bytes_used + ctl->stripe_size);
5095 if (list_empty(&dev->post_commit_list))
5096 list_add_tail(&dev->post_commit_list,
5097 &trans->transaction->dev_update_list);
5098 }
5099
5100 atomic64_sub(ctl->stripe_size * map->num_stripes,
5101 &info->free_chunk_space);
5102
5103 free_extent_map(em);
5104 check_raid56_incompat_flag(info, type);
5105 check_raid1c34_incompat_flag(info, type);
5106
5107 return 0;
5108
5109error_del_extent:
5110 write_lock(&em_tree->lock);
5111 remove_extent_mapping(em_tree, em);
5112 write_unlock(&em_tree->lock);
5113
5114
5115 free_extent_map(em);
5116
5117 free_extent_map(em);
5118
5119 return ret;
5120}
5121
5122int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type)
5123{
5124 struct btrfs_fs_info *info = trans->fs_info;
5125 struct btrfs_fs_devices *fs_devices = info->fs_devices;
5126 struct btrfs_device_info *devices_info = NULL;
5127 struct alloc_chunk_ctl ctl;
5128 int ret;
5129
5130 lockdep_assert_held(&info->chunk_mutex);
5131
5132 if (!alloc_profile_is_valid(type, 0)) {
5133 ASSERT(0);
5134 return -EINVAL;
5135 }
5136
5137 if (list_empty(&fs_devices->alloc_list)) {
5138 if (btrfs_test_opt(info, ENOSPC_DEBUG))
5139 btrfs_debug(info, "%s: no writable device", __func__);
5140 return -ENOSPC;
5141 }
5142
5143 if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
5144 btrfs_err(info, "invalid chunk type 0x%llx requested", type);
5145 ASSERT(0);
5146 return -EINVAL;
5147 }
5148
5149 ctl.start = find_next_chunk(info);
5150 ctl.type = type;
5151 init_alloc_chunk_ctl(fs_devices, &ctl);
5152
5153 devices_info = kcalloc(fs_devices->rw_devices, sizeof(*devices_info),
5154 GFP_NOFS);
5155 if (!devices_info)
5156 return -ENOMEM;
5157
5158 ret = gather_device_info(fs_devices, &ctl, devices_info);
5159 if (ret < 0)
5160 goto out;
5161
5162 ret = decide_stripe_size(fs_devices, &ctl, devices_info);
5163 if (ret < 0)
5164 goto out;
5165
5166 ret = create_chunk(trans, &ctl, devices_info);
5167
5168out:
5169 kfree(devices_info);
5170 return ret;
5171}
5172
5173
5174
5175
5176
5177
5178
5179
5180int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
5181 u64 chunk_offset, u64 chunk_size)
5182{
5183 struct btrfs_fs_info *fs_info = trans->fs_info;
5184 struct btrfs_root *extent_root = fs_info->extent_root;
5185 struct btrfs_root *chunk_root = fs_info->chunk_root;
5186 struct btrfs_key key;
5187 struct btrfs_device *device;
5188 struct btrfs_chunk *chunk;
5189 struct btrfs_stripe *stripe;
5190 struct extent_map *em;
5191 struct map_lookup *map;
5192 size_t item_size;
5193 u64 dev_offset;
5194 u64 stripe_size;
5195 int i = 0;
5196 int ret = 0;
5197
5198 em = btrfs_get_chunk_map(fs_info, chunk_offset, chunk_size);
5199 if (IS_ERR(em))
5200 return PTR_ERR(em);
5201
5202 map = em->map_lookup;
5203 item_size = btrfs_chunk_item_size(map->num_stripes);
5204 stripe_size = em->orig_block_len;
5205
5206 chunk = kzalloc(item_size, GFP_NOFS);
5207 if (!chunk) {
5208 ret = -ENOMEM;
5209 goto out;
5210 }
5211
5212
5213
5214
5215
5216
5217
5218
5219 mutex_lock(&fs_info->fs_devices->device_list_mutex);
5220 for (i = 0; i < map->num_stripes; i++) {
5221 device = map->stripes[i].dev;
5222 dev_offset = map->stripes[i].physical;
5223
5224 ret = btrfs_update_device(trans, device);
5225 if (ret)
5226 break;
5227 ret = btrfs_alloc_dev_extent(trans, device, chunk_offset,
5228 dev_offset, stripe_size);
5229 if (ret)
5230 break;
5231 }
5232 if (ret) {
5233 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
5234 goto out;
5235 }
5236
5237 stripe = &chunk->stripe;
5238 for (i = 0; i < map->num_stripes; i++) {
5239 device = map->stripes[i].dev;
5240 dev_offset = map->stripes[i].physical;
5241
5242 btrfs_set_stack_stripe_devid(stripe, device->devid);
5243 btrfs_set_stack_stripe_offset(stripe, dev_offset);
5244 memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
5245 stripe++;
5246 }
5247 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
5248
5249 btrfs_set_stack_chunk_length(chunk, chunk_size);
5250 btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
5251 btrfs_set_stack_chunk_stripe_len(chunk, map->stripe_len);
5252 btrfs_set_stack_chunk_type(chunk, map->type);
5253 btrfs_set_stack_chunk_num_stripes(chunk, map->num_stripes);
5254 btrfs_set_stack_chunk_io_align(chunk, map->stripe_len);
5255 btrfs_set_stack_chunk_io_width(chunk, map->stripe_len);
5256 btrfs_set_stack_chunk_sector_size(chunk, fs_info->sectorsize);
5257 btrfs_set_stack_chunk_sub_stripes(chunk, map->sub_stripes);
5258
5259 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
5260 key.type = BTRFS_CHUNK_ITEM_KEY;
5261 key.offset = chunk_offset;
5262
5263 ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size);
5264 if (ret == 0 && map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
5265
5266
5267
5268
5269 ret = btrfs_add_system_chunk(fs_info, &key, chunk, item_size);
5270 }
5271
5272out:
5273 kfree(chunk);
5274 free_extent_map(em);
5275 return ret;
5276}
5277
5278static noinline int init_first_rw_device(struct btrfs_trans_handle *trans)
5279{
5280 struct btrfs_fs_info *fs_info = trans->fs_info;
5281 u64 alloc_profile;
5282 int ret;
5283
5284 alloc_profile = btrfs_metadata_alloc_profile(fs_info);
5285 ret = btrfs_alloc_chunk(trans, alloc_profile);
5286 if (ret)
5287 return ret;
5288
5289 alloc_profile = btrfs_system_alloc_profile(fs_info);
5290 ret = btrfs_alloc_chunk(trans, alloc_profile);
5291 return ret;
5292}
5293
5294static inline int btrfs_chunk_max_errors(struct map_lookup *map)
5295{
5296 const int index = btrfs_bg_flags_to_raid_index(map->type);
5297
5298 return btrfs_raid_array[index].tolerated_failures;
5299}
5300
5301int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset)
5302{
5303 struct extent_map *em;
5304 struct map_lookup *map;
5305 int readonly = 0;
5306 int miss_ndevs = 0;
5307 int i;
5308
5309 em = btrfs_get_chunk_map(fs_info, chunk_offset, 1);
5310 if (IS_ERR(em))
5311 return 1;
5312
5313 map = em->map_lookup;
5314 for (i = 0; i < map->num_stripes; i++) {
5315 if (test_bit(BTRFS_DEV_STATE_MISSING,
5316 &map->stripes[i].dev->dev_state)) {
5317 miss_ndevs++;
5318 continue;
5319 }
5320 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE,
5321 &map->stripes[i].dev->dev_state)) {
5322 readonly = 1;
5323 goto end;
5324 }
5325 }
5326
5327
5328
5329
5330
5331
5332 if (miss_ndevs > btrfs_chunk_max_errors(map))
5333 readonly = 1;
5334end:
5335 free_extent_map(em);
5336 return readonly;
5337}
5338
5339void btrfs_mapping_tree_free(struct extent_map_tree *tree)
5340{
5341 struct extent_map *em;
5342
5343 while (1) {
5344 write_lock(&tree->lock);
5345 em = lookup_extent_mapping(tree, 0, (u64)-1);
5346 if (em)
5347 remove_extent_mapping(tree, em);
5348 write_unlock(&tree->lock);
5349 if (!em)
5350 break;
5351
5352 free_extent_map(em);
5353
5354 free_extent_map(em);
5355 }
5356}
5357
5358int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
5359{
5360 struct extent_map *em;
5361 struct map_lookup *map;
5362 int ret;
5363
5364 em = btrfs_get_chunk_map(fs_info, logical, len);
5365 if (IS_ERR(em))
5366
5367
5368
5369
5370
5371
5372 return 1;
5373
5374 map = em->map_lookup;
5375 if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1_MASK))
5376 ret = map->num_stripes;
5377 else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
5378 ret = map->sub_stripes;
5379 else if (map->type & BTRFS_BLOCK_GROUP_RAID5)
5380 ret = 2;
5381 else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
5382
5383
5384
5385
5386
5387
5388
5389 ret = map->num_stripes;
5390 else
5391 ret = 1;
5392 free_extent_map(em);
5393
5394 down_read(&fs_info->dev_replace.rwsem);
5395 if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace) &&
5396 fs_info->dev_replace.tgtdev)
5397 ret++;
5398 up_read(&fs_info->dev_replace.rwsem);
5399
5400 return ret;
5401}
5402
5403unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
5404 u64 logical)
5405{
5406 struct extent_map *em;
5407 struct map_lookup *map;
5408 unsigned long len = fs_info->sectorsize;
5409
5410 em = btrfs_get_chunk_map(fs_info, logical, len);
5411
5412 if (!WARN_ON(IS_ERR(em))) {
5413 map = em->map_lookup;
5414 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
5415 len = map->stripe_len * nr_data_stripes(map);
5416 free_extent_map(em);
5417 }
5418 return len;
5419}
5420
5421int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
5422{
5423 struct extent_map *em;
5424 struct map_lookup *map;
5425 int ret = 0;
5426
5427 em = btrfs_get_chunk_map(fs_info, logical, len);
5428
5429 if(!WARN_ON(IS_ERR(em))) {
5430 map = em->map_lookup;
5431 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
5432 ret = 1;
5433 free_extent_map(em);
5434 }
5435 return ret;
5436}
5437
5438static int find_live_mirror(struct btrfs_fs_info *fs_info,
5439 struct map_lookup *map, int first,
5440 int dev_replace_is_ongoing)
5441{
5442 int i;
5443 int num_stripes;
5444 int preferred_mirror;
5445 int tolerance;
5446 struct btrfs_device *srcdev;
5447
5448 ASSERT((map->type &
5449 (BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10)));
5450
5451 if (map->type & BTRFS_BLOCK_GROUP_RAID10)
5452 num_stripes = map->sub_stripes;
5453 else
5454 num_stripes = map->num_stripes;
5455
5456 preferred_mirror = first + current->pid % num_stripes;
5457
5458 if (dev_replace_is_ongoing &&
5459 fs_info->dev_replace.cont_reading_from_srcdev_mode ==
5460 BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID)
5461 srcdev = fs_info->dev_replace.srcdev;
5462 else
5463 srcdev = NULL;
5464
5465
5466
5467
5468
5469
5470 for (tolerance = 0; tolerance < 2; tolerance++) {
5471 if (map->stripes[preferred_mirror].dev->bdev &&
5472 (tolerance || map->stripes[preferred_mirror].dev != srcdev))
5473 return preferred_mirror;
5474 for (i = first; i < first + num_stripes; i++) {
5475 if (map->stripes[i].dev->bdev &&
5476 (tolerance || map->stripes[i].dev != srcdev))
5477 return i;
5478 }
5479 }
5480
5481
5482
5483
5484 return preferred_mirror;
5485}
5486
5487
5488static void sort_parity_stripes(struct btrfs_bio *bbio, int num_stripes)
5489{
5490 int i;
5491 int again = 1;
5492
5493 while (again) {
5494 again = 0;
5495 for (i = 0; i < num_stripes - 1; i++) {
5496
5497 if (bbio->raid_map[i] > bbio->raid_map[i + 1]) {
5498 swap(bbio->stripes[i], bbio->stripes[i + 1]);
5499 swap(bbio->raid_map[i], bbio->raid_map[i + 1]);
5500 again = 1;
5501 }
5502 }
5503 }
5504}
5505
5506static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes)
5507{
5508 struct btrfs_bio *bbio = kzalloc(
5509
5510 sizeof(struct btrfs_bio) +
5511
5512 sizeof(struct btrfs_bio_stripe) * (total_stripes) +
5513
5514 sizeof(int) * (real_stripes) +
5515
5516
5517
5518
5519 sizeof(u64) * (total_stripes),
5520 GFP_NOFS|__GFP_NOFAIL);
5521
5522 atomic_set(&bbio->error, 0);
5523 refcount_set(&bbio->refs, 1);
5524
5525 return bbio;
5526}
5527
5528void btrfs_get_bbio(struct btrfs_bio *bbio)
5529{
5530 WARN_ON(!refcount_read(&bbio->refs));
5531 refcount_inc(&bbio->refs);
5532}
5533
5534void btrfs_put_bbio(struct btrfs_bio *bbio)
5535{
5536 if (!bbio)
5537 return;
5538 if (refcount_dec_and_test(&bbio->refs))
5539 kfree(bbio);
5540}
5541
5542
5543
5544
5545
5546
5547static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info,
5548 u64 logical, u64 *length_ret,
5549 struct btrfs_bio **bbio_ret)
5550{
5551 struct extent_map *em;
5552 struct map_lookup *map;
5553 struct btrfs_bio *bbio;
5554 u64 length = *length_ret;
5555 u64 offset;
5556 u64 stripe_nr;
5557 u64 stripe_nr_end;
5558 u64 stripe_end_offset;
5559 u64 stripe_cnt;
5560 u64 stripe_len;
5561 u64 stripe_offset;
5562 u64 num_stripes;
5563 u32 stripe_index;
5564 u32 factor = 0;
5565 u32 sub_stripes = 0;
5566 u64 stripes_per_dev = 0;
5567 u32 remaining_stripes = 0;
5568 u32 last_stripe = 0;
5569 int ret = 0;
5570 int i;
5571
5572
5573 ASSERT(bbio_ret);
5574
5575 em = btrfs_get_chunk_map(fs_info, logical, length);
5576 if (IS_ERR(em))
5577 return PTR_ERR(em);
5578
5579 map = em->map_lookup;
5580
5581 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
5582 ret = -EOPNOTSUPP;
5583 goto out;
5584 }
5585
5586 offset = logical - em->start;
5587 length = min_t(u64, em->start + em->len - logical, length);
5588 *length_ret = length;
5589
5590 stripe_len = map->stripe_len;
5591
5592
5593
5594
5595 stripe_nr = div64_u64(offset, stripe_len);
5596
5597
5598 stripe_offset = offset - stripe_nr * stripe_len;
5599
5600 stripe_nr_end = round_up(offset + length, map->stripe_len);
5601 stripe_nr_end = div64_u64(stripe_nr_end, map->stripe_len);
5602 stripe_cnt = stripe_nr_end - stripe_nr;
5603 stripe_end_offset = stripe_nr_end * map->stripe_len -
5604 (offset + length);
5605
5606
5607
5608
5609
5610 num_stripes = 1;
5611 stripe_index = 0;
5612 if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
5613 BTRFS_BLOCK_GROUP_RAID10)) {
5614 if (map->type & BTRFS_BLOCK_GROUP_RAID0)
5615 sub_stripes = 1;
5616 else
5617 sub_stripes = map->sub_stripes;
5618
5619 factor = map->num_stripes / sub_stripes;
5620 num_stripes = min_t(u64, map->num_stripes,
5621 sub_stripes * stripe_cnt);
5622 stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
5623 stripe_index *= sub_stripes;
5624 stripes_per_dev = div_u64_rem(stripe_cnt, factor,
5625 &remaining_stripes);
5626 div_u64_rem(stripe_nr_end - 1, factor, &last_stripe);
5627 last_stripe *= sub_stripes;
5628 } else if (map->type & (BTRFS_BLOCK_GROUP_RAID1_MASK |
5629 BTRFS_BLOCK_GROUP_DUP)) {
5630 num_stripes = map->num_stripes;
5631 } else {
5632 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
5633 &stripe_index);
5634 }
5635
5636 bbio = alloc_btrfs_bio(num_stripes, 0);
5637 if (!bbio) {
5638 ret = -ENOMEM;
5639 goto out;
5640 }
5641
5642 for (i = 0; i < num_stripes; i++) {
5643 bbio->stripes[i].physical =
5644 map->stripes[stripe_index].physical +
5645 stripe_offset + stripe_nr * map->stripe_len;
5646 bbio->stripes[i].dev = map->stripes[stripe_index].dev;
5647
5648 if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
5649 BTRFS_BLOCK_GROUP_RAID10)) {
5650 bbio->stripes[i].length = stripes_per_dev *
5651 map->stripe_len;
5652
5653 if (i / sub_stripes < remaining_stripes)
5654 bbio->stripes[i].length +=
5655 map->stripe_len;
5656
5657
5658
5659
5660
5661
5662
5663
5664
5665 if (i < sub_stripes)
5666 bbio->stripes[i].length -=
5667 stripe_offset;
5668
5669 if (stripe_index >= last_stripe &&
5670 stripe_index <= (last_stripe +
5671 sub_stripes - 1))
5672 bbio->stripes[i].length -=
5673 stripe_end_offset;
5674
5675 if (i == sub_stripes - 1)
5676 stripe_offset = 0;
5677 } else {
5678 bbio->stripes[i].length = length;
5679 }
5680
5681 stripe_index++;
5682 if (stripe_index == map->num_stripes) {
5683 stripe_index = 0;
5684 stripe_nr++;
5685 }
5686 }
5687
5688 *bbio_ret = bbio;
5689 bbio->map_type = map->type;
5690 bbio->num_stripes = num_stripes;
5691out:
5692 free_extent_map(em);
5693 return ret;
5694}
5695
5696
5697
5698
5699
5700
5701
5702
5703
5704
5705
5706
5707
5708
5709static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info,
5710 u64 logical, u64 length,
5711 u64 srcdev_devid, int *mirror_num,
5712 u64 *physical)
5713{
5714 struct btrfs_bio *bbio = NULL;
5715 int num_stripes;
5716 int index_srcdev = 0;
5717 int found = 0;
5718 u64 physical_of_found = 0;
5719 int i;
5720 int ret = 0;
5721
5722 ret = __btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
5723 logical, &length, &bbio, 0, 0);
5724 if (ret) {
5725 ASSERT(bbio == NULL);
5726 return ret;
5727 }
5728
5729 num_stripes = bbio->num_stripes;
5730 if (*mirror_num > num_stripes) {
5731
5732
5733
5734
5735
5736 btrfs_put_bbio(bbio);
5737 return -EIO;
5738 }
5739
5740
5741
5742
5743
5744
5745 for (i = 0; i < num_stripes; i++) {
5746 if (bbio->stripes[i].dev->devid != srcdev_devid)
5747 continue;
5748
5749
5750
5751
5752
5753 if (found &&
5754 physical_of_found <= bbio->stripes[i].physical)
5755 continue;
5756
5757 index_srcdev = i;
5758 found = 1;
5759 physical_of_found = bbio->stripes[i].physical;
5760 }
5761
5762 btrfs_put_bbio(bbio);
5763
5764 ASSERT(found);
5765 if (!found)
5766 return -EIO;
5767
5768 *mirror_num = index_srcdev + 1;
5769 *physical = physical_of_found;
5770 return ret;
5771}
5772
5773static void handle_ops_on_dev_replace(enum btrfs_map_op op,
5774 struct btrfs_bio **bbio_ret,
5775 struct btrfs_dev_replace *dev_replace,
5776 int *num_stripes_ret, int *max_errors_ret)
5777{
5778 struct btrfs_bio *bbio = *bbio_ret;
5779 u64 srcdev_devid = dev_replace->srcdev->devid;
5780 int tgtdev_indexes = 0;
5781 int num_stripes = *num_stripes_ret;
5782 int max_errors = *max_errors_ret;
5783 int i;
5784
5785 if (op == BTRFS_MAP_WRITE) {
5786 int index_where_to_add;
5787
5788
5789
5790
5791
5792
5793
5794
5795
5796
5797
5798
5799 index_where_to_add = num_stripes;
5800 for (i = 0; i < num_stripes; i++) {
5801 if (bbio->stripes[i].dev->devid == srcdev_devid) {
5802
5803 struct btrfs_bio_stripe *new =
5804 bbio->stripes + index_where_to_add;
5805 struct btrfs_bio_stripe *old =
5806 bbio->stripes + i;
5807
5808 new->physical = old->physical;
5809 new->length = old->length;
5810 new->dev = dev_replace->tgtdev;
5811 bbio->tgtdev_map[i] = index_where_to_add;
5812 index_where_to_add++;
5813 max_errors++;
5814 tgtdev_indexes++;
5815 }
5816 }
5817 num_stripes = index_where_to_add;
5818 } else if (op == BTRFS_MAP_GET_READ_MIRRORS) {
5819 int index_srcdev = 0;
5820 int found = 0;
5821 u64 physical_of_found = 0;
5822
5823
5824
5825
5826
5827
5828
5829
5830 for (i = 0; i < num_stripes; i++) {
5831 if (bbio->stripes[i].dev->devid == srcdev_devid) {
5832
5833
5834
5835
5836
5837 if (found &&
5838 physical_of_found <=
5839 bbio->stripes[i].physical)
5840 continue;
5841 index_srcdev = i;
5842 found = 1;
5843 physical_of_found = bbio->stripes[i].physical;
5844 }
5845 }
5846 if (found) {
5847 struct btrfs_bio_stripe *tgtdev_stripe =
5848 bbio->stripes + num_stripes;
5849
5850 tgtdev_stripe->physical = physical_of_found;
5851 tgtdev_stripe->length =
5852 bbio->stripes[index_srcdev].length;
5853 tgtdev_stripe->dev = dev_replace->tgtdev;
5854 bbio->tgtdev_map[index_srcdev] = num_stripes;
5855
5856 tgtdev_indexes++;
5857 num_stripes++;
5858 }
5859 }
5860
5861 *num_stripes_ret = num_stripes;
5862 *max_errors_ret = max_errors;
5863 bbio->num_tgtdevs = tgtdev_indexes;
5864 *bbio_ret = bbio;
5865}
5866
5867static bool need_full_stripe(enum btrfs_map_op op)
5868{
5869 return (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS);
5870}
5871
5872
5873
5874
5875
5876
5877
5878
5879
5880
5881
5882
5883
5884
5885
5886int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
5887 u64 logical, u64 len, struct btrfs_io_geometry *io_geom)
5888{
5889 struct extent_map *em;
5890 struct map_lookup *map;
5891 u64 offset;
5892 u64 stripe_offset;
5893 u64 stripe_nr;
5894 u64 stripe_len;
5895 u64 raid56_full_stripe_start = (u64)-1;
5896 int data_stripes;
5897 int ret = 0;
5898
5899 ASSERT(op != BTRFS_MAP_DISCARD);
5900
5901 em = btrfs_get_chunk_map(fs_info, logical, len);
5902 if (IS_ERR(em))
5903 return PTR_ERR(em);
5904
5905 map = em->map_lookup;
5906
5907 offset = logical - em->start;
5908
5909 stripe_len = map->stripe_len;
5910
5911 stripe_nr = div64_u64(offset, stripe_len);
5912
5913 stripe_offset = stripe_nr * stripe_len;
5914 if (offset < stripe_offset) {
5915 btrfs_crit(fs_info,
5916"stripe math has gone wrong, stripe_offset=%llu offset=%llu start=%llu logical=%llu stripe_len=%llu",
5917 stripe_offset, offset, em->start, logical, stripe_len);
5918 ret = -EINVAL;
5919 goto out;
5920 }
5921
5922
5923 stripe_offset = offset - stripe_offset;
5924 data_stripes = nr_data_stripes(map);
5925
5926 if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
5927 u64 max_len = stripe_len - stripe_offset;
5928
5929
5930
5931
5932 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
5933 unsigned long full_stripe_len = stripe_len * data_stripes;
5934 raid56_full_stripe_start = offset;
5935
5936
5937
5938
5939
5940 raid56_full_stripe_start = div64_u64(raid56_full_stripe_start,
5941 full_stripe_len);
5942 raid56_full_stripe_start *= full_stripe_len;
5943
5944
5945
5946
5947
5948
5949 if (op == BTRFS_MAP_WRITE) {
5950 max_len = stripe_len * data_stripes -
5951 (offset - raid56_full_stripe_start);
5952 }
5953 }
5954 len = min_t(u64, em->len - offset, max_len);
5955 } else {
5956 len = em->len - offset;
5957 }
5958
5959 io_geom->len = len;
5960 io_geom->offset = offset;
5961 io_geom->stripe_len = stripe_len;
5962 io_geom->stripe_nr = stripe_nr;
5963 io_geom->stripe_offset = stripe_offset;
5964 io_geom->raid56_stripe_offset = raid56_full_stripe_start;
5965
5966out:
5967
5968 free_extent_map(em);
5969 return ret;
5970}
5971
5972static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
5973 enum btrfs_map_op op,
5974 u64 logical, u64 *length,
5975 struct btrfs_bio **bbio_ret,
5976 int mirror_num, int need_raid_map)
5977{
5978 struct extent_map *em;
5979 struct map_lookup *map;
5980 u64 stripe_offset;
5981 u64 stripe_nr;
5982 u64 stripe_len;
5983 u32 stripe_index;
5984 int data_stripes;
5985 int i;
5986 int ret = 0;
5987 int num_stripes;
5988 int max_errors = 0;
5989 int tgtdev_indexes = 0;
5990 struct btrfs_bio *bbio = NULL;
5991 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
5992 int dev_replace_is_ongoing = 0;
5993 int num_alloc_stripes;
5994 int patch_the_first_stripe_for_dev_replace = 0;
5995 u64 physical_to_patch_in_first_stripe = 0;
5996 u64 raid56_full_stripe_start = (u64)-1;
5997 struct btrfs_io_geometry geom;
5998
5999 ASSERT(bbio_ret);
6000 ASSERT(op != BTRFS_MAP_DISCARD);
6001
6002 ret = btrfs_get_io_geometry(fs_info, op, logical, *length, &geom);
6003 if (ret < 0)
6004 return ret;
6005
6006 em = btrfs_get_chunk_map(fs_info, logical, *length);
6007 ASSERT(!IS_ERR(em));
6008 map = em->map_lookup;
6009
6010 *length = geom.len;
6011 stripe_len = geom.stripe_len;
6012 stripe_nr = geom.stripe_nr;
6013 stripe_offset = geom.stripe_offset;
6014 raid56_full_stripe_start = geom.raid56_stripe_offset;
6015 data_stripes = nr_data_stripes(map);
6016
6017 down_read(&dev_replace->rwsem);
6018 dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
6019
6020
6021
6022
6023 if (!dev_replace_is_ongoing)
6024 up_read(&dev_replace->rwsem);
6025
6026 if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
6027 !need_full_stripe(op) && dev_replace->tgtdev != NULL) {
6028 ret = get_extra_mirror_from_replace(fs_info, logical, *length,
6029 dev_replace->srcdev->devid,
6030 &mirror_num,
6031 &physical_to_patch_in_first_stripe);
6032 if (ret)
6033 goto out;
6034 else
6035 patch_the_first_stripe_for_dev_replace = 1;
6036 } else if (mirror_num > map->num_stripes) {
6037 mirror_num = 0;
6038 }
6039
6040 num_stripes = 1;
6041 stripe_index = 0;
6042 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
6043 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
6044 &stripe_index);
6045 if (!need_full_stripe(op))
6046 mirror_num = 1;
6047 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
6048 if (need_full_stripe(op))
6049 num_stripes = map->num_stripes;
6050 else if (mirror_num)
6051 stripe_index = mirror_num - 1;
6052 else {
6053 stripe_index = find_live_mirror(fs_info, map, 0,
6054 dev_replace_is_ongoing);
6055 mirror_num = stripe_index + 1;
6056 }
6057
6058 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
6059 if (need_full_stripe(op)) {
6060 num_stripes = map->num_stripes;
6061 } else if (mirror_num) {
6062 stripe_index = mirror_num - 1;
6063 } else {
6064 mirror_num = 1;
6065 }
6066
6067 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
6068 u32 factor = map->num_stripes / map->sub_stripes;
6069
6070 stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
6071 stripe_index *= map->sub_stripes;
6072
6073 if (need_full_stripe(op))
6074 num_stripes = map->sub_stripes;
6075 else if (mirror_num)
6076 stripe_index += mirror_num - 1;
6077 else {
6078 int old_stripe_index = stripe_index;
6079 stripe_index = find_live_mirror(fs_info, map,
6080 stripe_index,
6081 dev_replace_is_ongoing);
6082 mirror_num = stripe_index - old_stripe_index + 1;
6083 }
6084
6085 } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
6086 if (need_raid_map && (need_full_stripe(op) || mirror_num > 1)) {
6087
6088 stripe_nr = div64_u64(raid56_full_stripe_start,
6089 stripe_len * data_stripes);
6090
6091
6092 num_stripes = map->num_stripes;
6093 max_errors = nr_parity_stripes(map);
6094
6095 *length = map->stripe_len;
6096 stripe_index = 0;
6097 stripe_offset = 0;
6098 } else {
6099
6100
6101
6102
6103
6104 stripe_nr = div_u64_rem(stripe_nr,
6105 data_stripes, &stripe_index);
6106 if (mirror_num > 1)
6107 stripe_index = data_stripes + mirror_num - 2;
6108
6109
6110 div_u64_rem(stripe_nr + stripe_index, map->num_stripes,
6111 &stripe_index);
6112 if (!need_full_stripe(op) && mirror_num <= 1)
6113 mirror_num = 1;
6114 }
6115 } else {
6116
6117
6118
6119
6120
6121 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
6122 &stripe_index);
6123 mirror_num = stripe_index + 1;
6124 }
6125 if (stripe_index >= map->num_stripes) {
6126 btrfs_crit(fs_info,
6127 "stripe index math went horribly wrong, got stripe_index=%u, num_stripes=%u",
6128 stripe_index, map->num_stripes);
6129 ret = -EINVAL;
6130 goto out;
6131 }
6132
6133 num_alloc_stripes = num_stripes;
6134 if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) {
6135 if (op == BTRFS_MAP_WRITE)
6136 num_alloc_stripes <<= 1;
6137 if (op == BTRFS_MAP_GET_READ_MIRRORS)
6138 num_alloc_stripes++;
6139 tgtdev_indexes = num_stripes;
6140 }
6141
6142 bbio = alloc_btrfs_bio(num_alloc_stripes, tgtdev_indexes);
6143 if (!bbio) {
6144 ret = -ENOMEM;
6145 goto out;
6146 }
6147 if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL)
6148 bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes);
6149
6150
6151 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map &&
6152 (need_full_stripe(op) || mirror_num > 1)) {
6153 u64 tmp;
6154 unsigned rot;
6155
6156 bbio->raid_map = (u64 *)((void *)bbio->stripes +
6157 sizeof(struct btrfs_bio_stripe) *
6158 num_alloc_stripes +
6159 sizeof(int) * tgtdev_indexes);
6160
6161
6162 div_u64_rem(stripe_nr, num_stripes, &rot);
6163
6164
6165 tmp = stripe_nr * data_stripes;
6166 for (i = 0; i < data_stripes; i++)
6167 bbio->raid_map[(i+rot) % num_stripes] =
6168 em->start + (tmp + i) * map->stripe_len;
6169
6170 bbio->raid_map[(i+rot) % map->num_stripes] = RAID5_P_STRIPE;
6171 if (map->type & BTRFS_BLOCK_GROUP_RAID6)
6172 bbio->raid_map[(i+rot+1) % num_stripes] =
6173 RAID6_Q_STRIPE;
6174 }
6175
6176
6177 for (i = 0; i < num_stripes; i++) {
6178 bbio->stripes[i].physical =
6179 map->stripes[stripe_index].physical +
6180 stripe_offset +
6181 stripe_nr * map->stripe_len;
6182 bbio->stripes[i].dev =
6183 map->stripes[stripe_index].dev;
6184 stripe_index++;
6185 }
6186
6187 if (need_full_stripe(op))
6188 max_errors = btrfs_chunk_max_errors(map);
6189
6190 if (bbio->raid_map)
6191 sort_parity_stripes(bbio, num_stripes);
6192
6193 if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL &&
6194 need_full_stripe(op)) {
6195 handle_ops_on_dev_replace(op, &bbio, dev_replace, &num_stripes,
6196 &max_errors);
6197 }
6198
6199 *bbio_ret = bbio;
6200 bbio->map_type = map->type;
6201 bbio->num_stripes = num_stripes;
6202 bbio->max_errors = max_errors;
6203 bbio->mirror_num = mirror_num;
6204
6205
6206
6207
6208
6209
6210 if (patch_the_first_stripe_for_dev_replace && num_stripes > 0) {
6211 WARN_ON(num_stripes > 1);
6212 bbio->stripes[0].dev = dev_replace->tgtdev;
6213 bbio->stripes[0].physical = physical_to_patch_in_first_stripe;
6214 bbio->mirror_num = map->num_stripes + 1;
6215 }
6216out:
6217 if (dev_replace_is_ongoing) {
6218 lockdep_assert_held(&dev_replace->rwsem);
6219
6220 up_read(&dev_replace->rwsem);
6221 }
6222 free_extent_map(em);
6223 return ret;
6224}
6225
6226int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
6227 u64 logical, u64 *length,
6228 struct btrfs_bio **bbio_ret, int mirror_num)
6229{
6230 if (op == BTRFS_MAP_DISCARD)
6231 return __btrfs_map_block_for_discard(fs_info, logical,
6232 length, bbio_ret);
6233
6234 return __btrfs_map_block(fs_info, op, logical, length, bbio_ret,
6235 mirror_num, 0);
6236}
6237
6238
6239int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
6240 u64 logical, u64 *length,
6241 struct btrfs_bio **bbio_ret)
6242{
6243 return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, 0, 1);
6244}
6245
6246static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio)
6247{
6248 bio->bi_private = bbio->private;
6249 bio->bi_end_io = bbio->end_io;
6250 bio_endio(bio);
6251
6252 btrfs_put_bbio(bbio);
6253}
6254
6255static void btrfs_end_bio(struct bio *bio)
6256{
6257 struct btrfs_bio *bbio = bio->bi_private;
6258 int is_orig_bio = 0;
6259
6260 if (bio->bi_status) {
6261 atomic_inc(&bbio->error);
6262 if (bio->bi_status == BLK_STS_IOERR ||
6263 bio->bi_status == BLK_STS_TARGET) {
6264 unsigned int stripe_index =
6265 btrfs_io_bio(bio)->stripe_index;
6266 struct btrfs_device *dev;
6267
6268 BUG_ON(stripe_index >= bbio->num_stripes);
6269 dev = bbio->stripes[stripe_index].dev;
6270 if (dev->bdev) {
6271 if (bio_op(bio) == REQ_OP_WRITE)
6272 btrfs_dev_stat_inc_and_print(dev,
6273 BTRFS_DEV_STAT_WRITE_ERRS);
6274 else if (!(bio->bi_opf & REQ_RAHEAD))
6275 btrfs_dev_stat_inc_and_print(dev,
6276 BTRFS_DEV_STAT_READ_ERRS);
6277 if (bio->bi_opf & REQ_PREFLUSH)
6278 btrfs_dev_stat_inc_and_print(dev,
6279 BTRFS_DEV_STAT_FLUSH_ERRS);
6280 }
6281 }
6282 }
6283
6284 if (bio == bbio->orig_bio)
6285 is_orig_bio = 1;
6286
6287 btrfs_bio_counter_dec(bbio->fs_info);
6288
6289 if (atomic_dec_and_test(&bbio->stripes_pending)) {
6290 if (!is_orig_bio) {
6291 bio_put(bio);
6292 bio = bbio->orig_bio;
6293 }
6294
6295 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
6296
6297
6298
6299 if (atomic_read(&bbio->error) > bbio->max_errors) {
6300 bio->bi_status = BLK_STS_IOERR;
6301 } else {
6302
6303
6304
6305
6306 bio->bi_status = BLK_STS_OK;
6307 }
6308
6309 btrfs_end_bbio(bbio, bio);
6310 } else if (!is_orig_bio) {
6311 bio_put(bio);
6312 }
6313}
6314
6315static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio,
6316 u64 physical, int dev_nr)
6317{
6318 struct btrfs_device *dev = bbio->stripes[dev_nr].dev;
6319 struct btrfs_fs_info *fs_info = bbio->fs_info;
6320
6321 bio->bi_private = bbio;
6322 btrfs_io_bio(bio)->stripe_index = dev_nr;
6323 bio->bi_end_io = btrfs_end_bio;
6324 bio->bi_iter.bi_sector = physical >> 9;
6325 btrfs_debug_in_rcu(fs_info,
6326 "btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
6327 bio_op(bio), bio->bi_opf, (u64)bio->bi_iter.bi_sector,
6328 (unsigned long)dev->bdev->bd_dev, rcu_str_deref(dev->name),
6329 dev->devid, bio->bi_iter.bi_size);
6330 bio_set_dev(bio, dev->bdev);
6331
6332 btrfs_bio_counter_inc_noblocked(fs_info);
6333
6334 btrfsic_submit_bio(bio);
6335}
6336
6337static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
6338{
6339 atomic_inc(&bbio->error);
6340 if (atomic_dec_and_test(&bbio->stripes_pending)) {
6341
6342 WARN_ON(bio != bbio->orig_bio);
6343
6344 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
6345 bio->bi_iter.bi_sector = logical >> 9;
6346 if (atomic_read(&bbio->error) > bbio->max_errors)
6347 bio->bi_status = BLK_STS_IOERR;
6348 else
6349 bio->bi_status = BLK_STS_OK;
6350 btrfs_end_bbio(bbio, bio);
6351 }
6352}
6353
6354blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
6355 int mirror_num)
6356{
6357 struct btrfs_device *dev;
6358 struct bio *first_bio = bio;
6359 u64 logical = (u64)bio->bi_iter.bi_sector << 9;
6360 u64 length = 0;
6361 u64 map_length;
6362 int ret;
6363 int dev_nr;
6364 int total_devs;
6365 struct btrfs_bio *bbio = NULL;
6366
6367 length = bio->bi_iter.bi_size;
6368 map_length = length;
6369
6370 btrfs_bio_counter_inc_blocked(fs_info);
6371 ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical,
6372 &map_length, &bbio, mirror_num, 1);
6373 if (ret) {
6374 btrfs_bio_counter_dec(fs_info);
6375 return errno_to_blk_status(ret);
6376 }
6377
6378 total_devs = bbio->num_stripes;
6379 bbio->orig_bio = first_bio;
6380 bbio->private = first_bio->bi_private;
6381 bbio->end_io = first_bio->bi_end_io;
6382 bbio->fs_info = fs_info;
6383 atomic_set(&bbio->stripes_pending, bbio->num_stripes);
6384
6385 if ((bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
6386 ((bio_op(bio) == REQ_OP_WRITE) || (mirror_num > 1))) {
6387
6388
6389 if (bio_op(bio) == REQ_OP_WRITE) {
6390 ret = raid56_parity_write(fs_info, bio, bbio,
6391 map_length);
6392 } else {
6393 ret = raid56_parity_recover(fs_info, bio, bbio,
6394 map_length, mirror_num, 1);
6395 }
6396
6397 btrfs_bio_counter_dec(fs_info);
6398 return errno_to_blk_status(ret);
6399 }
6400
6401 if (map_length < length) {
6402 btrfs_crit(fs_info,
6403 "mapping failed logical %llu bio len %llu len %llu",
6404 logical, length, map_length);
6405 BUG();
6406 }
6407
6408 for (dev_nr = 0; dev_nr < total_devs; dev_nr++) {
6409 dev = bbio->stripes[dev_nr].dev;
6410 if (!dev || !dev->bdev || test_bit(BTRFS_DEV_STATE_MISSING,
6411 &dev->dev_state) ||
6412 (bio_op(first_bio) == REQ_OP_WRITE &&
6413 !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
6414 bbio_error(bbio, first_bio, logical);
6415 continue;
6416 }
6417
6418 if (dev_nr < total_devs - 1)
6419 bio = btrfs_bio_clone(first_bio);
6420 else
6421 bio = first_bio;
6422
6423 submit_stripe_bio(bbio, bio, bbio->stripes[dev_nr].physical,
6424 dev_nr);
6425 }
6426 btrfs_bio_counter_dec(fs_info);
6427 return BLK_STS_OK;
6428}
6429
6430
6431
6432
6433
6434
6435
6436
6437
6438
6439struct btrfs_device *btrfs_find_device(struct btrfs_fs_devices *fs_devices,
6440 u64 devid, u8 *uuid, u8 *fsid,
6441 bool seed)
6442{
6443 struct btrfs_device *device;
6444
6445 while (fs_devices) {
6446 if (!fsid ||
6447 !memcmp(fs_devices->metadata_uuid, fsid, BTRFS_FSID_SIZE)) {
6448 list_for_each_entry(device, &fs_devices->devices,
6449 dev_list) {
6450 if (device->devid == devid &&
6451 (!uuid || memcmp(device->uuid, uuid,
6452 BTRFS_UUID_SIZE) == 0))
6453 return device;
6454 }
6455 }
6456 if (seed)
6457 fs_devices = fs_devices->seed;
6458 else
6459 return NULL;
6460 }
6461 return NULL;
6462}
6463
6464static struct btrfs_device *add_missing_dev(struct btrfs_fs_devices *fs_devices,
6465 u64 devid, u8 *dev_uuid)
6466{
6467 struct btrfs_device *device;
6468
6469 device = btrfs_alloc_device(NULL, &devid, dev_uuid);
6470 if (IS_ERR(device))
6471 return device;
6472
6473 list_add(&device->dev_list, &fs_devices->devices);
6474 device->fs_devices = fs_devices;
6475 fs_devices->num_devices++;
6476
6477 set_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
6478 fs_devices->missing_devices++;
6479
6480 return device;
6481}
6482
6483
6484
6485
6486
6487
6488
6489
6490
6491
6492
6493
6494
6495
6496struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
6497 const u64 *devid,
6498 const u8 *uuid)
6499{
6500 struct btrfs_device *dev;
6501 u64 tmp;
6502
6503 if (WARN_ON(!devid && !fs_info))
6504 return ERR_PTR(-EINVAL);
6505
6506 dev = __alloc_device();
6507 if (IS_ERR(dev))
6508 return dev;
6509
6510 if (devid)
6511 tmp = *devid;
6512 else {
6513 int ret;
6514
6515 ret = find_next_devid(fs_info, &tmp);
6516 if (ret) {
6517 btrfs_free_device(dev);
6518 return ERR_PTR(ret);
6519 }
6520 }
6521 dev->devid = tmp;
6522
6523 if (uuid)
6524 memcpy(dev->uuid, uuid, BTRFS_UUID_SIZE);
6525 else
6526 generate_random_uuid(dev->uuid);
6527
6528 return dev;
6529}
6530
6531static void btrfs_report_missing_device(struct btrfs_fs_info *fs_info,
6532 u64 devid, u8 *uuid, bool error)
6533{
6534 if (error)
6535 btrfs_err_rl(fs_info, "devid %llu uuid %pU is missing",
6536 devid, uuid);
6537 else
6538 btrfs_warn_rl(fs_info, "devid %llu uuid %pU is missing",
6539 devid, uuid);
6540}
6541
6542static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes)
6543{
6544 int index = btrfs_bg_flags_to_raid_index(type);
6545 int ncopies = btrfs_raid_array[index].ncopies;
6546 const int nparity = btrfs_raid_array[index].nparity;
6547 int data_stripes;
6548
6549 if (nparity)
6550 data_stripes = num_stripes - nparity;
6551 else
6552 data_stripes = num_stripes / ncopies;
6553
6554 return div_u64(chunk_len, data_stripes);
6555}
6556
6557static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
6558 struct btrfs_chunk *chunk)
6559{
6560 struct btrfs_fs_info *fs_info = leaf->fs_info;
6561 struct extent_map_tree *map_tree = &fs_info->mapping_tree;
6562 struct map_lookup *map;
6563 struct extent_map *em;
6564 u64 logical;
6565 u64 length;
6566 u64 devid;
6567 u8 uuid[BTRFS_UUID_SIZE];
6568 int num_stripes;
6569 int ret;
6570 int i;
6571
6572 logical = key->offset;
6573 length = btrfs_chunk_length(leaf, chunk);
6574 num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
6575
6576
6577
6578
6579
6580 if (leaf->start == BTRFS_SUPER_INFO_OFFSET) {
6581 ret = btrfs_check_chunk_valid(leaf, chunk, logical);
6582 if (ret)
6583 return ret;
6584 }
6585
6586 read_lock(&map_tree->lock);
6587 em = lookup_extent_mapping(map_tree, logical, 1);
6588 read_unlock(&map_tree->lock);
6589
6590
6591 if (em && em->start <= logical && em->start + em->len > logical) {
6592 free_extent_map(em);
6593 return 0;
6594 } else if (em) {
6595 free_extent_map(em);
6596 }
6597
6598 em = alloc_extent_map();
6599 if (!em)
6600 return -ENOMEM;
6601 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
6602 if (!map) {
6603 free_extent_map(em);
6604 return -ENOMEM;
6605 }
6606
6607 set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
6608 em->map_lookup = map;
6609 em->start = logical;
6610 em->len = length;
6611 em->orig_start = 0;
6612 em->block_start = 0;
6613 em->block_len = em->len;
6614
6615 map->num_stripes = num_stripes;
6616 map->io_width = btrfs_chunk_io_width(leaf, chunk);
6617 map->io_align = btrfs_chunk_io_align(leaf, chunk);
6618 map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
6619 map->type = btrfs_chunk_type(leaf, chunk);
6620 map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
6621 map->verified_stripes = 0;
6622 em->orig_block_len = calc_stripe_length(map->type, em->len,
6623 map->num_stripes);
6624 for (i = 0; i < num_stripes; i++) {
6625 map->stripes[i].physical =
6626 btrfs_stripe_offset_nr(leaf, chunk, i);
6627 devid = btrfs_stripe_devid_nr(leaf, chunk, i);
6628 read_extent_buffer(leaf, uuid, (unsigned long)
6629 btrfs_stripe_dev_uuid_nr(chunk, i),
6630 BTRFS_UUID_SIZE);
6631 map->stripes[i].dev = btrfs_find_device(fs_info->fs_devices,
6632 devid, uuid, NULL, true);
6633 if (!map->stripes[i].dev &&
6634 !btrfs_test_opt(fs_info, DEGRADED)) {
6635 free_extent_map(em);
6636 btrfs_report_missing_device(fs_info, devid, uuid, true);
6637 return -ENOENT;
6638 }
6639 if (!map->stripes[i].dev) {
6640 map->stripes[i].dev =
6641 add_missing_dev(fs_info->fs_devices, devid,
6642 uuid);
6643 if (IS_ERR(map->stripes[i].dev)) {
6644 free_extent_map(em);
6645 btrfs_err(fs_info,
6646 "failed to init missing dev %llu: %ld",
6647 devid, PTR_ERR(map->stripes[i].dev));
6648 return PTR_ERR(map->stripes[i].dev);
6649 }
6650 btrfs_report_missing_device(fs_info, devid, uuid, false);
6651 }
6652 set_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
6653 &(map->stripes[i].dev->dev_state));
6654
6655 }
6656
6657 write_lock(&map_tree->lock);
6658 ret = add_extent_mapping(map_tree, em, 0);
6659 write_unlock(&map_tree->lock);
6660 if (ret < 0) {
6661 btrfs_err(fs_info,
6662 "failed to add chunk map, start=%llu len=%llu: %d",
6663 em->start, em->len, ret);
6664 }
6665 free_extent_map(em);
6666
6667 return ret;
6668}
6669
6670static void fill_device_from_item(struct extent_buffer *leaf,
6671 struct btrfs_dev_item *dev_item,
6672 struct btrfs_device *device)
6673{
6674 unsigned long ptr;
6675
6676 device->devid = btrfs_device_id(leaf, dev_item);
6677 device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item);
6678 device->total_bytes = device->disk_total_bytes;
6679 device->commit_total_bytes = device->disk_total_bytes;
6680 device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
6681 device->commit_bytes_used = device->bytes_used;
6682 device->type = btrfs_device_type(leaf, dev_item);
6683 device->io_align = btrfs_device_io_align(leaf, dev_item);
6684 device->io_width = btrfs_device_io_width(leaf, dev_item);
6685 device->sector_size = btrfs_device_sector_size(leaf, dev_item);
6686 WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID);
6687 clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
6688
6689 ptr = btrfs_device_uuid(dev_item);
6690 read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
6691}
6692
6693static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info,
6694 u8 *fsid)
6695{
6696 struct btrfs_fs_devices *fs_devices;
6697 int ret;
6698
6699 lockdep_assert_held(&uuid_mutex);
6700 ASSERT(fsid);
6701
6702 fs_devices = fs_info->fs_devices->seed;
6703 while (fs_devices) {
6704 if (!memcmp(fs_devices->fsid, fsid, BTRFS_FSID_SIZE))
6705 return fs_devices;
6706
6707 fs_devices = fs_devices->seed;
6708 }
6709
6710 fs_devices = find_fsid(fsid, NULL);
6711 if (!fs_devices) {
6712 if (!btrfs_test_opt(fs_info, DEGRADED))
6713 return ERR_PTR(-ENOENT);
6714
6715 fs_devices = alloc_fs_devices(fsid, NULL);
6716 if (IS_ERR(fs_devices))
6717 return fs_devices;
6718
6719 fs_devices->seeding = true;
6720 fs_devices->opened = 1;
6721 return fs_devices;
6722 }
6723
6724 fs_devices = clone_fs_devices(fs_devices);
6725 if (IS_ERR(fs_devices))
6726 return fs_devices;
6727
6728 ret = open_fs_devices(fs_devices, FMODE_READ, fs_info->bdev_holder);
6729 if (ret) {
6730 free_fs_devices(fs_devices);
6731 fs_devices = ERR_PTR(ret);
6732 goto out;
6733 }
6734
6735 if (!fs_devices->seeding) {
6736 close_fs_devices(fs_devices);
6737 free_fs_devices(fs_devices);
6738 fs_devices = ERR_PTR(-EINVAL);
6739 goto out;
6740 }
6741
6742 fs_devices->seed = fs_info->fs_devices->seed;
6743 fs_info->fs_devices->seed = fs_devices;
6744out:
6745 return fs_devices;
6746}
6747
6748static int read_one_dev(struct extent_buffer *leaf,
6749 struct btrfs_dev_item *dev_item)
6750{
6751 struct btrfs_fs_info *fs_info = leaf->fs_info;
6752 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
6753 struct btrfs_device *device;
6754 u64 devid;
6755 int ret;
6756 u8 fs_uuid[BTRFS_FSID_SIZE];
6757 u8 dev_uuid[BTRFS_UUID_SIZE];
6758
6759 devid = btrfs_device_id(leaf, dev_item);
6760 read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
6761 BTRFS_UUID_SIZE);
6762 read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
6763 BTRFS_FSID_SIZE);
6764
6765 if (memcmp(fs_uuid, fs_devices->metadata_uuid, BTRFS_FSID_SIZE)) {
6766 fs_devices = open_seed_devices(fs_info, fs_uuid);
6767 if (IS_ERR(fs_devices))
6768 return PTR_ERR(fs_devices);
6769 }
6770
6771 device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
6772 fs_uuid, true);
6773 if (!device) {
6774 if (!btrfs_test_opt(fs_info, DEGRADED)) {
6775 btrfs_report_missing_device(fs_info, devid,
6776 dev_uuid, true);
6777 return -ENOENT;
6778 }
6779
6780 device = add_missing_dev(fs_devices, devid, dev_uuid);
6781 if (IS_ERR(device)) {
6782 btrfs_err(fs_info,
6783 "failed to add missing dev %llu: %ld",
6784 devid, PTR_ERR(device));
6785 return PTR_ERR(device);
6786 }
6787 btrfs_report_missing_device(fs_info, devid, dev_uuid, false);
6788 } else {
6789 if (!device->bdev) {
6790 if (!btrfs_test_opt(fs_info, DEGRADED)) {
6791 btrfs_report_missing_device(fs_info,
6792 devid, dev_uuid, true);
6793 return -ENOENT;
6794 }
6795 btrfs_report_missing_device(fs_info, devid,
6796 dev_uuid, false);
6797 }
6798
6799 if (!device->bdev &&
6800 !test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) {
6801
6802
6803
6804
6805
6806
6807 device->fs_devices->missing_devices++;
6808 set_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
6809 }
6810
6811
6812 if (device->fs_devices != fs_devices) {
6813 ASSERT(test_bit(BTRFS_DEV_STATE_MISSING,
6814 &device->dev_state));
6815
6816 list_move(&device->dev_list, &fs_devices->devices);
6817 device->fs_devices->num_devices--;
6818 fs_devices->num_devices++;
6819
6820 device->fs_devices->missing_devices--;
6821 fs_devices->missing_devices++;
6822
6823 device->fs_devices = fs_devices;
6824 }
6825 }
6826
6827 if (device->fs_devices != fs_info->fs_devices) {
6828 BUG_ON(test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state));
6829 if (device->generation !=
6830 btrfs_device_generation(leaf, dev_item))
6831 return -EINVAL;
6832 }
6833
6834 fill_device_from_item(leaf, dev_item, device);
6835 set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
6836 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
6837 !test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
6838 device->fs_devices->total_rw_bytes += device->total_bytes;
6839 atomic64_add(device->total_bytes - device->bytes_used,
6840 &fs_info->free_chunk_space);
6841 }
6842 ret = 0;
6843 return ret;
6844}
6845
6846int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
6847{
6848 struct btrfs_root *root = fs_info->tree_root;
6849 struct btrfs_super_block *super_copy = fs_info->super_copy;
6850 struct extent_buffer *sb;
6851 struct btrfs_disk_key *disk_key;
6852 struct btrfs_chunk *chunk;
6853 u8 *array_ptr;
6854 unsigned long sb_array_offset;
6855 int ret = 0;
6856 u32 num_stripes;
6857 u32 array_size;
6858 u32 len = 0;
6859 u32 cur_offset;
6860 u64 type;
6861 struct btrfs_key key;
6862
6863 ASSERT(BTRFS_SUPER_INFO_SIZE <= fs_info->nodesize);
6864
6865
6866
6867
6868
6869 sb = btrfs_find_create_tree_block(fs_info, BTRFS_SUPER_INFO_OFFSET);
6870 if (IS_ERR(sb))
6871 return PTR_ERR(sb);
6872 set_extent_buffer_uptodate(sb);
6873 btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
6874
6875
6876
6877
6878
6879
6880
6881
6882
6883
6884
6885
6886 if (PAGE_SIZE > BTRFS_SUPER_INFO_SIZE)
6887 SetPageUptodate(sb->pages[0]);
6888
6889 write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
6890 array_size = btrfs_super_sys_array_size(super_copy);
6891
6892 array_ptr = super_copy->sys_chunk_array;
6893 sb_array_offset = offsetof(struct btrfs_super_block, sys_chunk_array);
6894 cur_offset = 0;
6895
6896 while (cur_offset < array_size) {
6897 disk_key = (struct btrfs_disk_key *)array_ptr;
6898 len = sizeof(*disk_key);
6899 if (cur_offset + len > array_size)
6900 goto out_short_read;
6901
6902 btrfs_disk_key_to_cpu(&key, disk_key);
6903
6904 array_ptr += len;
6905 sb_array_offset += len;
6906 cur_offset += len;
6907
6908 if (key.type != BTRFS_CHUNK_ITEM_KEY) {
6909 btrfs_err(fs_info,
6910 "unexpected item type %u in sys_array at offset %u",
6911 (u32)key.type, cur_offset);
6912 ret = -EIO;
6913 break;
6914 }
6915
6916 chunk = (struct btrfs_chunk *)sb_array_offset;
6917
6918
6919
6920
6921 len = btrfs_chunk_item_size(1);
6922 if (cur_offset + len > array_size)
6923 goto out_short_read;
6924
6925 num_stripes = btrfs_chunk_num_stripes(sb, chunk);
6926 if (!num_stripes) {
6927 btrfs_err(fs_info,
6928 "invalid number of stripes %u in sys_array at offset %u",
6929 num_stripes, cur_offset);
6930 ret = -EIO;
6931 break;
6932 }
6933
6934 type = btrfs_chunk_type(sb, chunk);
6935 if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) {
6936 btrfs_err(fs_info,
6937 "invalid chunk type %llu in sys_array at offset %u",
6938 type, cur_offset);
6939 ret = -EIO;
6940 break;
6941 }
6942
6943 len = btrfs_chunk_item_size(num_stripes);
6944 if (cur_offset + len > array_size)
6945 goto out_short_read;
6946
6947 ret = read_one_chunk(&key, sb, chunk);
6948 if (ret)
6949 break;
6950
6951 array_ptr += len;
6952 sb_array_offset += len;
6953 cur_offset += len;
6954 }
6955 clear_extent_buffer_uptodate(sb);
6956 free_extent_buffer_stale(sb);
6957 return ret;
6958
6959out_short_read:
6960 btrfs_err(fs_info, "sys_array too short to read %u bytes at offset %u",
6961 len, cur_offset);
6962 clear_extent_buffer_uptodate(sb);
6963 free_extent_buffer_stale(sb);
6964 return -EIO;
6965}
6966
6967
6968
6969
6970
6971
6972
6973
6974
6975bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
6976 struct btrfs_device *failing_dev)
6977{
6978 struct extent_map_tree *map_tree = &fs_info->mapping_tree;
6979 struct extent_map *em;
6980 u64 next_start = 0;
6981 bool ret = true;
6982
6983 read_lock(&map_tree->lock);
6984 em = lookup_extent_mapping(map_tree, 0, (u64)-1);
6985 read_unlock(&map_tree->lock);
6986
6987 if (!em) {
6988 ret = false;
6989 goto out;
6990 }
6991 while (em) {
6992 struct map_lookup *map;
6993 int missing = 0;
6994 int max_tolerated;
6995 int i;
6996
6997 map = em->map_lookup;
6998 max_tolerated =
6999 btrfs_get_num_tolerated_disk_barrier_failures(
7000 map->type);
7001 for (i = 0; i < map->num_stripes; i++) {
7002 struct btrfs_device *dev = map->stripes[i].dev;
7003
7004 if (!dev || !dev->bdev ||
7005 test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
7006 dev->last_flush_error)
7007 missing++;
7008 else if (failing_dev && failing_dev == dev)
7009 missing++;
7010 }
7011 if (missing > max_tolerated) {
7012 if (!failing_dev)
7013 btrfs_warn(fs_info,
7014 "chunk %llu missing %d devices, max tolerance is %d for writable mount",
7015 em->start, missing, max_tolerated);
7016 free_extent_map(em);
7017 ret = false;
7018 goto out;
7019 }
7020 next_start = extent_map_end(em);
7021 free_extent_map(em);
7022
7023 read_lock(&map_tree->lock);
7024 em = lookup_extent_mapping(map_tree, next_start,
7025 (u64)(-1) - next_start);
7026 read_unlock(&map_tree->lock);
7027 }
7028out:
7029 return ret;
7030}
7031
7032int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
7033{
7034 struct btrfs_root *root = fs_info->chunk_root;
7035 struct btrfs_path *path;
7036 struct extent_buffer *leaf;
7037 struct btrfs_key key;
7038 struct btrfs_key found_key;
7039 int ret;
7040 int slot;
7041 u64 total_dev = 0;
7042
7043 path = btrfs_alloc_path();
7044 if (!path)
7045 return -ENOMEM;
7046
7047
7048
7049
7050
7051 mutex_lock(&uuid_mutex);
7052 mutex_lock(&fs_info->chunk_mutex);
7053
7054
7055
7056
7057
7058
7059
7060 fs_info->fs_devices->total_rw_bytes = 0;
7061
7062
7063
7064
7065
7066
7067
7068 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
7069 key.offset = 0;
7070 key.type = 0;
7071 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7072 if (ret < 0)
7073 goto error;
7074 while (1) {
7075 leaf = path->nodes[0];
7076 slot = path->slots[0];
7077 if (slot >= btrfs_header_nritems(leaf)) {
7078 ret = btrfs_next_leaf(root, path);
7079 if (ret == 0)
7080 continue;
7081 if (ret < 0)
7082 goto error;
7083 break;
7084 }
7085 btrfs_item_key_to_cpu(leaf, &found_key, slot);
7086 if (found_key.type == BTRFS_DEV_ITEM_KEY) {
7087 struct btrfs_dev_item *dev_item;
7088 dev_item = btrfs_item_ptr(leaf, slot,
7089 struct btrfs_dev_item);
7090 ret = read_one_dev(leaf, dev_item);
7091 if (ret)
7092 goto error;
7093 total_dev++;
7094 } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
7095 struct btrfs_chunk *chunk;
7096 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7097 ret = read_one_chunk(&found_key, leaf, chunk);
7098 if (ret)
7099 goto error;
7100 }
7101 path->slots[0]++;
7102 }
7103
7104
7105
7106
7107
7108 if (total_dev != fs_info->fs_devices->total_devices) {
7109 btrfs_err(fs_info,
7110 "super_num_devices %llu mismatch with num_devices %llu found here",
7111 btrfs_super_num_devices(fs_info->super_copy),
7112 total_dev);
7113 ret = -EINVAL;
7114 goto error;
7115 }
7116 if (btrfs_super_total_bytes(fs_info->super_copy) <
7117 fs_info->fs_devices->total_rw_bytes) {
7118 btrfs_err(fs_info,
7119 "super_total_bytes %llu mismatch with fs_devices total_rw_bytes %llu",
7120 btrfs_super_total_bytes(fs_info->super_copy),
7121 fs_info->fs_devices->total_rw_bytes);
7122 ret = -EINVAL;
7123 goto error;
7124 }
7125 ret = 0;
7126error:
7127 mutex_unlock(&fs_info->chunk_mutex);
7128 mutex_unlock(&uuid_mutex);
7129
7130 btrfs_free_path(path);
7131 return ret;
7132}
7133
7134void btrfs_init_devices_late(struct btrfs_fs_info *fs_info)
7135{
7136 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
7137 struct btrfs_device *device;
7138
7139 while (fs_devices) {
7140 mutex_lock(&fs_devices->device_list_mutex);
7141 list_for_each_entry(device, &fs_devices->devices, dev_list)
7142 device->fs_info = fs_info;
7143 mutex_unlock(&fs_devices->device_list_mutex);
7144
7145 fs_devices = fs_devices->seed;
7146 }
7147}
7148
7149static u64 btrfs_dev_stats_value(const struct extent_buffer *eb,
7150 const struct btrfs_dev_stats_item *ptr,
7151 int index)
7152{
7153 u64 val;
7154
7155 read_extent_buffer(eb, &val,
7156 offsetof(struct btrfs_dev_stats_item, values) +
7157 ((unsigned long)ptr) + (index * sizeof(u64)),
7158 sizeof(val));
7159 return val;
7160}
7161
7162static void btrfs_set_dev_stats_value(struct extent_buffer *eb,
7163 struct btrfs_dev_stats_item *ptr,
7164 int index, u64 val)
7165{
7166 write_extent_buffer(eb, &val,
7167 offsetof(struct btrfs_dev_stats_item, values) +
7168 ((unsigned long)ptr) + (index * sizeof(u64)),
7169 sizeof(val));
7170}
7171
7172int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info)
7173{
7174 struct btrfs_key key;
7175 struct btrfs_root *dev_root = fs_info->dev_root;
7176 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
7177 struct extent_buffer *eb;
7178 int slot;
7179 int ret = 0;
7180 struct btrfs_device *device;
7181 struct btrfs_path *path = NULL;
7182 int i;
7183
7184 path = btrfs_alloc_path();
7185 if (!path)
7186 return -ENOMEM;
7187
7188 mutex_lock(&fs_devices->device_list_mutex);
7189 list_for_each_entry(device, &fs_devices->devices, dev_list) {
7190 int item_size;
7191 struct btrfs_dev_stats_item *ptr;
7192
7193 key.objectid = BTRFS_DEV_STATS_OBJECTID;
7194 key.type = BTRFS_PERSISTENT_ITEM_KEY;
7195 key.offset = device->devid;
7196 ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);
7197 if (ret) {
7198 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
7199 btrfs_dev_stat_set(device, i, 0);
7200 device->dev_stats_valid = 1;
7201 btrfs_release_path(path);
7202 continue;
7203 }
7204 slot = path->slots[0];
7205 eb = path->nodes[0];
7206 item_size = btrfs_item_size_nr(eb, slot);
7207
7208 ptr = btrfs_item_ptr(eb, slot,
7209 struct btrfs_dev_stats_item);
7210
7211 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
7212 if (item_size >= (1 + i) * sizeof(__le64))
7213 btrfs_dev_stat_set(device, i,
7214 btrfs_dev_stats_value(eb, ptr, i));
7215 else
7216 btrfs_dev_stat_set(device, i, 0);
7217 }
7218
7219 device->dev_stats_valid = 1;
7220 btrfs_dev_stat_print_on_load(device);
7221 btrfs_release_path(path);
7222 }
7223 mutex_unlock(&fs_devices->device_list_mutex);
7224
7225 btrfs_free_path(path);
7226 return ret < 0 ? ret : 0;
7227}
7228
7229static int update_dev_stat_item(struct btrfs_trans_handle *trans,
7230 struct btrfs_device *device)
7231{
7232 struct btrfs_fs_info *fs_info = trans->fs_info;
7233 struct btrfs_root *dev_root = fs_info->dev_root;
7234 struct btrfs_path *path;
7235 struct btrfs_key key;
7236 struct extent_buffer *eb;
7237 struct btrfs_dev_stats_item *ptr;
7238 int ret;
7239 int i;
7240
7241 key.objectid = BTRFS_DEV_STATS_OBJECTID;
7242 key.type = BTRFS_PERSISTENT_ITEM_KEY;
7243 key.offset = device->devid;
7244
7245 path = btrfs_alloc_path();
7246 if (!path)
7247 return -ENOMEM;
7248 ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
7249 if (ret < 0) {
7250 btrfs_warn_in_rcu(fs_info,
7251 "error %d while searching for dev_stats item for device %s",
7252 ret, rcu_str_deref(device->name));
7253 goto out;
7254 }
7255
7256 if (ret == 0 &&
7257 btrfs_item_size_nr(path->nodes[0], path->slots[0]) < sizeof(*ptr)) {
7258
7259 ret = btrfs_del_item(trans, dev_root, path);
7260 if (ret != 0) {
7261 btrfs_warn_in_rcu(fs_info,
7262 "delete too small dev_stats item for device %s failed %d",
7263 rcu_str_deref(device->name), ret);
7264 goto out;
7265 }
7266 ret = 1;
7267 }
7268
7269 if (ret == 1) {
7270
7271 btrfs_release_path(path);
7272 ret = btrfs_insert_empty_item(trans, dev_root, path,
7273 &key, sizeof(*ptr));
7274 if (ret < 0) {
7275 btrfs_warn_in_rcu(fs_info,
7276 "insert dev_stats item for device %s failed %d",
7277 rcu_str_deref(device->name), ret);
7278 goto out;
7279 }
7280 }
7281
7282 eb = path->nodes[0];
7283 ptr = btrfs_item_ptr(eb, path->slots[0], struct btrfs_dev_stats_item);
7284 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
7285 btrfs_set_dev_stats_value(eb, ptr, i,
7286 btrfs_dev_stat_read(device, i));
7287 btrfs_mark_buffer_dirty(eb);
7288
7289out:
7290 btrfs_free_path(path);
7291 return ret;
7292}
7293
7294
7295
7296
7297int btrfs_run_dev_stats(struct btrfs_trans_handle *trans)
7298{
7299 struct btrfs_fs_info *fs_info = trans->fs_info;
7300 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
7301 struct btrfs_device *device;
7302 int stats_cnt;
7303 int ret = 0;
7304
7305 mutex_lock(&fs_devices->device_list_mutex);
7306 list_for_each_entry(device, &fs_devices->devices, dev_list) {
7307 stats_cnt = atomic_read(&device->dev_stats_ccnt);
7308 if (!device->dev_stats_valid || stats_cnt == 0)
7309 continue;
7310
7311
7312
7313
7314
7315
7316
7317
7318
7319
7320
7321
7322
7323 smp_rmb();
7324
7325 ret = update_dev_stat_item(trans, device);
7326 if (!ret)
7327 atomic_sub(stats_cnt, &device->dev_stats_ccnt);
7328 }
7329 mutex_unlock(&fs_devices->device_list_mutex);
7330
7331 return ret;
7332}
7333
7334void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index)
7335{
7336 btrfs_dev_stat_inc(dev, index);
7337 btrfs_dev_stat_print_on_error(dev);
7338}
7339
7340static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
7341{
7342 if (!dev->dev_stats_valid)
7343 return;
7344 btrfs_err_rl_in_rcu(dev->fs_info,
7345 "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u",
7346 rcu_str_deref(dev->name),
7347 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
7348 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
7349 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
7350 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS),
7351 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS));
7352}
7353
7354static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
7355{
7356 int i;
7357
7358 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
7359 if (btrfs_dev_stat_read(dev, i) != 0)
7360 break;
7361 if (i == BTRFS_DEV_STAT_VALUES_MAX)
7362 return;
7363
7364 btrfs_info_in_rcu(dev->fs_info,
7365 "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u",
7366 rcu_str_deref(dev->name),
7367 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
7368 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
7369 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
7370 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS),
7371 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS));
7372}
7373
7374int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
7375 struct btrfs_ioctl_get_dev_stats *stats)
7376{
7377 struct btrfs_device *dev;
7378 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
7379 int i;
7380
7381 mutex_lock(&fs_devices->device_list_mutex);
7382 dev = btrfs_find_device(fs_info->fs_devices, stats->devid, NULL, NULL,
7383 true);
7384 mutex_unlock(&fs_devices->device_list_mutex);
7385
7386 if (!dev) {
7387 btrfs_warn(fs_info, "get dev_stats failed, device not found");
7388 return -ENODEV;
7389 } else if (!dev->dev_stats_valid) {
7390 btrfs_warn(fs_info, "get dev_stats failed, not yet valid");
7391 return -ENODEV;
7392 } else if (stats->flags & BTRFS_DEV_STATS_RESET) {
7393 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
7394 if (stats->nr_items > i)
7395 stats->values[i] =
7396 btrfs_dev_stat_read_and_reset(dev, i);
7397 else
7398 btrfs_dev_stat_set(dev, i, 0);
7399 }
7400 btrfs_info(fs_info, "device stats zeroed by %s (%d)",
7401 current->comm, task_pid_nr(current));
7402 } else {
7403 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
7404 if (stats->nr_items > i)
7405 stats->values[i] = btrfs_dev_stat_read(dev, i);
7406 }
7407 if (stats->nr_items > BTRFS_DEV_STAT_VALUES_MAX)
7408 stats->nr_items = BTRFS_DEV_STAT_VALUES_MAX;
7409 return 0;
7410}
7411
7412
7413
7414
7415
7416
7417
7418
7419void btrfs_commit_device_sizes(struct btrfs_transaction *trans)
7420{
7421 struct btrfs_device *curr, *next;
7422
7423 ASSERT(trans->state == TRANS_STATE_COMMIT_DOING);
7424
7425 if (list_empty(&trans->dev_update_list))
7426 return;
7427
7428
7429
7430
7431
7432
7433 mutex_lock(&trans->fs_info->chunk_mutex);
7434 list_for_each_entry_safe(curr, next, &trans->dev_update_list,
7435 post_commit_list) {
7436 list_del_init(&curr->post_commit_list);
7437 curr->commit_total_bytes = curr->disk_total_bytes;
7438 curr->commit_bytes_used = curr->bytes_used;
7439 }
7440 mutex_unlock(&trans->fs_info->chunk_mutex);
7441}
7442
7443void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info)
7444{
7445 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
7446 while (fs_devices) {
7447 fs_devices->fs_info = fs_info;
7448 fs_devices = fs_devices->seed;
7449 }
7450}
7451
7452void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info)
7453{
7454 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
7455 while (fs_devices) {
7456 fs_devices->fs_info = NULL;
7457 fs_devices = fs_devices->seed;
7458 }
7459}
7460
7461
7462
7463
7464int btrfs_bg_type_to_factor(u64 flags)
7465{
7466 const int index = btrfs_bg_flags_to_raid_index(flags);
7467
7468 return btrfs_raid_array[index].ncopies;
7469}
7470
7471
7472
7473static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
7474 u64 chunk_offset, u64 devid,
7475 u64 physical_offset, u64 physical_len)
7476{
7477 struct extent_map_tree *em_tree = &fs_info->mapping_tree;
7478 struct extent_map *em;
7479 struct map_lookup *map;
7480 struct btrfs_device *dev;
7481 u64 stripe_len;
7482 bool found = false;
7483 int ret = 0;
7484 int i;
7485
7486 read_lock(&em_tree->lock);
7487 em = lookup_extent_mapping(em_tree, chunk_offset, 1);
7488 read_unlock(&em_tree->lock);
7489
7490 if (!em) {
7491 btrfs_err(fs_info,
7492"dev extent physical offset %llu on devid %llu doesn't have corresponding chunk",
7493 physical_offset, devid);
7494 ret = -EUCLEAN;
7495 goto out;
7496 }
7497
7498 map = em->map_lookup;
7499 stripe_len = calc_stripe_length(map->type, em->len, map->num_stripes);
7500 if (physical_len != stripe_len) {
7501 btrfs_err(fs_info,
7502"dev extent physical offset %llu on devid %llu length doesn't match chunk %llu, have %llu expect %llu",
7503 physical_offset, devid, em->start, physical_len,
7504 stripe_len);
7505 ret = -EUCLEAN;
7506 goto out;
7507 }
7508
7509 for (i = 0; i < map->num_stripes; i++) {
7510 if (map->stripes[i].dev->devid == devid &&
7511 map->stripes[i].physical == physical_offset) {
7512 found = true;
7513 if (map->verified_stripes >= map->num_stripes) {
7514 btrfs_err(fs_info,
7515 "too many dev extents for chunk %llu found",
7516 em->start);
7517 ret = -EUCLEAN;
7518 goto out;
7519 }
7520 map->verified_stripes++;
7521 break;
7522 }
7523 }
7524 if (!found) {
7525 btrfs_err(fs_info,
7526 "dev extent physical offset %llu devid %llu has no corresponding chunk",
7527 physical_offset, devid);
7528 ret = -EUCLEAN;
7529 }
7530
7531
7532 dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
7533 if (!dev) {
7534 btrfs_err(fs_info, "failed to find devid %llu", devid);
7535 ret = -EUCLEAN;
7536 goto out;
7537 }
7538
7539
7540 if (dev->disk_total_bytes == 0) {
7541 dev = btrfs_find_device(fs_info->fs_devices->seed, devid, NULL,
7542 NULL, false);
7543 if (!dev) {
7544 btrfs_err(fs_info, "failed to find seed devid %llu",
7545 devid);
7546 ret = -EUCLEAN;
7547 goto out;
7548 }
7549 }
7550
7551 if (physical_offset + physical_len > dev->disk_total_bytes) {
7552 btrfs_err(fs_info,
7553"dev extent devid %llu physical offset %llu len %llu is beyond device boundary %llu",
7554 devid, physical_offset, physical_len,
7555 dev->disk_total_bytes);
7556 ret = -EUCLEAN;
7557 goto out;
7558 }
7559out:
7560 free_extent_map(em);
7561 return ret;
7562}
7563
7564static int verify_chunk_dev_extent_mapping(struct btrfs_fs_info *fs_info)
7565{
7566 struct extent_map_tree *em_tree = &fs_info->mapping_tree;
7567 struct extent_map *em;
7568 struct rb_node *node;
7569 int ret = 0;
7570
7571 read_lock(&em_tree->lock);
7572 for (node = rb_first_cached(&em_tree->map); node; node = rb_next(node)) {
7573 em = rb_entry(node, struct extent_map, rb_node);
7574 if (em->map_lookup->num_stripes !=
7575 em->map_lookup->verified_stripes) {
7576 btrfs_err(fs_info,
7577 "chunk %llu has missing dev extent, have %d expect %d",
7578 em->start, em->map_lookup->verified_stripes,
7579 em->map_lookup->num_stripes);
7580 ret = -EUCLEAN;
7581 goto out;
7582 }
7583 }
7584out:
7585 read_unlock(&em_tree->lock);
7586 return ret;
7587}
7588
7589
7590
7591
7592
7593
7594
7595
7596int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
7597{
7598 struct btrfs_path *path;
7599 struct btrfs_root *root = fs_info->dev_root;
7600 struct btrfs_key key;
7601 u64 prev_devid = 0;
7602 u64 prev_dev_ext_end = 0;
7603 int ret = 0;
7604
7605 key.objectid = 1;
7606 key.type = BTRFS_DEV_EXTENT_KEY;
7607 key.offset = 0;
7608
7609 path = btrfs_alloc_path();
7610 if (!path)
7611 return -ENOMEM;
7612
7613 path->reada = READA_FORWARD;
7614 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7615 if (ret < 0)
7616 goto out;
7617
7618 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
7619 ret = btrfs_next_item(root, path);
7620 if (ret < 0)
7621 goto out;
7622
7623 if (ret > 0) {
7624 ret = -EUCLEAN;
7625 goto out;
7626 }
7627 }
7628 while (1) {
7629 struct extent_buffer *leaf = path->nodes[0];
7630 struct btrfs_dev_extent *dext;
7631 int slot = path->slots[0];
7632 u64 chunk_offset;
7633 u64 physical_offset;
7634 u64 physical_len;
7635 u64 devid;
7636
7637 btrfs_item_key_to_cpu(leaf, &key, slot);
7638 if (key.type != BTRFS_DEV_EXTENT_KEY)
7639 break;
7640 devid = key.objectid;
7641 physical_offset = key.offset;
7642
7643 dext = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
7644 chunk_offset = btrfs_dev_extent_chunk_offset(leaf, dext);
7645 physical_len = btrfs_dev_extent_length(leaf, dext);
7646
7647
7648 if (devid == prev_devid && physical_offset < prev_dev_ext_end) {
7649 btrfs_err(fs_info,
7650"dev extent devid %llu physical offset %llu overlap with previous dev extent end %llu",
7651 devid, physical_offset, prev_dev_ext_end);
7652 ret = -EUCLEAN;
7653 goto out;
7654 }
7655
7656 ret = verify_one_dev_extent(fs_info, chunk_offset, devid,
7657 physical_offset, physical_len);
7658 if (ret < 0)
7659 goto out;
7660 prev_devid = devid;
7661 prev_dev_ext_end = physical_offset + physical_len;
7662
7663 ret = btrfs_next_item(root, path);
7664 if (ret < 0)
7665 goto out;
7666 if (ret > 0) {
7667 ret = 0;
7668 break;
7669 }
7670 }
7671
7672
7673 ret = verify_chunk_dev_extent_mapping(fs_info);
7674out:
7675 btrfs_free_path(path);
7676 return ret;
7677}
7678
7679
7680
7681
7682
7683bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr)
7684{
7685 struct btrfs_swapfile_pin *sp;
7686 struct rb_node *node;
7687
7688 spin_lock(&fs_info->swapfile_pins_lock);
7689 node = fs_info->swapfile_pins.rb_node;
7690 while (node) {
7691 sp = rb_entry(node, struct btrfs_swapfile_pin, node);
7692 if (ptr < sp->ptr)
7693 node = node->rb_left;
7694 else if (ptr > sp->ptr)
7695 node = node->rb_right;
7696 else
7697 break;
7698 }
7699 spin_unlock(&fs_info->swapfile_pins_lock);
7700 return node != NULL;
7701}
7702