// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 */

#include <linux/sched.h>
#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/ratelimit.h>
#include <linux/kthread.h>
#include <linux/raid/pq.h>
#include <linux/semaphore.h>
#include <linux/uuid.h>
#include <linux/list_sort.h>
#include "ctree.h"
#include "extent_map.h"
#include "disk-io.h"
#include "transaction.h"
#include "print-tree.h"
#include "volumes.h"
#include "raid56.h"
#include "async-thread.h"
#include "check-integrity.h"
#include "rcu-string.h"
#include "math.h"
#include "dev-replace.h"
#include "sysfs.h"
#include "tree-checker.h"
#include "space-info.h"

const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
	[BTRFS_RAID_RAID10] = {
		.sub_stripes	= 2,
		.dev_stripes	= 1,
		.devs_max	= 0,	/* 0 == as many as possible */
		.devs_min	= 4,
		.tolerated_failures = 1,
		.devs_increment	= 2,
		.ncopies	= 2,
		.nparity	= 0,
		.raid_name	= "raid10",
		.bg_flag	= BTRFS_BLOCK_GROUP_RAID10,
		.mindev_error	= BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET,
	},
	[BTRFS_RAID_RAID1] = {
		.sub_stripes	= 1,
		.dev_stripes	= 1,
		.devs_max	= 2,
		.devs_min	= 2,
		.tolerated_failures = 1,
		.devs_increment	= 2,
		.ncopies	= 2,
		.nparity	= 0,
		.raid_name	= "raid1",
		.bg_flag	= BTRFS_BLOCK_GROUP_RAID1,
		.mindev_error	= BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET,
	},
	[BTRFS_RAID_DUP] = {
		.sub_stripes	= 1,
		.dev_stripes	= 2,
		.devs_max	= 1,
		.devs_min	= 1,
		.tolerated_failures = 0,
		.devs_increment	= 1,
		.ncopies	= 2,
		.nparity	= 0,
		.raid_name	= "dup",
		.bg_flag	= BTRFS_BLOCK_GROUP_DUP,
		.mindev_error	= 0,
	},
	[BTRFS_RAID_RAID0] = {
		.sub_stripes	= 1,
		.dev_stripes	= 1,
		.devs_max	= 0,
		.devs_min	= 2,
		.tolerated_failures = 0,
		.devs_increment	= 1,
		.ncopies	= 1,
		.nparity	= 0,
		.raid_name	= "raid0",
		.bg_flag	= BTRFS_BLOCK_GROUP_RAID0,
		.mindev_error	= 0,
	},
	[BTRFS_RAID_SINGLE] = {
		.sub_stripes	= 1,
		.dev_stripes	= 1,
		.devs_max	= 1,
		.devs_min	= 1,
		.tolerated_failures = 0,
		.devs_increment	= 1,
		.ncopies	= 1,
		.nparity	= 0,
		.raid_name	= "single",
		.bg_flag	= 0,
		.mindev_error	= 0,
	},
	[BTRFS_RAID_RAID5] = {
		.sub_stripes	= 1,
		.dev_stripes	= 1,
		.devs_max	= 0,
		.devs_min	= 2,
		.tolerated_failures = 1,
		.devs_increment	= 1,
		.ncopies	= 1,
		.nparity	= 1,
		.raid_name	= "raid5",
		.bg_flag	= BTRFS_BLOCK_GROUP_RAID5,
		.mindev_error	= BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET,
	},
	[BTRFS_RAID_RAID6] = {
		.sub_stripes	= 1,
		.dev_stripes	= 1,
		.devs_max	= 0,
		.devs_min	= 3,
		.tolerated_failures = 2,
		.devs_increment	= 1,
		.ncopies	= 1,
		.nparity	= 2,
		.raid_name	= "raid6",
		.bg_flag	= BTRFS_BLOCK_GROUP_RAID6,
		.mindev_error	= BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,
	},
};
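
/*
 * The usable capacity of a profile follows from the table above: a chunk
 * striped across N devices stores roughly
 * chunk_size * (N - nparity) / (N * ncopies) bytes of user data (ignoring
 * rounding to stripe boundaries).  For example raid10 and dup store every
 * byte twice (half usable), while raid6 gives up two devices' worth of
 * every stripe to parity.
 */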

const char *btrfs_bg_type_to_raid_name(u64 flags)
{
	const int index = btrfs_bg_flags_to_raid_index(flags);

	if (index >= BTRFS_NR_RAID_TYPES)
		return NULL;

	return btrfs_raid_array[index].raid_name;
}

/*
 * Fill @buf with textual description of @bg_flags, no more than @size_buf
 * bytes is copied and NUL-terminated.
 */
void btrfs_describe_block_groups(u64 bg_flags, char *buf, u32 size_buf)
{
	int i;
	int ret;
	char *bp = buf;
	u64 flags = bg_flags;
	u32 size_bp = size_buf;

	if (!flags) {
		strcpy(bp, "NONE");
		return;
	}

#define DESCRIBE_FLAG(flag, desc)					\
	do {								\
		if (flags & (flag)) {					\
			ret = snprintf(bp, size_bp, "%s|", (desc));	\
			if (ret < 0 || ret >= size_bp)			\
				goto out_overflow;			\
			size_bp -= ret;					\
			bp += ret;					\
			flags &= ~(flag);				\
		}							\
	} while (0)

	DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_DATA, "data");
	DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_SYSTEM, "system");
	DESCRIBE_FLAG(BTRFS_BLOCK_GROUP_METADATA, "metadata");

	DESCRIBE_FLAG(BTRFS_AVAIL_ALLOC_BIT_SINGLE, "single");
	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
		DESCRIBE_FLAG(btrfs_raid_array[i].bg_flag,
			      btrfs_raid_array[i].raid_name);
#undef DESCRIBE_FLAG

	if (flags) {
		ret = snprintf(bp, size_bp, "0x%llx|", flags);
		size_bp -= ret;
	}

	if (size_bp < size_buf)
		buf[size_buf - size_bp - 1] = '\0'; /* remove last | */

	/*
	 * The text is trimmed, it's up to the caller to provide sufficiently
	 * large buffer
	 */
out_overflow:;
}
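
/*
 * Example: for bg_flags == (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_RAID1)
 * the loop appends "data|" and "raid1|", and the final '|' is overwritten
 * with the terminating NUL, leaving "data|raid1" in @buf.
 */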

static int init_first_rw_device(struct btrfs_trans_handle *trans);
static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info);
static void __btrfs_reset_dev_stats(struct btrfs_device *dev);
static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
			     enum btrfs_map_op op,
			     u64 logical, u64 *length,
			     struct btrfs_bio **bbio_ret,
			     int mirror_num, int need_raid_map);

/*
 * Device locking
 * ==============
 *
 * There are several mutexes that protect manipulation of devices and
 * low-level structures like chunks but not block groups, extents or files
 *
 * uuid_mutex (global lock)
 * ------------------------
 * protects the fs_uuids list that tracks all per-fs fs_devices, resulting
 * from the SCAN_DEV ioctl registration or from mount either implicitly (the
 * first device) or requested by the device= mount option
 *
 * the mutex can be very coarse and can cover long-running operations
 *
 * protects: updates to fs_devices counters like missing devices, rw devices,
 * seeding, structure cloning, opening/closing devices at mount/umount time
 *
 * global::fs_devs - add, remove, updates to the global list
 *
 * does not protect: manipulation of the fs_devices::devices list!
 *
 * btrfs_device::name - renames (write side), read is RCU
 *
 * fs_devices::device_list_mutex (per-fs, with RCU)
 * ------------------------------------------------
 * protects updates to fs_devices::devices, ie. adding and deleting
 *
 * simple list traversal with read-only actions can be done with RCU
 * protection
 *
 * may be used to exclude some operations from running concurrently without
 * any modifications to the list (see write_all_supers)
 *
 * chunk_mutex
 * -----------
 * protects chunks, adding or removing during allocation, trim or when a new
 * device is added/removed. Additionally it also protects post_commit_list of
 * individual devices, since they can be added to the transaction's
 * post_commit_list only with the chunk_mutex held.
 *
 * Lock nesting
 * ============
 *
 * uuid_mutex
 *   device_list_mutex
 *     chunk_mutex
 */

DEFINE_MUTEX(uuid_mutex);
static LIST_HEAD(fs_uuids);
struct list_head *btrfs_get_fs_uuids(void)
{
	return &fs_uuids;
}

/*
 * alloc_fs_devices - allocate struct btrfs_fs_devices
 * @fsid:		if not NULL, copy the UUID to fs_devices::fsid
 * @metadata_fsid:	if not NULL, copy the UUID to fs_devices::metadata_uuid
 *
 * Return a pointer to a new struct btrfs_fs_devices on success, or ERR_PTR().
 * The returned struct is not linked onto any lists and can be destroyed with
 * kfree() right away.
 */
static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid,
						 const u8 *metadata_fsid)
{
	struct btrfs_fs_devices *fs_devs;

	fs_devs = kzalloc(sizeof(*fs_devs), GFP_KERNEL);
	if (!fs_devs)
		return ERR_PTR(-ENOMEM);

	mutex_init(&fs_devs->device_list_mutex);

	INIT_LIST_HEAD(&fs_devs->devices);
	INIT_LIST_HEAD(&fs_devs->alloc_list);
	INIT_LIST_HEAD(&fs_devs->fs_list);
	if (fsid)
		memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE);

	if (metadata_fsid)
		memcpy(fs_devs->metadata_uuid, metadata_fsid, BTRFS_FSID_SIZE);
	else if (fsid)
		memcpy(fs_devs->metadata_uuid, fsid, BTRFS_FSID_SIZE);

	return fs_devs;
}

void btrfs_free_device(struct btrfs_device *device)
{
	WARN_ON(!list_empty(&device->post_commit_list));
	rcu_string_free(device->name);
	extent_io_tree_release(&device->alloc_state);
	bio_put(device->flush_bio);
	kfree(device);
}

static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
{
	struct btrfs_device *device;
	WARN_ON(fs_devices->opened);
	while (!list_empty(&fs_devices->devices)) {
		device = list_entry(fs_devices->devices.next,
				    struct btrfs_device, dev_list);
		list_del(&device->dev_list);
		btrfs_free_device(device);
	}
	kfree(fs_devices);
}

static void btrfs_kobject_uevent(struct block_device *bdev,
				 enum kobject_action action)
{
	int ret;

	ret = kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, action);
	if (ret)
		pr_warn("BTRFS: Sending event '%d' to kobject: '%s' (%p): failed\n",
			action,
			kobject_name(&disk_to_dev(bdev->bd_disk)->kobj),
			&disk_to_dev(bdev->bd_disk)->kobj);
}

void __exit btrfs_cleanup_fs_uuids(void)
{
	struct btrfs_fs_devices *fs_devices;

	while (!list_empty(&fs_uuids)) {
		fs_devices = list_entry(fs_uuids.next,
					struct btrfs_fs_devices, fs_list);
		list_del(&fs_devices->fs_list);
		free_fs_devices(fs_devices);
	}
}

/*
 * Returns a pointer to a new btrfs_device on success; ERR_PTR() on error.
 * Returned struct is not linked onto any lists and must be destroyed using
 * btrfs_free_device.
 */
static struct btrfs_device *__alloc_device(void)
{
	struct btrfs_device *dev;

	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
	if (!dev)
		return ERR_PTR(-ENOMEM);

	/*
	 * Preallocate a bio that's always going to be used for flushing device
	 * barriers and matches the device lifespan
	 */
	dev->flush_bio = bio_alloc_bioset(GFP_KERNEL, 0, NULL);
	if (!dev->flush_bio) {
		kfree(dev);
		return ERR_PTR(-ENOMEM);
	}

	INIT_LIST_HEAD(&dev->dev_list);
	INIT_LIST_HEAD(&dev->dev_alloc_list);
	INIT_LIST_HEAD(&dev->post_commit_list);

	spin_lock_init(&dev->io_lock);

	atomic_set(&dev->reada_in_flight, 0);
	atomic_set(&dev->dev_stats_ccnt, 0);
	btrfs_device_data_ordered_init(dev);
	INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
	INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
	extent_io_tree_init(NULL, &dev->alloc_state, 0, NULL);

	return dev;
}

static noinline struct btrfs_fs_devices *find_fsid(
		const u8 *fsid, const u8 *metadata_fsid)
{
	struct btrfs_fs_devices *fs_devices;

	ASSERT(fsid);

	if (metadata_fsid) {
		/*
		 * Handle scanned device having completed its fsid change but
		 * belonging to a fs_devices that was created by first scanning
		 * a device which didn't have its fsid/metadata_uuid changed
		 * at all and the CHANGING_FSID_V2 flag set.
		 */
		list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
			if (fs_devices->fsid_change &&
			    memcmp(metadata_fsid, fs_devices->fsid,
				   BTRFS_FSID_SIZE) == 0 &&
			    memcmp(fs_devices->fsid, fs_devices->metadata_uuid,
				   BTRFS_FSID_SIZE) == 0) {
				return fs_devices;
			}
		}
		/*
		 * Handle scanned device having completed its fsid change but
		 * belonging to a fs_devices that was created by a device that
		 * has an outdated pair of fsid/metadata_uuid and
		 * CHANGING_FSID_V2 flag set.
		 */
		list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
			if (fs_devices->fsid_change &&
			    memcmp(fs_devices->metadata_uuid,
				   fs_devices->fsid, BTRFS_FSID_SIZE) != 0 &&
			    memcmp(metadata_fsid, fs_devices->metadata_uuid,
				   BTRFS_FSID_SIZE) == 0) {
				return fs_devices;
			}
		}
	}

	/* Handle non-split brain cases */
	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
		if (metadata_fsid) {
			if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0
			    && memcmp(metadata_fsid, fs_devices->metadata_uuid,
				      BTRFS_FSID_SIZE) == 0)
				return fs_devices;
		} else {
			if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
				return fs_devices;
		}
	}
	return NULL;
}
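
/*
 * Example: on a filesystem whose UUID was changed with "btrfstune -m", the
 * superblock carries a fresh fsid while metadata_uuid keeps the value the
 * metadata was written with, so the lookups above must match the pair
 * rather than the fsid alone.
 */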

static int
btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
		      int flush, struct block_device **bdev,
		      struct buffer_head **bh)
{
	int ret;

	*bdev = blkdev_get_by_path(device_path, flags, holder);

	if (IS_ERR(*bdev)) {
		ret = PTR_ERR(*bdev);
		goto error;
	}

	if (flush)
		filemap_write_and_wait((*bdev)->bd_inode->i_mapping);
	ret = set_blocksize(*bdev, BTRFS_BDEV_BLOCKSIZE);
	if (ret) {
		blkdev_put(*bdev, flags);
		goto error;
	}
	invalidate_bdev(*bdev);
	*bh = btrfs_read_dev_super(*bdev);
	if (IS_ERR(*bh)) {
		ret = PTR_ERR(*bh);
		blkdev_put(*bdev, flags);
		goto error;
	}

	return 0;

error:
	*bdev = NULL;
	*bh = NULL;
	return ret;
}

static void requeue_list(struct btrfs_pending_bios *pending_bios,
			struct bio *head, struct bio *tail)
{

	struct bio *old_head;

	old_head = pending_bios->head;
	pending_bios->head = head;
	if (pending_bios->tail)
		tail->bi_next = old_head;
	else
		pending_bios->tail = tail;
}

/*
 * we try to collect pending bios for a device so we don't get a large
 * number of procs sending bios down to the same device.  This greatly
 * improves system fragmentation and avoids lots of seeky IO
 *
 * Bios are queued on device->pending_bios (and pending_sync_bios for
 * REQ_SYNC requests) and submitted from here in batches, requeueing the
 * remainder and rescheduling the worker when the device gets congested.
 */
static noinline void run_scheduled_bios(struct btrfs_device *device)
{
	struct btrfs_fs_info *fs_info = device->fs_info;
	struct bio *pending;
	struct backing_dev_info *bdi;
	struct btrfs_pending_bios *pending_bios;
	struct bio *tail;
	struct bio *cur;
	int again = 0;
	unsigned long num_run;
	unsigned long batch_run = 0;
	unsigned long last_waited = 0;
	int force_reg = 0;
	int sync_pending = 0;
	struct blk_plug plug;

	/*
	 * this function runs all the bios we've collected for
	 * a particular device.  We don't want to wander off to
	 * another device without first sending all of these down.
	 * So, setup a plug here and finish it off before we return
	 */
	blk_start_plug(&plug);

	bdi = device->bdev->bd_bdi;

loop:
	spin_lock(&device->io_lock);

loop_lock:
	num_run = 0;

	/* take all the bios off the list at once and process them
	 * later on (without the lock held).  But, remember the
	 * tail and other pointers so the bios can be properly reinserted
	 * into the list if we hit congestion
	 */
	if (!force_reg && device->pending_sync_bios.head) {
		pending_bios = &device->pending_sync_bios;
		force_reg = 1;
	} else {
		pending_bios = &device->pending_bios;
		force_reg = 0;
	}

	pending = pending_bios->head;
	tail = pending_bios->tail;
	WARN_ON(pending && !tail);

	/*
	 * if pending was null this time around, no bios need processing
	 * at all and we can stop.  Otherwise it'll loop back up again
	 * and do an additional check so no bios are missed.
	 *
	 * device->running_pending is used to synchronize with the
	 * schedule_bio code.
	 */
	if (device->pending_sync_bios.head == NULL &&
	    device->pending_bios.head == NULL) {
		again = 0;
		device->running_pending = 0;
	} else {
		again = 1;
		device->running_pending = 1;
	}

	pending_bios->head = NULL;
	pending_bios->tail = NULL;

	spin_unlock(&device->io_lock);

	while (pending) {

		rmb();
		/* we want to work on both lists, but do more bios on the
		 * sync list than the regular list
		 */
		if ((num_run > 32 &&
		     pending_bios != &device->pending_sync_bios &&
		     device->pending_sync_bios.head) ||
		    (num_run > 64 && pending_bios == &device->pending_sync_bios &&
		     device->pending_bios.head)) {
			spin_lock(&device->io_lock);
			requeue_list(pending_bios, pending, tail);
			goto loop_lock;
		}

		cur = pending;
		pending = pending->bi_next;
		cur->bi_next = NULL;

		BUG_ON(atomic_read(&cur->__bi_cnt) == 0);

		/*
		 * if we're doing the sync list, record that our
		 * plug has some sync requests on it
		 *
		 * If we're doing the regular list and there are
		 * sync requests sitting around, unplug before
		 * we add more
		 */
		if (pending_bios == &device->pending_sync_bios) {
			sync_pending = 1;
		} else if (sync_pending) {
			blk_finish_plug(&plug);
			blk_start_plug(&plug);
			sync_pending = 0;
		}

		btrfsic_submit_bio(cur);
		num_run++;
		batch_run++;

		cond_resched();

		/*
		 * we made progress, there is more work to do and the bdi
		 * is now congested.  Back off and let other work structs
		 * run instead
		 */
		if (pending && bdi_write_congested(bdi) && batch_run > 8 &&
		    fs_info->fs_devices->open_devices > 1) {
			struct io_context *ioc;

			ioc = current->io_context;

			/*
			 * the main goal here is that we don't want to
			 * block if we're going to be able to submit
			 * more requests without blocking.  So we peek
			 * at the io_context batching state to guess
			 * whether the process is still inside its
			 * request batch.
			 */
			if (ioc && ioc->nr_batch_requests > 0 &&
			    time_before(jiffies, ioc->last_waited + HZ/50UL) &&
			    (last_waited == 0 ||
			     ioc->last_waited == last_waited)) {
				/*
				 * we want to go through our batch of
				 * requests and stop.  So, we copy out
				 * the ioc->last_waited time and test
				 * against it before looping
				 */
				last_waited = ioc->last_waited;
				cond_resched();
				continue;
			}
			spin_lock(&device->io_lock);
			requeue_list(pending_bios, pending, tail);
			device->running_pending = 1;

			spin_unlock(&device->io_lock);
			btrfs_queue_work(fs_info->submit_workers,
					 &device->work);
			goto done;
		}
	}

	cond_resched();
	if (again)
		goto loop;

	spin_lock(&device->io_lock);
	if (device->pending_bios.head || device->pending_sync_bios.head)
		goto loop_lock;
	spin_unlock(&device->io_lock);

done:
	blk_finish_plug(&plug);
}

static void pending_bios_fn(struct btrfs_work *work)
{
	struct btrfs_device *device;

	device = container_of(work, struct btrfs_device, work);
	run_scheduled_bios(device);
}

static bool device_path_matched(const char *path, struct btrfs_device *device)
{
	int found;

	rcu_read_lock();
	found = strcmp(rcu_str_deref(device->name), path);
	rcu_read_unlock();

	return found == 0;
}

/*
 *  Search and remove all stale (devices which are not mounted) devices.
 *  When both inputs are NULL, it will search and release all stale devices.
 *  path:	Optional. When provided will it release all unmounted devices
 *		matching this path only.
 *  skip_device: Optional. Will skip this device when searching for the stale
 *		devices.
 *  Return:	0 for success or if @path is NULL.
 *		-EBUSY if @path is a mounted device.
 *		-ENOENT if @path does not match any device in the list.
 */
static int btrfs_free_stale_devices(const char *path,
				    struct btrfs_device *skip_device)
{
	struct btrfs_fs_devices *fs_devices, *tmp_fs_devices;
	struct btrfs_device *device, *tmp_device;
	int ret = 0;

	if (path)
		ret = -ENOENT;

	list_for_each_entry_safe(fs_devices, tmp_fs_devices, &fs_uuids, fs_list) {

		mutex_lock(&fs_devices->device_list_mutex);
		list_for_each_entry_safe(device, tmp_device,
					 &fs_devices->devices, dev_list) {
			if (skip_device && skip_device == device)
				continue;
			if (path && !device->name)
				continue;
			if (path && !device_path_matched(path, device))
				continue;
			if (fs_devices->opened) {
				/* for an already deleted device return 0 */
				if (path && ret != 0)
					ret = -EBUSY;
				break;
			}

			/* delete the stale device */
			fs_devices->num_devices--;
			list_del(&device->dev_list);
			btrfs_free_device(device);

			ret = 0;
			if (fs_devices->num_devices == 0)
				break;
		}
		mutex_unlock(&fs_devices->device_list_mutex);

		if (fs_devices->num_devices == 0) {
			btrfs_sysfs_remove_fsid(fs_devices);
			list_del(&fs_devices->fs_list);
			free_fs_devices(fs_devices);
		}
	}

	return ret;
}

static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
			struct btrfs_device *device, fmode_t flags,
			void *holder)
{
	struct request_queue *q;
	struct block_device *bdev;
	struct buffer_head *bh;
	struct btrfs_super_block *disk_super;
	u64 devid;
	int ret;

	if (device->bdev)
		return -EINVAL;
	if (!device->name)
		return -EINVAL;

	ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1,
				    &bdev, &bh);
	if (ret)
		return ret;

	disk_super = (struct btrfs_super_block *)bh->b_data;
	devid = btrfs_stack_device_id(&disk_super->dev_item);
	if (devid != device->devid)
		goto error_brelse;

	if (memcmp(device->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE))
		goto error_brelse;

	device->generation = btrfs_super_generation(disk_super);

	if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) {
		if (btrfs_super_incompat_flags(disk_super) &
		    BTRFS_FEATURE_INCOMPAT_METADATA_UUID) {
			pr_err(
		"BTRFS: Invalid seeding and uuid-changed device detected\n");
			goto error_brelse;
		}

		clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
		fs_devices->seeding = 1;
	} else {
		if (bdev_read_only(bdev))
			clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
		else
			set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
	}

	q = bdev_get_queue(bdev);
	if (!blk_queue_nonrot(q))
		fs_devices->rotating = 1;

	device->bdev = bdev;
	clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
	device->mode = flags;

	fs_devices->open_devices++;
	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
	    device->devid != BTRFS_DEV_REPLACE_DEVID) {
		fs_devices->rw_devices++;
		list_add_tail(&device->dev_alloc_list, &fs_devices->alloc_list);
	}
	brelse(bh);

	return 0;

error_brelse:
	brelse(bh);
	blkdev_put(bdev, flags);

	return -EINVAL;
}

/*
 * Handle scanned device having its CHANGING_FSID_V2 flag set and the
 * fs_devices it matched being created by a disk that has already completed
 * its fsid change.
 */
static struct btrfs_fs_devices *find_fsid_inprogress(
					struct btrfs_super_block *disk_super)
{
	struct btrfs_fs_devices *fs_devices;

	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
		if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
			   BTRFS_FSID_SIZE) != 0 &&
		    memcmp(fs_devices->metadata_uuid, disk_super->fsid,
			   BTRFS_FSID_SIZE) == 0 && !fs_devices->fsid_change) {
			return fs_devices;
		}
	}

	return NULL;
}

static struct btrfs_fs_devices *find_fsid_changed(
					struct btrfs_super_block *disk_super)
{
	struct btrfs_fs_devices *fs_devices;

	/*
	 * Handles the case where scanned device is part of an fs that had
	 * multiple successful changes of FSID but currently device didn't
	 * observe it. Meaning our fsid will be different than theirs.
	 */
	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
		if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
			   BTRFS_FSID_SIZE) != 0 &&
		    memcmp(fs_devices->metadata_uuid, disk_super->metadata_uuid,
			   BTRFS_FSID_SIZE) == 0 &&
		    memcmp(fs_devices->fsid, disk_super->fsid,
			   BTRFS_FSID_SIZE) != 0) {
			return fs_devices;
		}
	}

	return NULL;
}

/*
 * Add new device to list of registered devices
 *
 * Returns:
 * device pointer which was just added or updated when successful
 * error pointer when failed
 */
static noinline struct btrfs_device *device_list_add(const char *path,
			   struct btrfs_super_block *disk_super,
			   bool *new_device_added)
{
	struct btrfs_device *device;
	struct btrfs_fs_devices *fs_devices = NULL;
	struct rcu_string *name;
	u64 found_transid = btrfs_super_generation(disk_super);
	u64 devid = btrfs_stack_device_id(&disk_super->dev_item);
	bool has_metadata_uuid = (btrfs_super_incompat_flags(disk_super) &
		BTRFS_FEATURE_INCOMPAT_METADATA_UUID);
	bool fsid_change_in_progress = (btrfs_super_flags(disk_super) &
					BTRFS_SUPER_FLAG_CHANGING_FSID_V2);

	if (fsid_change_in_progress) {
		if (!has_metadata_uuid) {
			/*
			 * When we have an image which has CHANGING_FSID_V2 set
			 * it might belong to either a filesystem which has
			 * disks with completed fsid change or it might belong
			 * to fs with no uuid changes in progress, hence call
			 * both to see if a match is found.
			 */
			fs_devices = find_fsid_inprogress(disk_super);
			if (!fs_devices)
				fs_devices = find_fsid(disk_super->fsid, NULL);
		} else {
			fs_devices = find_fsid_changed(disk_super);
		}
	} else if (has_metadata_uuid) {
		fs_devices = find_fsid(disk_super->fsid,
				       disk_super->metadata_uuid);
	} else {
		fs_devices = find_fsid(disk_super->fsid, NULL);
	}

	if (!fs_devices) {
		if (has_metadata_uuid)
			fs_devices = alloc_fs_devices(disk_super->fsid,
						      disk_super->metadata_uuid);
		else
			fs_devices = alloc_fs_devices(disk_super->fsid, NULL);

		if (IS_ERR(fs_devices))
			return ERR_CAST(fs_devices);

		fs_devices->fsid_change = fsid_change_in_progress;

		mutex_lock(&fs_devices->device_list_mutex);
		list_add(&fs_devices->fs_list, &fs_uuids);

		device = NULL;
	} else {
		mutex_lock(&fs_devices->device_list_mutex);
		device = btrfs_find_device(fs_devices, devid,
				disk_super->dev_item.uuid, NULL, false);

		/*
		 * If this disk has been pulled into an fs devices created by
		 * a device which had the CHANGING_FSID_V2 flag then replace
		 * the metadata_uuid/fsid values of the fs_devices.
		 */
		if (has_metadata_uuid && fs_devices->fsid_change &&
		    found_transid > fs_devices->latest_generation) {
			memcpy(fs_devices->fsid, disk_super->fsid,
					BTRFS_FSID_SIZE);
			memcpy(fs_devices->metadata_uuid,
					disk_super->metadata_uuid, BTRFS_FSID_SIZE);

			fs_devices->fsid_change = false;
		}
	}

	if (!device) {
		if (fs_devices->opened) {
			mutex_unlock(&fs_devices->device_list_mutex);
			return ERR_PTR(-EBUSY);
		}

		device = btrfs_alloc_device(NULL, &devid,
					    disk_super->dev_item.uuid);
		if (IS_ERR(device)) {
			mutex_unlock(&fs_devices->device_list_mutex);
			/* we can safely leave the fs_devices entry around */
			return device;
		}

		name = rcu_string_strdup(path, GFP_NOFS);
		if (!name) {
			btrfs_free_device(device);
			mutex_unlock(&fs_devices->device_list_mutex);
			return ERR_PTR(-ENOMEM);
		}
		rcu_assign_pointer(device->name, name);

		list_add_rcu(&device->dev_list, &fs_devices->devices);
		fs_devices->num_devices++;

		device->fs_devices = fs_devices;
		*new_device_added = true;

		if (disk_super->label[0])
			pr_info("BTRFS: device label %s devid %llu transid %llu %s\n",
				disk_super->label, devid, found_transid, path);
		else
			pr_info("BTRFS: device fsid %pU devid %llu transid %llu %s\n",
				disk_super->fsid, devid, found_transid, path);

	} else if (!device->name || strcmp(device->name->str, path)) {
		/*
		 * When FS is already mounted.
		 * 1. If you are here and if the device->name is NULL that
		 *    means this device was missing at time of FS mount.
		 * 2. If you are here and if the device->name is different
		 *    from 'path' that means either
		 *      a. The same device disappeared and reappeared with
		 *         different name. or
		 *      b. The missing-disk-which-was-replaced, has
		 *         reappeared now.
		 *
		 * We must allow 1 and 2a above. But 2b would be a spurious
		 * and unintentional.
		 *
		 * Further in case of 1 and 2a above, the disk at 'path'
		 * would have missed some transaction when it was away and
		 * in case of 2a the stale bdev has to be updated as well.
		 * 2b must not be allowed at all time.
		 */
		if (!fs_devices->opened && found_transid < device->generation) {
			/*
			 * That is if the FS is _not_ mounted and if you
			 * are here, that means there is more than one
			 * disk with same uuid and devid. We keep the one
			 * with larger generation number or the last-in if
			 * generation are equal.
			 */
			mutex_unlock(&fs_devices->device_list_mutex);
			return ERR_PTR(-EEXIST);
		}

		/*
		 * We are going to replace the device path for a given devid,
		 * make sure it's the same device if the device is mounted
		 */
		if (device->bdev) {
			struct block_device *path_bdev;

			path_bdev = lookup_bdev(path);
			if (IS_ERR(path_bdev)) {
				mutex_unlock(&fs_devices->device_list_mutex);
				return ERR_CAST(path_bdev);
			}

			if (device->bdev != path_bdev) {
				bdput(path_bdev);
				mutex_unlock(&fs_devices->device_list_mutex);
				btrfs_warn_in_rcu(device->fs_info,
			"duplicate device fsid:devid for %pU:%llu old:%s new:%s",
					disk_super->fsid, devid,
					rcu_str_deref(device->name), path);
				return ERR_PTR(-EEXIST);
			}
			bdput(path_bdev);
			btrfs_info_in_rcu(device->fs_info,
				"device fsid %pU devid %llu moved old:%s new:%s",
				disk_super->fsid, devid,
				rcu_str_deref(device->name), path);
		}

		name = rcu_string_strdup(path, GFP_NOFS);
		if (!name) {
			mutex_unlock(&fs_devices->device_list_mutex);
			return ERR_PTR(-ENOMEM);
		}
		rcu_string_free(device->name);
		rcu_assign_pointer(device->name, name);
		if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) {
			fs_devices->missing_devices--;
			clear_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
		}
	}

	/*
	 * Unmount does not free the btrfs_device struct but would zero
	 * generation along with most of the other members. So just update
	 * it back. We need it to pick the disk with largest generation
	 * (as above).
	 */
	if (!fs_devices->opened) {
		device->generation = found_transid;
		fs_devices->latest_generation = max_t(u64, found_transid,
						fs_devices->latest_generation);
	}

	fs_devices->total_devices = btrfs_super_num_devices(disk_super);

	mutex_unlock(&fs_devices->device_list_mutex);
	return device;
}
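
/*
 * To summarize the outcomes above: rescanning a known path just refreshes
 * generation and total_devices, a previously missing device that reappears
 * has its MISSING state cleared, and a device that claims an already-open
 * fsid:devid pair from a different block device is rejected with -EEXIST.
 */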

static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
{
	struct btrfs_fs_devices *fs_devices;
	struct btrfs_device *device;
	struct btrfs_device *orig_dev;

	fs_devices = alloc_fs_devices(orig->fsid, NULL);
	if (IS_ERR(fs_devices))
		return fs_devices;

	mutex_lock(&orig->device_list_mutex);
	fs_devices->total_devices = orig->total_devices;

	list_for_each_entry(orig_dev, &orig->devices, dev_list) {
		struct rcu_string *name;

		device = btrfs_alloc_device(NULL, &orig_dev->devid,
					    orig_dev->uuid);
		if (IS_ERR(device))
			goto error;

		/*
		 * This is ok to do without rcu read locked because we hold the
		 * uuid mutex so nothing we touch in here is going to disappear.
		 */
		if (orig_dev->name) {
			name = rcu_string_strdup(orig_dev->name->str,
					GFP_KERNEL);
			if (!name) {
				btrfs_free_device(device);
				goto error;
			}
			rcu_assign_pointer(device->name, name);
		}

		list_add(&device->dev_list, &fs_devices->devices);
		device->fs_devices = fs_devices;
		fs_devices->num_devices++;
	}
	mutex_unlock(&orig->device_list_mutex);
	return fs_devices;
error:
	mutex_unlock(&orig->device_list_mutex);
	free_fs_devices(fs_devices);
	return ERR_PTR(-ENOMEM);
}

/*
 * After we have read the system tree and know devids belonging to this
 * filesystem, remove the device which does not belong there.
 */
void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step)
{
	struct btrfs_device *device, *next;
	struct btrfs_device *latest_dev = NULL;

	mutex_lock(&uuid_mutex);
again:
	/* This is the initialized path, it is safe to release the devices. */
	list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
		if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
			     &device->dev_state)) {
			if (!test_bit(BTRFS_DEV_STATE_REPLACE_TGT,
				      &device->dev_state) &&
			    (!latest_dev ||
			     device->generation > latest_dev->generation)) {
				latest_dev = device;
			}
			continue;
		}

		if (device->devid == BTRFS_DEV_REPLACE_DEVID) {
			/*
			 * In the first step, keep the device which has
			 * the correct fsid and the devid that is used
			 * for the dev_replace procedure.
			 * In the second step, the dev_replace state is
			 * read from the device tree and it is known
			 * whether the procedure is really active or
			 * not, which means whether this device is
			 * used or whether it should be removed.
			 */
			if (step == 0 || test_bit(BTRFS_DEV_STATE_REPLACE_TGT,
						  &device->dev_state)) {
				continue;
			}
		}
		if (device->bdev) {
			blkdev_put(device->bdev, device->mode);
			device->bdev = NULL;
			fs_devices->open_devices--;
		}
		if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
			list_del_init(&device->dev_alloc_list);
			clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
			if (!test_bit(BTRFS_DEV_STATE_REPLACE_TGT,
				      &device->dev_state))
				fs_devices->rw_devices--;
		}
		list_del_init(&device->dev_list);
		fs_devices->num_devices--;
		btrfs_free_device(device);
	}

	if (fs_devices->seed) {
		fs_devices = fs_devices->seed;
		goto again;
	}

	fs_devices->latest_bdev = latest_dev->bdev;

	mutex_unlock(&uuid_mutex);
}

static void btrfs_close_bdev(struct btrfs_device *device)
{
	if (!device->bdev)
		return;

	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
		sync_blockdev(device->bdev);
		invalidate_bdev(device->bdev);
	}

	blkdev_put(device->bdev, device->mode);
}

static void btrfs_close_one_device(struct btrfs_device *device)
{
	struct btrfs_fs_devices *fs_devices = device->fs_devices;
	struct btrfs_device *new_device;
	struct rcu_string *name;

	if (device->bdev)
		fs_devices->open_devices--;

	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
	    device->devid != BTRFS_DEV_REPLACE_DEVID) {
		list_del_init(&device->dev_alloc_list);
		fs_devices->rw_devices--;
	}

	if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
		fs_devices->missing_devices--;

	btrfs_close_bdev(device);

	new_device = btrfs_alloc_device(NULL, &device->devid,
					device->uuid);
	BUG_ON(IS_ERR(new_device)); /* -ENOMEM */

	/* Safe because we are under uuid_mutex */
	if (device->name) {
		name = rcu_string_strdup(device->name->str, GFP_NOFS);
		BUG_ON(!name); /* -ENOMEM */
		rcu_assign_pointer(new_device->name, name);
	}

	list_replace_rcu(&device->dev_list, &new_device->dev_list);
	new_device->fs_devices = device->fs_devices;

	synchronize_rcu();
	btrfs_free_device(device);
}

static int close_fs_devices(struct btrfs_fs_devices *fs_devices)
{
	struct btrfs_device *device, *tmp;

	if (--fs_devices->opened > 0)
		return 0;

	mutex_lock(&fs_devices->device_list_mutex);
	list_for_each_entry_safe(device, tmp, &fs_devices->devices, dev_list) {
		btrfs_close_one_device(device);
	}
	mutex_unlock(&fs_devices->device_list_mutex);

	WARN_ON(fs_devices->open_devices);
	WARN_ON(fs_devices->rw_devices);
	fs_devices->opened = 0;
	fs_devices->seeding = 0;

	return 0;
}

int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
{
	struct btrfs_fs_devices *seed_devices = NULL;
	int ret;

	mutex_lock(&uuid_mutex);
	ret = close_fs_devices(fs_devices);
	if (!fs_devices->opened) {
		seed_devices = fs_devices->seed;
		fs_devices->seed = NULL;
	}
	mutex_unlock(&uuid_mutex);

	while (seed_devices) {
		fs_devices = seed_devices;
		seed_devices = fs_devices->seed;
		close_fs_devices(fs_devices);
		free_fs_devices(fs_devices);
	}
	return ret;
}

static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
			   fmode_t flags, void *holder)
{
	struct btrfs_device *device;
	struct btrfs_device *latest_dev = NULL;
	int ret = 0;

	flags |= FMODE_EXCL;

	list_for_each_entry(device, &fs_devices->devices, dev_list) {
		/* Just open everything we can; ignore failures here */
		if (btrfs_open_one_device(fs_devices, device, flags, holder))
			continue;

		if (!latest_dev ||
		    device->generation > latest_dev->generation)
			latest_dev = device;
	}
	if (fs_devices->open_devices == 0) {
		ret = -EINVAL;
		goto out;
	}
	fs_devices->opened = 1;
	fs_devices->latest_bdev = latest_dev->bdev;
	fs_devices->total_rw_bytes = 0;
out:
	return ret;
}

static int devid_cmp(void *priv, struct list_head *a, struct list_head *b)
{
	struct btrfs_device *dev1, *dev2;

	dev1 = list_entry(a, struct btrfs_device, dev_list);
	dev2 = list_entry(b, struct btrfs_device, dev_list);

	if (dev1->devid < dev2->devid)
		return -1;
	else if (dev1->devid > dev2->devid)
		return 1;
	return 0;
}

int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
		       fmode_t flags, void *holder)
{
	int ret;

	lockdep_assert_held(&uuid_mutex);

	mutex_lock(&fs_devices->device_list_mutex);
	if (fs_devices->opened) {
		fs_devices->opened++;
		ret = 0;
	} else {
		list_sort(NULL, &fs_devices->devices, devid_cmp);
		ret = open_fs_devices(fs_devices, flags, holder);
	}
	mutex_unlock(&fs_devices->device_list_mutex);

	return ret;
}

static void btrfs_release_disk_super(struct page *page)
{
	kunmap(page);
	put_page(page);
}

static int btrfs_read_disk_super(struct block_device *bdev, u64 bytenr,
				 struct page **page,
				 struct btrfs_super_block **disk_super)
{
	void *p;
	pgoff_t index;

	/* make sure our super fits in the device */
	if (bytenr + PAGE_SIZE >= i_size_read(bdev->bd_inode))
		return 1;

	/* make sure our super fits in the page */
	if (sizeof(**disk_super) > PAGE_SIZE)
		return 1;

	/* make sure our super doesn't straddle pages on disk */
	index = bytenr >> PAGE_SHIFT;
	if ((bytenr + sizeof(**disk_super) - 1) >> PAGE_SHIFT != index)
		return 1;

	/* pull in the page with our super */
	*page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
				    index, GFP_KERNEL);

	if (IS_ERR_OR_NULL(*page))
		return 1;

	p = kmap(*page);

	/* align our pointer to the offset of the super block */
	*disk_super = p + offset_in_page(bytenr);

	if (btrfs_super_bytenr(*disk_super) != bytenr ||
	    btrfs_super_magic(*disk_super) != BTRFS_MAGIC) {
		btrfs_release_disk_super(*page);
		return 1;
	}

	if ((*disk_super)->label[0] &&
		(*disk_super)->label[BTRFS_LABEL_SIZE - 1])
		(*disk_super)->label[BTRFS_LABEL_SIZE - 1] = '\0';

	return 0;
}
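
/*
 * Example: the primary superblock sits at btrfs_sb_offset(0) == 64KiB, so
 * with 4KiB pages the checks above map bytenr 65536 to page index 16 at
 * offset 0 within the page; any bytenr whose superblock would straddle a
 * page boundary is rejected before a read is issued.
 */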

int btrfs_forget_devices(const char *path)
{
	int ret;

	mutex_lock(&uuid_mutex);
	ret = btrfs_free_stale_devices(strlen(path) ? path : NULL, NULL);
	mutex_unlock(&uuid_mutex);

	return ret;
}

/*
 * Look for a btrfs signature on a device. This may be called out of the mount
 * path and we are not allowed to call set_blocksize during the scan. The
 * superblock is read via pagecache.
 */
struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
					   void *holder)
{
	struct btrfs_super_block *disk_super;
	bool new_device_added = false;
	struct btrfs_device *device = NULL;
	struct block_device *bdev;
	struct page *page;
	u64 bytenr;

	lockdep_assert_held(&uuid_mutex);

	/*
	 * we would like to check all the supers, but that would make
	 * a btrfs mount succeed after a mkfs from a different FS.
	 * So, we need to add a special mount option to scan for
	 * later supers, using BTRFS_SUPER_MIRROR_MAX instead
	 */
	bytenr = btrfs_sb_offset(0);
	flags |= FMODE_EXCL;

	bdev = blkdev_get_by_path(path, flags, holder);
	if (IS_ERR(bdev))
		return ERR_CAST(bdev);

	if (btrfs_read_disk_super(bdev, bytenr, &page, &disk_super)) {
		device = ERR_PTR(-EINVAL);
		goto error_bdev_put;
	}

	device = device_list_add(path, disk_super, &new_device_added);
	if (!IS_ERR(device)) {
		if (new_device_added)
			btrfs_free_stale_devices(path, device);
	}

	btrfs_release_disk_super(page);

error_bdev_put:
	blkdev_put(bdev, flags);

	return device;
}

/*
 * Try to find a chunk that intersects [start, start + len] range and when one
 * such is found, record the end of it in *start
 */
static bool contains_pending_extent(struct btrfs_device *device, u64 *start,
				    u64 len)
{
	u64 physical_start, physical_end;

	lockdep_assert_held(&device->fs_info->chunk_mutex);

	if (!find_first_extent_bit(&device->alloc_state, *start,
				   &physical_start, &physical_end,
				   CHUNK_ALLOCATED, NULL)) {

		if (in_range(physical_start, *start, len) ||
		    in_range(*start, physical_start,
			     physical_end - physical_start)) {
			*start = physical_end + 1;
			return true;
		}
	}
	return false;
}
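
/*
 * The two in_range() tests cover both overlap cases: an allocated chunk that
 * begins inside [*start, *start + len), and a *start that falls inside an
 * allocated chunk.  Either way the caller's search cursor is advanced past
 * the conflicting chunk.
 */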

/*
 * find_free_dev_extent_start - find free space in the specified device
 * @device:	  the device which we search the free space in
 * @num_bytes:	  the size of the free space that we need
 * @search_start: the position from which to begin the search
 * @start:	  store the start of the free space.
 * @len:	  the size of the free space that we find, or the size
 *		  of the max free space if we don't find suitable free space
 *
 * this uses a pretty simple search, the expectation is that it is
 * called very infrequently and that a given device has a small number
 * of extents
 *
 * @start is used to store the start of the free space if we find. But if we
 * don't find suitable free space, it will be used to store the start position
 * of the max free space.
 *
 * @len is used to store the size of the free space that we find.
 * But if we don't find suitable free space, it is used to store the size of
 * the max free space.
 *
 * NOTE: This function will search *commit* root of device tree, and does extra
 * check to ensure dev extents are not double allocated.
 * This makes the function safe to allocate dev extents but may not report
 * correct usable device space, as device extent freed in current transaction
 * is not reported as available.
 */
int find_free_dev_extent_start(struct btrfs_device *device, u64 num_bytes,
			       u64 search_start, u64 *start, u64 *len)
{
	struct btrfs_fs_info *fs_info = device->fs_info;
	struct btrfs_root *root = fs_info->dev_root;
	struct btrfs_key key;
	struct btrfs_dev_extent *dev_extent;
	struct btrfs_path *path;
	u64 hole_size;
	u64 max_hole_start;
	u64 max_hole_size;
	u64 extent_end;
	u64 search_end = device->total_bytes;
	int ret;
	int slot;
	struct extent_buffer *l;

	/*
	 * We don't want to overwrite the superblock on the drive nor any area
	 * used by the boot loader (grub for example), so we make sure to start
	 * at an offset of at least 1MB.
	 */
	search_start = max_t(u64, search_start, SZ_1M);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	max_hole_start = search_start;
	max_hole_size = 0;

again:
	if (search_start >= search_end ||
	    test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
		ret = -ENOSPC;
		goto out;
	}

	path->reada = READA_FORWARD;
	path->search_commit_root = 1;
	path->skip_locking = 1;

	key.objectid = device->devid;
	key.offset = search_start;
	key.type = BTRFS_DEV_EXTENT_KEY;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	if (ret > 0) {
		ret = btrfs_previous_item(root, path, key.objectid, key.type);
		if (ret < 0)
			goto out;
	}

	while (1) {
		l = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(l)) {
			ret = btrfs_next_leaf(root, path);
			if (ret == 0)
				continue;
			if (ret < 0)
				goto out;

			break;
		}
		btrfs_item_key_to_cpu(l, &key, slot);

		if (key.objectid < device->devid)
			goto next;

		if (key.objectid > device->devid)
			break;

		if (key.type != BTRFS_DEV_EXTENT_KEY)
			goto next;

		if (key.offset > search_start) {
			hole_size = key.offset - search_start;

			/*
			 * Have to check before we set max_hole_start, otherwise
			 * we could end up sending back this offset anyway.
			 */
			if (contains_pending_extent(device, &search_start,
						    hole_size)) {
				if (key.offset >= search_start)
					hole_size = key.offset - search_start;
				else
					hole_size = 0;
			}

			if (hole_size > max_hole_size) {
				max_hole_start = search_start;
				max_hole_size = hole_size;
			}

			/*
			 * If this free space is greater than which we need,
			 * it must be the max free space that we have found
			 * until now, so max_hole_start must point to the start
			 * of this free space and the length of this free space
			 * is stored in max_hole_size.  Thus, we return
			 * max_hole_start and max_hole_size and go back to the
			 * caller.
			 */
			if (hole_size >= num_bytes) {
				ret = 0;
				goto out;
			}
		}

		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
		extent_end = key.offset + btrfs_dev_extent_length(l,
								  dev_extent);
		if (extent_end > search_start)
			search_start = extent_end;
next:
		path->slots[0]++;
		cond_resched();
	}

	/*
	 * At this point, search_start should be the end of
	 * allocated dev extents, and when shrinking the device,
	 * search_end may be smaller than search_start.
	 */
	if (search_end > search_start) {
		hole_size = search_end - search_start;

		if (contains_pending_extent(device, &search_start, hole_size)) {
			btrfs_release_path(path);
			goto again;
		}

		if (hole_size > max_hole_size) {
			max_hole_start = search_start;
			max_hole_size = hole_size;
		}
	}

	/* See above. */
	if (max_hole_size < num_bytes)
		ret = -ENOSPC;
	else
		ret = 0;

out:
	btrfs_free_path(path);
	*start = max_hole_start;
	if (len)
		*len = max_hole_size;
	return ret;
}
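
/*
 * Example: on a device with dev extents at [1MiB, 5MiB) and [9MiB, 13MiB), a
 * search for 2MiB starting at 0 is first clamped to 1MiB, walks past the
 * first extent to 5MiB and succeeds with the hole at [5MiB, 9MiB), whose
 * 4MiB size satisfies the request.
 */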

int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
			 u64 *start, u64 *len)
{
	/* FIXME use last free of some kind */
	return find_free_dev_extent_start(device, num_bytes, 0, start, len);
}

static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
				 struct btrfs_device *device,
				 u64 start, u64 *dev_extent_len)
{
	struct btrfs_fs_info *fs_info = device->fs_info;
	struct btrfs_root *root = fs_info->dev_root;
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *leaf = NULL;
	struct btrfs_dev_extent *extent = NULL;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = device->devid;
	key.offset = start;
	key.type = BTRFS_DEV_EXTENT_KEY;
again:
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret > 0) {
		ret = btrfs_previous_item(root, path, key.objectid,
					  BTRFS_DEV_EXTENT_KEY);
		if (ret)
			goto out;
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		extent = btrfs_item_ptr(leaf, path->slots[0],
					struct btrfs_dev_extent);
		BUG_ON(found_key.offset > start || found_key.offset +
		       btrfs_dev_extent_length(leaf, extent) < start);
		key = found_key;
		btrfs_release_path(path);
		goto again;
	} else if (ret == 0) {
		leaf = path->nodes[0];
		extent = btrfs_item_ptr(leaf, path->slots[0],
					struct btrfs_dev_extent);
	} else {
		btrfs_handle_fs_error(fs_info, ret, "Slot search failed");
		goto out;
	}

	*dev_extent_len = btrfs_dev_extent_length(leaf, extent);

	ret = btrfs_del_item(trans, root, path);
	if (ret) {
		btrfs_handle_fs_error(fs_info, ret,
				      "Failed to remove dev extent item");
	} else {
		set_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags);
	}
out:
	btrfs_free_path(path);
	return ret;
}

static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
				  struct btrfs_device *device,
				  u64 chunk_offset, u64 start, u64 num_bytes)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_fs_info *fs_info = device->fs_info;
	struct btrfs_root *root = fs_info->dev_root;
	struct btrfs_dev_extent *extent;
	struct extent_buffer *leaf;
	struct btrfs_key key;

	WARN_ON(!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state));
	WARN_ON(test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state));
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = device->devid;
	key.offset = start;
	key.type = BTRFS_DEV_EXTENT_KEY;
	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(*extent));
	if (ret)
		goto out;

	leaf = path->nodes[0];
	extent = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_dev_extent);
	btrfs_set_dev_extent_chunk_tree(leaf, extent,
					BTRFS_CHUNK_TREE_OBJECTID);
	btrfs_set_dev_extent_chunk_objectid(leaf, extent,
					    BTRFS_FIRST_CHUNK_TREE_OBJECTID);
	btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);

	btrfs_set_dev_extent_length(leaf, extent, num_bytes);
	btrfs_mark_buffer_dirty(leaf);
out:
	btrfs_free_path(path);
	return ret;
}

static u64 find_next_chunk(struct btrfs_fs_info *fs_info)
{
	struct extent_map_tree *em_tree;
	struct extent_map *em;
	struct rb_node *n;
	u64 ret = 0;

	em_tree = &fs_info->mapping_tree;
	read_lock(&em_tree->lock);
	n = rb_last(&em_tree->map.rb_root);
	if (n) {
		em = rb_entry(n, struct extent_map, rb_node);
		ret = em->start + em->len;
	}
	read_unlock(&em_tree->lock);

	return ret;
}

static noinline int find_next_devid(struct btrfs_fs_info *fs_info,
				    u64 *devid_ret)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.type = BTRFS_DEV_ITEM_KEY;
	key.offset = (u64)-1;

	ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
	if (ret < 0)
		goto error;

	BUG_ON(ret == 0); /* Corruption */

	ret = btrfs_previous_item(fs_info->chunk_root, path,
				  BTRFS_DEV_ITEMS_OBJECTID,
				  BTRFS_DEV_ITEM_KEY);
	if (ret) {
		*devid_ret = 1;
	} else {
		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				      path->slots[0]);
		*devid_ret = found_key.offset + 1;
	}
	ret = 0;
error:
	btrfs_free_path(path);
	return ret;
}

/*
 * the device information is stored in the chunk root
 * the btrfs_device struct should be fully filled in
 */
static int btrfs_add_dev_item(struct btrfs_trans_handle *trans,
			      struct btrfs_device *device)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_dev_item *dev_item;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	unsigned long ptr;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.type = BTRFS_DEV_ITEM_KEY;
	key.offset = device->devid;

	ret = btrfs_insert_empty_item(trans, trans->fs_info->chunk_root, path,
				      &key, sizeof(*dev_item));
	if (ret)
		goto out;

	leaf = path->nodes[0];
	dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);

	btrfs_set_device_id(leaf, dev_item, device->devid);
	btrfs_set_device_generation(leaf, dev_item, 0);
	btrfs_set_device_type(leaf, dev_item, device->type);
	btrfs_set_device_io_align(leaf, dev_item, device->io_align);
	btrfs_set_device_io_width(leaf, dev_item, device->io_width);
	btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
	btrfs_set_device_total_bytes(leaf, dev_item,
				     btrfs_device_get_disk_total_bytes(device));
	btrfs_set_device_bytes_used(leaf, dev_item,
				    btrfs_device_get_bytes_used(device));
	btrfs_set_device_group(leaf, dev_item, 0);
	btrfs_set_device_seek_speed(leaf, dev_item, 0);
	btrfs_set_device_bandwidth(leaf, dev_item, 0);
	btrfs_set_device_start_offset(leaf, dev_item, 0);

	ptr = btrfs_device_uuid(dev_item);
	write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
	ptr = btrfs_device_fsid(dev_item);
	write_extent_buffer(leaf, trans->fs_info->fs_devices->metadata_uuid,
			    ptr, BTRFS_FSID_SIZE);
	btrfs_mark_buffer_dirty(leaf);

	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}

/*
 * Function to update ctime/mtime for a given device path.
 * Mainly used for ctime/mtime based probe like libblkid.
 */
static void update_dev_time(const char *path_name)
{
	struct file *filp;

	filp = filp_open(path_name, O_RDWR, 0);
	if (IS_ERR(filp))
		return;
	file_update_time(filp);
	filp_close(filp, NULL);
}

static int btrfs_rm_dev_item(struct btrfs_device *device)
{
	struct btrfs_root *root = device->fs_info->chunk_root;
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_trans_handle *trans;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);
	}
	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.type = BTRFS_DEV_ITEM_KEY;
	key.offset = device->devid;

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret) {
		if (ret > 0)
			ret = -ENOENT;
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		goto out;
	}

	ret = btrfs_del_item(trans, root, path);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
	}

out:
	btrfs_free_path(path);
	if (!ret)
		ret = btrfs_commit_transaction(trans);
	return ret;
}

/*
 * Verify that @num_devices satisfies the RAID profile constraints of the
 * allocation profiles currently in use for data, system and metadata
 * (fs_info->avail_*_alloc_bits).
 */
static int btrfs_check_raid_min_devices(struct btrfs_fs_info *fs_info,
					u64 num_devices)
{
	u64 all_avail;
	unsigned seq;
	int i;

	do {
		seq = read_seqbegin(&fs_info->profiles_lock);

		all_avail = fs_info->avail_data_alloc_bits |
			    fs_info->avail_system_alloc_bits |
			    fs_info->avail_metadata_alloc_bits;
	} while (read_seqretry(&fs_info->profiles_lock, seq));

	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
		if (!(all_avail & btrfs_raid_array[i].bg_flag))
			continue;

		if (num_devices < btrfs_raid_array[i].devs_min) {
			int ret = btrfs_raid_array[i].mindev_error;

			if (ret)
				return ret;
		}
	}

	return 0;
}
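
/*
 * For example, shrinking a two-device raid1 filesystem to a single device
 * fails with BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET, since the raid1 entry in
 * btrfs_raid_array has devs_min == 2.
 */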

static struct btrfs_device * btrfs_find_next_active_device(
		struct btrfs_fs_devices *fs_devs, struct btrfs_device *device)
{
	struct btrfs_device *next_device;

	list_for_each_entry(next_device, &fs_devs->devices, dev_list) {
		if (next_device != device &&
		    !test_bit(BTRFS_DEV_STATE_MISSING, &next_device->dev_state)
		    && next_device->bdev)
			return next_device;
	}

	return NULL;
}

/*
 * Helper function to check if the given device is part of s_bdev / latest_bdev
 * and replace it with the provided or the next active device, in the context
 * where this function is called there should always be another device (or
 * this_dev) which is active.
 */
void btrfs_assign_next_active_device(struct btrfs_device *device,
				     struct btrfs_device *this_dev)
{
	struct btrfs_fs_info *fs_info = device->fs_info;
	struct btrfs_device *next_device;

	if (this_dev)
		next_device = this_dev;
	else
		next_device = btrfs_find_next_active_device(fs_info->fs_devices,
							    device);
	ASSERT(next_device);

	if (fs_info->sb->s_bdev &&
	    (fs_info->sb->s_bdev == device->bdev))
		fs_info->sb->s_bdev = next_device->bdev;

	if (fs_info->fs_devices->latest_bdev == device->bdev)
		fs_info->fs_devices->latest_bdev = next_device->bdev;
}

/*
 * Return btrfs_fs_devices::num_devices excluding the device that's being
 * currently replaced.
 */
static u64 btrfs_num_devices(struct btrfs_fs_info *fs_info)
{
	u64 num_devices = fs_info->fs_devices->num_devices;

	down_read(&fs_info->dev_replace.rwsem);
	if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
		ASSERT(num_devices > 1);
		num_devices--;
	}
	up_read(&fs_info->dev_replace.rwsem);

	return num_devices;
}

int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
		    u64 devid)
{
	struct btrfs_device *device;
	struct btrfs_fs_devices *cur_devices;
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	u64 num_devices;
	int ret = 0;

	mutex_lock(&uuid_mutex);

	num_devices = btrfs_num_devices(fs_info);

	ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1);
	if (ret)
		goto out;

	device = btrfs_find_device_by_devspec(fs_info, devid, device_path);

	if (IS_ERR(device)) {
		if (PTR_ERR(device) == -ENOENT &&
		    strcmp(device_path, "missing") == 0)
			ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
		else
			ret = PTR_ERR(device);
		goto out;
	}

	if (btrfs_pinned_by_swapfile(fs_info, device)) {
		btrfs_warn_in_rcu(fs_info,
		  "cannot remove device %s (devid %llu) due to active swapfile",
				  rcu_str_deref(device->name), device->devid);
		ret = -ETXTBSY;
		goto out;
	}

	if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
		ret = BTRFS_ERROR_DEV_TGT_REPLACE;
		goto out;
	}

	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
	    fs_info->fs_devices->rw_devices == 1) {
		ret = BTRFS_ERROR_DEV_ONLY_WRITABLE;
		goto out;
	}

	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
		mutex_lock(&fs_info->chunk_mutex);
		list_del_init(&device->dev_alloc_list);
		device->fs_devices->rw_devices--;
		mutex_unlock(&fs_info->chunk_mutex);
	}

	mutex_unlock(&uuid_mutex);
	ret = btrfs_shrink_device(device, 0);
	mutex_lock(&uuid_mutex);
	if (ret)
		goto error_undo;

	/*
	 * The device is zero sized now, remove its item from the chunk
	 * tree; the super block's num_devices counter is updated further
	 * below under the device_list_mutex.
	 */
	ret = btrfs_rm_dev_item(device);
	if (ret)
		goto error_undo;

	clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
	btrfs_scrub_cancel_dev(device);

	/*
	 * the device list mutex makes sure that we don't change
	 * the device list while someone else is writing out all
	 * the device supers. Whoever is writing all supers, should
	 * lock the device list mutex before getting the number of
	 * devices in the super block (super_copy). Conversely,
	 * whoever updates the number of devices in the super block
	 * (super_copy) should hold the device list mutex.
	 */

	/*
	 * In normal cases the cur_devices == fs_devices. But in case
	 * of deleting a seed device, the cur_devices should point to
	 * its own fs_devices listed under the fs_devices->seed.
	 */
	cur_devices = device->fs_devices;
	mutex_lock(&fs_devices->device_list_mutex);
	list_del_rcu(&device->dev_list);

	cur_devices->num_devices--;
	cur_devices->total_devices--;

	if (cur_devices != fs_devices)
		fs_devices->total_devices--;

	if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
		cur_devices->missing_devices--;

	btrfs_assign_next_active_device(device, NULL);

	if (device->bdev) {
		cur_devices->open_devices--;
		/* remove sysfs entry */
		btrfs_sysfs_rm_device_link(fs_devices, device);
	}

	num_devices = btrfs_super_num_devices(fs_info->super_copy) - 1;
	btrfs_set_super_num_devices(fs_info->super_copy, num_devices);
	mutex_unlock(&fs_devices->device_list_mutex);

	/*
	 * at this point, the device is zero sized and detached from
	 * the devices list.  All that's left is to zero out the old
	 * supers and free the device.
	 */
	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
		btrfs_scratch_superblocks(device->bdev, device->name->str);

	btrfs_close_bdev(device);
	synchronize_rcu();
	btrfs_free_device(device);

	if (cur_devices->open_devices == 0) {
		while (fs_devices) {
			if (fs_devices->seed == cur_devices) {
				fs_devices->seed = cur_devices->seed;
				break;
			}
			fs_devices = fs_devices->seed;
		}
		cur_devices->seed = NULL;
		close_fs_devices(cur_devices);
		free_fs_devices(cur_devices);
	}

out:
	mutex_unlock(&uuid_mutex);
	return ret;

error_undo:
	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
		mutex_lock(&fs_info->chunk_mutex);
		list_add(&device->dev_alloc_list,
			 &fs_devices->alloc_list);
		device->fs_devices->rw_devices++;
		mutex_unlock(&fs_info->chunk_mutex);
	}
	goto out;
}

void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev)
{
	struct btrfs_fs_devices *fs_devices;

	lockdep_assert_held(&srcdev->fs_info->fs_devices->device_list_mutex);

	/*
	 * in case of fs with no seed, srcdev->fs_devices will point
	 * to fs_devices of fs_info. However when the dev being replaced is
	 * a seed dev it will point to the seed's local fs_devices. In short
	 * srcdev will have its correct fs_devices in both the cases.
	 */
	fs_devices = srcdev->fs_devices;

	list_del_rcu(&srcdev->dev_list);
	list_del(&srcdev->dev_alloc_list);
	fs_devices->num_devices--;
	if (test_bit(BTRFS_DEV_STATE_MISSING, &srcdev->dev_state))
		fs_devices->missing_devices--;

	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &srcdev->dev_state))
		fs_devices->rw_devices--;

	if (srcdev->bdev)
		fs_devices->open_devices--;
}

void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev)
{
	struct btrfs_fs_info *fs_info = srcdev->fs_info;
	struct btrfs_fs_devices *fs_devices = srcdev->fs_devices;

	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &srcdev->dev_state)) {
		/* zero out the old super if it is writable */
		btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str);
	}

	btrfs_close_bdev(srcdev);
	synchronize_rcu();
	btrfs_free_device(srcdev);

	/* if this is no devs we rather delete the fs_devices */
	if (!fs_devices->num_devices) {
		struct btrfs_fs_devices *tmp_fs_devices;

		/*
		 * On a mounted FS, num_devices can't be zero unless it's a
		 * seed. In case of a seed device being replaced, the replace
		 * target added to the sprouted FS, so there will be no more
		 * device left under the seed FS.
		 */
		ASSERT(fs_devices->seeding);

		tmp_fs_devices = fs_info->fs_devices;
		while (tmp_fs_devices) {
			if (tmp_fs_devices->seed == fs_devices) {
				tmp_fs_devices->seed = fs_devices->seed;
				break;
			}
			tmp_fs_devices = tmp_fs_devices->seed;
		}
		fs_devices->seed = NULL;
		close_fs_devices(fs_devices);
		free_fs_devices(fs_devices);
	}
}

void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev)
{
	struct btrfs_fs_devices *fs_devices = tgtdev->fs_info->fs_devices;

	WARN_ON(!tgtdev);
	mutex_lock(&fs_devices->device_list_mutex);

	btrfs_sysfs_rm_device_link(fs_devices, tgtdev);

	if (tgtdev->bdev)
		fs_devices->open_devices--;

	fs_devices->num_devices--;

	btrfs_assign_next_active_device(tgtdev, NULL);

	list_del_rcu(&tgtdev->dev_list);

	mutex_unlock(&fs_devices->device_list_mutex);

	/*
	 * The update_dev_time() within btrfs_scratch_superblocks()
	 * may lead to a call to btrfs_show_devname() which will try
	 * to hold device_list_mutex. And here this device
	 * is already out of device list, so we don't have to hold
	 * the device_list_mutex lock.
	 */
	btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);

	btrfs_close_bdev(tgtdev);
	synchronize_rcu();
	btrfs_free_device(tgtdev);
}

static struct btrfs_device *btrfs_find_device_by_path(
		struct btrfs_fs_info *fs_info, const char *device_path)
{
	int ret = 0;
	struct btrfs_super_block *disk_super;
	u64 devid;
	u8 *dev_uuid;
	struct block_device *bdev;
	struct buffer_head *bh;
	struct btrfs_device *device;

	ret = btrfs_get_bdev_and_sb(device_path, FMODE_READ,
				    fs_info->bdev_holder, 0, &bdev, &bh);
	if (ret)
		return ERR_PTR(ret);
	disk_super = (struct btrfs_super_block *)bh->b_data;
	devid = btrfs_stack_device_id(&disk_super->dev_item);
	dev_uuid = disk_super->dev_item.uuid;
	if (btrfs_fs_incompat(fs_info, METADATA_UUID))
		device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
					   disk_super->metadata_uuid, true);
	else
		device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
					   disk_super->fsid, true);

	brelse(bh);
	if (!device)
		device = ERR_PTR(-ENOENT);
	blkdev_put(bdev, FMODE_READ);
	return device;
}

/*
 * Lookup a device given by device id, or the path if the id is 0.
 */
struct btrfs_device *btrfs_find_device_by_devspec(
		struct btrfs_fs_info *fs_info, u64 devid,
		const char *device_path)
{
	struct btrfs_device *device;

	if (devid) {
		device = btrfs_find_device(fs_info->fs_devices, devid, NULL,
					   NULL, true);
		if (!device)
			return ERR_PTR(-ENOENT);
		return device;
	}

	if (!device_path || !device_path[0])
		return ERR_PTR(-EINVAL);

	if (strcmp(device_path, "missing") == 0) {
		/* Find first missing device */
		list_for_each_entry(device, &fs_info->fs_devices->devices,
				    dev_list) {
			if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
				     &device->dev_state) && !device->bdev)
				return device;
		}
		return ERR_PTR(-ENOENT);
	}

	return btrfs_find_device_by_path(fs_info, device_path);
}

/*
 * does all the dirty work required for changing file system's UUID.
 */
2409static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
2410{
2411 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
2412 struct btrfs_fs_devices *old_devices;
2413 struct btrfs_fs_devices *seed_devices;
2414 struct btrfs_super_block *disk_super = fs_info->super_copy;
2415 struct btrfs_device *device;
2416 u64 super_flags;
2417
2418 lockdep_assert_held(&uuid_mutex);
2419 if (!fs_devices->seeding)
2420 return -EINVAL;
2421
2422 seed_devices = alloc_fs_devices(NULL, NULL);
2423 if (IS_ERR(seed_devices))
2424 return PTR_ERR(seed_devices);
2425
2426 old_devices = clone_fs_devices(fs_devices);
2427 if (IS_ERR(old_devices)) {
2428 kfree(seed_devices);
2429 return PTR_ERR(old_devices);
2430 }
2431
2432 list_add(&old_devices->fs_list, &fs_uuids);
2433
2434 memcpy(seed_devices, fs_devices, sizeof(*seed_devices));
2435 seed_devices->opened = 1;
2436 INIT_LIST_HEAD(&seed_devices->devices);
2437 INIT_LIST_HEAD(&seed_devices->alloc_list);
2438 mutex_init(&seed_devices->device_list_mutex);
2439
2440 mutex_lock(&fs_devices->device_list_mutex);
2441 list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
2442 synchronize_rcu);
2443 list_for_each_entry(device, &seed_devices->devices, dev_list)
2444 device->fs_devices = seed_devices;
2445
2446 mutex_lock(&fs_info->chunk_mutex);
2447 list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list);
2448 mutex_unlock(&fs_info->chunk_mutex);
2449
2450 fs_devices->seeding = 0;
2451 fs_devices->num_devices = 0;
2452 fs_devices->open_devices = 0;
2453 fs_devices->missing_devices = 0;
2454 fs_devices->rotating = 0;
2455 fs_devices->seed = seed_devices;
2456
2457 generate_random_uuid(fs_devices->fsid);
2458 memcpy(fs_devices->metadata_uuid, fs_devices->fsid, BTRFS_FSID_SIZE);
2459 memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
2460 mutex_unlock(&fs_devices->device_list_mutex);
2461
2462 super_flags = btrfs_super_flags(disk_super) &
2463 ~BTRFS_SUPER_FLAG_SEEDING;
2464 btrfs_set_super_flags(disk_super, super_flags);
2465
2466 return 0;
2467}
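
/*
 * Rough picture of the hand-off above (illustrative): the original,
 * still-mounted fs_devices keeps its identity but transfers all member
 * devices to a freshly allocated seed_devices, then starts over with
 * zero devices and a brand new fsid. The caller subsequently adds the
 * writable device, so reads can fall back to the read-only seed while
 * new writes land on the sprout.
 */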

/*
 * Store the expected generation for seed devices in device items.
 */
static int btrfs_finish_sprout(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_root *root = fs_info->chunk_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_dev_item *dev_item;
	struct btrfs_device *device;
	struct btrfs_key key;
	u8 fs_uuid[BTRFS_FSID_SIZE];
	u8 dev_uuid[BTRFS_UUID_SIZE];
	u64 devid;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.offset = 0;
	key.type = BTRFS_DEV_ITEM_KEY;

	while (1) {
		ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
		if (ret < 0)
			goto error;

		leaf = path->nodes[0];
next_slot:
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret > 0)
				break;
			if (ret < 0)
				goto error;
			leaf = path->nodes[0];
			btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
			btrfs_release_path(path);
			continue;
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != BTRFS_DEV_ITEMS_OBJECTID ||
		    key.type != BTRFS_DEV_ITEM_KEY)
			break;

		dev_item = btrfs_item_ptr(leaf, path->slots[0],
					  struct btrfs_dev_item);
		devid = btrfs_device_id(leaf, dev_item);
		read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
				   BTRFS_UUID_SIZE);
		read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
				   BTRFS_FSID_SIZE);
		device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
					   fs_uuid, true);
		BUG_ON(!device); /* Logic error */

		if (device->fs_devices->seeding) {
			btrfs_set_device_generation(leaf, dev_item,
						    device->generation);
			btrfs_mark_buffer_dirty(leaf);
		}

		path->slots[0]++;
		goto next_slot;
	}
	ret = 0;
error:
	btrfs_free_path(path);
	return ret;
}

int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path)
{
	struct btrfs_root *root = fs_info->dev_root;
	struct request_queue *q;
	struct btrfs_trans_handle *trans;
	struct btrfs_device *device;
	struct block_device *bdev;
	struct super_block *sb = fs_info->sb;
	struct rcu_string *name;
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	u64 orig_super_total_bytes;
	u64 orig_super_num_devices;
	int seeding_dev = 0;
	int ret = 0;
	bool unlocked = false;

	if (sb_rdonly(sb) && !fs_devices->seeding)
		return -EROFS;

	bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
				  fs_info->bdev_holder);
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);

	if (fs_devices->seeding) {
		seeding_dev = 1;
		down_write(&sb->s_umount);
		mutex_lock(&uuid_mutex);
	}

	filemap_write_and_wait(bdev->bd_inode->i_mapping);

	mutex_lock(&fs_devices->device_list_mutex);
	list_for_each_entry(device, &fs_devices->devices, dev_list) {
		if (device->bdev == bdev) {
			ret = -EEXIST;
			mutex_unlock(&fs_devices->device_list_mutex);
			goto error;
		}
	}
	mutex_unlock(&fs_devices->device_list_mutex);

	device = btrfs_alloc_device(fs_info, NULL, NULL);
	if (IS_ERR(device)) {
		/* we can safely leave the fs_devices entry around */
		ret = PTR_ERR(device);
		goto error;
	}

	name = rcu_string_strdup(device_path, GFP_KERNEL);
	if (!name) {
		ret = -ENOMEM;
		goto error_free_device;
	}
	rcu_assign_pointer(device->name, name);

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto error_free_device;
	}

	q = bdev_get_queue(bdev);
	set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
	device->generation = trans->transid;
	device->io_width = fs_info->sectorsize;
	device->io_align = fs_info->sectorsize;
	device->sector_size = fs_info->sectorsize;
	device->total_bytes = round_down(i_size_read(bdev->bd_inode),
					 fs_info->sectorsize);
	device->disk_total_bytes = device->total_bytes;
	device->commit_total_bytes = device->total_bytes;
	device->fs_info = fs_info;
	device->bdev = bdev;
	set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
	clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
	device->mode = FMODE_EXCL;
	device->dev_stats_valid = 1;
	set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);

	if (seeding_dev) {
		sb->s_flags &= ~SB_RDONLY;
		ret = btrfs_prepare_sprout(fs_info);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto error_trans;
		}
	}

	device->fs_devices = fs_devices;

	mutex_lock(&fs_devices->device_list_mutex);
	mutex_lock(&fs_info->chunk_mutex);
	list_add_rcu(&device->dev_list, &fs_devices->devices);
	list_add(&device->dev_alloc_list, &fs_devices->alloc_list);
	fs_devices->num_devices++;
	fs_devices->open_devices++;
	fs_devices->rw_devices++;
	fs_devices->total_devices++;
	fs_devices->total_rw_bytes += device->total_bytes;

	atomic64_add(device->total_bytes, &fs_info->free_chunk_space);

	if (!blk_queue_nonrot(q))
		fs_devices->rotating = 1;

	orig_super_total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
	btrfs_set_super_total_bytes(fs_info->super_copy,
		round_down(orig_super_total_bytes + device->total_bytes,
			   fs_info->sectorsize));

	orig_super_num_devices = btrfs_super_num_devices(fs_info->super_copy);
	btrfs_set_super_num_devices(fs_info->super_copy,
				    orig_super_num_devices + 1);

	/* add sysfs device entry */
	btrfs_sysfs_add_device_link(fs_devices, device);

	/*
	 * we've got more storage, clear current full flags on the
	 * space infos
	 */
	btrfs_clear_space_info_full(fs_info);

	mutex_unlock(&fs_info->chunk_mutex);
	mutex_unlock(&fs_devices->device_list_mutex);

	if (seeding_dev) {
		mutex_lock(&fs_info->chunk_mutex);
		ret = init_first_rw_device(trans);
		mutex_unlock(&fs_info->chunk_mutex);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto error_sysfs;
		}
	}

	ret = btrfs_add_dev_item(trans, device);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto error_sysfs;
	}

	if (seeding_dev) {
		char fsid_buf[BTRFS_UUID_UNPARSED_SIZE];

		ret = btrfs_finish_sprout(trans);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto error_sysfs;
		}

		/*
		 * fs_devices now represents the newly sprouted filesystem and
		 * its fsid has been changed by btrfs_prepare_sprout
		 */
		snprintf(fsid_buf, BTRFS_UUID_UNPARSED_SIZE, "%pU",
			 fs_info->fs_devices->fsid);
		if (kobject_rename(&fs_devices->fsid_kobj, fsid_buf))
			btrfs_warn(fs_info,
				   "sysfs: failed to create fsid for sprout");
	}

	ret = btrfs_commit_transaction(trans);

	if (seeding_dev) {
		mutex_unlock(&uuid_mutex);
		up_write(&sb->s_umount);
		unlocked = true;

		if (ret) /* transaction commit */
			return ret;

		ret = btrfs_relocate_sys_chunks(fs_info);
		if (ret < 0)
			btrfs_handle_fs_error(fs_info, ret,
				    "Failed to relocate sys chunks after device initialization. This can be fixed using the \"btrfs balance\" command.");
		trans = btrfs_attach_transaction(root);
		if (IS_ERR(trans)) {
			if (PTR_ERR(trans) == -ENOENT)
				return 0;
			ret = PTR_ERR(trans);
			trans = NULL;
			goto error_sysfs;
		}
		ret = btrfs_commit_transaction(trans);
	}

	/* Update ctime/mtime for libblkid */
	update_dev_time(device_path);
	return ret;

error_sysfs:
	btrfs_sysfs_rm_device_link(fs_devices, device);
	mutex_lock(&fs_info->fs_devices->device_list_mutex);
	mutex_lock(&fs_info->chunk_mutex);
	list_del_rcu(&device->dev_list);
	list_del(&device->dev_alloc_list);
	fs_info->fs_devices->num_devices--;
	fs_info->fs_devices->open_devices--;
	fs_info->fs_devices->rw_devices--;
	fs_info->fs_devices->total_devices--;
	fs_info->fs_devices->total_rw_bytes -= device->total_bytes;
	atomic64_sub(device->total_bytes, &fs_info->free_chunk_space);
	btrfs_set_super_total_bytes(fs_info->super_copy,
				    orig_super_total_bytes);
	btrfs_set_super_num_devices(fs_info->super_copy,
				    orig_super_num_devices);
	mutex_unlock(&fs_info->chunk_mutex);
	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
error_trans:
	if (seeding_dev)
		sb->s_flags |= SB_RDONLY;
	if (trans)
		btrfs_end_transaction(trans);
error_free_device:
	btrfs_free_device(device);
error:
	blkdev_put(bdev, FMODE_EXCL);
	if (seeding_dev && !unlocked) {
		mutex_unlock(&uuid_mutex);
		up_write(&sb->s_umount);
	}
	return ret;
}

static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
					struct btrfs_device *device)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_root *root = device->fs_info->chunk_root;
	struct btrfs_dev_item *dev_item;
	struct extent_buffer *leaf;
	struct btrfs_key key;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.type = BTRFS_DEV_ITEM_KEY;
	key.offset = device->devid;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0)
		goto out;

	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	leaf = path->nodes[0];
	dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);

	btrfs_set_device_id(leaf, dev_item, device->devid);
	btrfs_set_device_type(leaf, dev_item, device->type);
	btrfs_set_device_io_align(leaf, dev_item, device->io_align);
	btrfs_set_device_io_width(leaf, dev_item, device->io_width);
	btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
	btrfs_set_device_total_bytes(leaf, dev_item,
				     btrfs_device_get_disk_total_bytes(device));
	btrfs_set_device_bytes_used(leaf, dev_item,
				    btrfs_device_get_bytes_used(device));
	btrfs_mark_buffer_dirty(leaf);

out:
	btrfs_free_path(path);
	return ret;
}

int btrfs_grow_device(struct btrfs_trans_handle *trans,
		      struct btrfs_device *device, u64 new_size)
{
	struct btrfs_fs_info *fs_info = device->fs_info;
	struct btrfs_super_block *super_copy = fs_info->super_copy;
	u64 old_total;
	u64 diff;

	if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
		return -EACCES;

	new_size = round_down(new_size, fs_info->sectorsize);

	mutex_lock(&fs_info->chunk_mutex);
	old_total = btrfs_super_total_bytes(super_copy);
	diff = round_down(new_size - device->total_bytes, fs_info->sectorsize);

	if (new_size <= device->total_bytes ||
	    test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
		mutex_unlock(&fs_info->chunk_mutex);
		return -EINVAL;
	}

	btrfs_set_super_total_bytes(super_copy,
			round_down(old_total + diff, fs_info->sectorsize));
	device->fs_devices->total_rw_bytes += diff;

	btrfs_device_set_total_bytes(device, new_size);
	btrfs_device_set_disk_total_bytes(device, new_size);
	btrfs_clear_space_info_full(device->fs_info);
	if (list_empty(&device->post_commit_list))
		list_add_tail(&device->post_commit_list,
			      &trans->transaction->dev_update_list);
	mutex_unlock(&fs_info->chunk_mutex);

	return btrfs_update_device(trans, device);
}
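
/*
 * Worked example for the size bookkeeping above (illustrative numbers):
 * growing a device from 10 GiB to 15 GiB with a 4 KiB sectorsize first
 * rounds new_size down to a sector multiple, giving diff = 5 GiB, and
 * then bumps both the superblock total_bytes and total_rw_bytes by the
 * same rounded diff so the two stay consistent.
 */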

static int btrfs_free_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_root *root = fs_info->chunk_root;
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
	key.offset = chunk_offset;
	key.type = BTRFS_CHUNK_ITEM_KEY;

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0)
		goto out;
	else if (ret > 0) {
		btrfs_handle_fs_error(fs_info, -ENOENT,
				      "Failed lookup while freeing chunk.");
		ret = -ENOENT;
		goto out;
	}

	ret = btrfs_del_item(trans, root, path);
	if (ret < 0)
		btrfs_handle_fs_error(fs_info, ret,
				      "Failed to delete chunk item.");
out:
	btrfs_free_path(path);
	return ret;
}

static int btrfs_del_sys_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
{
	struct btrfs_super_block *super_copy = fs_info->super_copy;
	struct btrfs_disk_key *disk_key;
	struct btrfs_chunk *chunk;
	u8 *ptr;
	int ret = 0;
	u32 num_stripes;
	u32 array_size;
	u32 len = 0;
	u32 cur;
	struct btrfs_key key;

	mutex_lock(&fs_info->chunk_mutex);
	array_size = btrfs_super_sys_array_size(super_copy);

	ptr = super_copy->sys_chunk_array;
	cur = 0;

	while (cur < array_size) {
		disk_key = (struct btrfs_disk_key *)ptr;
		btrfs_disk_key_to_cpu(&key, disk_key);

		len = sizeof(*disk_key);

		if (key.type == BTRFS_CHUNK_ITEM_KEY) {
			chunk = (struct btrfs_chunk *)(ptr + len);
			num_stripes = btrfs_stack_chunk_num_stripes(chunk);
			len += btrfs_chunk_item_size(num_stripes);
		} else {
			ret = -EIO;
			break;
		}
		if (key.objectid == BTRFS_FIRST_CHUNK_TREE_OBJECTID &&
		    key.offset == chunk_offset) {
			memmove(ptr, ptr + len, array_size - (cur + len));
			array_size -= len;
			btrfs_set_super_sys_array_size(super_copy, array_size);
		} else {
			ptr += len;
			cur += len;
		}
	}
	mutex_unlock(&fs_info->chunk_mutex);
	return ret;
}
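
/*
 * Layout sketch of the array walked above (illustrative): the
 * superblock's sys_chunk_array is a packed sequence of
 *
 *	[btrfs_disk_key][btrfs_chunk + N stripes][btrfs_disk_key][...]
 *
 * so each entry's length is only known after reading num_stripes from
 * the chunk itself. Deleting an entry simply memmove()s the tail of
 * the array over it and shrinks sys_array_size accordingly.
 */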

/*
 * btrfs_get_chunk_map() - Find the mapping containing the given logical extent.
 * @logical: Logical block offset in bytes.
 * @length: Length of extent in bytes.
 *
 * Return: Chunk mapping or ERR_PTR.
 */
struct extent_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info,
				       u64 logical, u64 length)
{
	struct extent_map_tree *em_tree;
	struct extent_map *em;

	em_tree = &fs_info->mapping_tree;
	read_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, logical, length);
	read_unlock(&em_tree->lock);

	if (!em) {
		btrfs_crit(fs_info, "unable to find logical %llu length %llu",
			   logical, length);
		return ERR_PTR(-EINVAL);
	}

	if (em->start > logical || em->start + em->len < logical) {
		btrfs_crit(fs_info,
			   "found a bad mapping, wanted %llu-%llu, found %llu-%llu",
			   logical, length, em->start, em->start + em->len);
		free_extent_map(em);
		return ERR_PTR(-EINVAL);
	}

	/* callers are responsible for dropping em's ref. */
	return em;
}
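
/*
 * Usage sketch (illustrative): callers often only know a logical
 * address inside the chunk, so they pass a 1-byte length and drop the
 * reference themselves once done, as btrfs_remove_chunk() below does:
 *
 *	em = btrfs_get_chunk_map(fs_info, chunk_offset, 1);
 *	if (IS_ERR(em))
 *		return PTR_ERR(em);
 *	map = em->map_lookup;
 *	...
 *	free_extent_map(em);
 */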

int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct extent_map *em;
	struct map_lookup *map;
	u64 dev_extent_len = 0;
	int i, ret = 0;
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;

	em = btrfs_get_chunk_map(fs_info, chunk_offset, 1);
	if (IS_ERR(em)) {
		/*
		 * This is a logic error, but we don't want to just rely on the
		 * user having built with ASSERT enabled, so if ASSERT doesn't
		 * do anything we still error out.
		 */
		ASSERT(0);
		return PTR_ERR(em);
	}
	map = em->map_lookup;
	mutex_lock(&fs_info->chunk_mutex);
	check_system_chunk(trans, map->type);
	mutex_unlock(&fs_info->chunk_mutex);

	/*
	 * Take the device list mutex to prevent races with the final phase of
	 * a device replace operation that replaces the device object associated
	 * with map stripes (dev-replace.c:btrfs_dev_replace_finishing()).
	 */
	mutex_lock(&fs_devices->device_list_mutex);
	for (i = 0; i < map->num_stripes; i++) {
		struct btrfs_device *device = map->stripes[i].dev;
		ret = btrfs_free_dev_extent(trans, device,
					    map->stripes[i].physical,
					    &dev_extent_len);
		if (ret) {
			mutex_unlock(&fs_devices->device_list_mutex);
			btrfs_abort_transaction(trans, ret);
			goto out;
		}

		if (device->bytes_used > 0) {
			mutex_lock(&fs_info->chunk_mutex);
			btrfs_device_set_bytes_used(device,
					device->bytes_used - dev_extent_len);
			atomic64_add(dev_extent_len, &fs_info->free_chunk_space);
			btrfs_clear_space_info_full(fs_info);
			mutex_unlock(&fs_info->chunk_mutex);
		}

		ret = btrfs_update_device(trans, device);
		if (ret) {
			mutex_unlock(&fs_devices->device_list_mutex);
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
	}
	mutex_unlock(&fs_devices->device_list_mutex);

	ret = btrfs_free_chunk(trans, chunk_offset);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	trace_btrfs_chunk_free(fs_info, map, chunk_offset, em->len);

	if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
		ret = btrfs_del_sys_chunk(fs_info, chunk_offset);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
	}

	ret = btrfs_remove_block_group(trans, chunk_offset, em);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

out:
	/* once for us */
	free_extent_map(em);
	return ret;
}

static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
{
	struct btrfs_root *root = fs_info->chunk_root;
	struct btrfs_trans_handle *trans;
	int ret;

	/*
	 * Prevent races with automatic removal of unused block groups.
	 * After we relocate and before we remove the chunk with offset
	 * chunk_offset, automatic removal of the block group can kick in,
	 * resulting in a failure when calling btrfs_remove_chunk() below.
	 *
	 * Make sure to acquire this mutex before doing a tree search (dev
	 * or chunk trees) to find chunks. Otherwise the cleaner kthread might
	 * call btrfs_remove_chunk() (via btrfs_delete_unused_bgs()) after
	 * we release the path used to search the chunk/dev tree and before
	 * we acquire the delete_unused_bgs_mutex.
	 */
	lockdep_assert_held(&fs_info->delete_unused_bgs_mutex);

	ret = btrfs_can_relocate(fs_info, chunk_offset);
	if (ret)
		return -ENOSPC;

	/* step one, relocate all the extents inside this chunk */
	btrfs_scrub_pause(fs_info);
	ret = btrfs_relocate_block_group(fs_info, chunk_offset);
	btrfs_scrub_continue(fs_info);
	if (ret)
		return ret;

	trans = btrfs_start_trans_remove_block_group(root->fs_info,
						     chunk_offset);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		btrfs_handle_fs_error(root->fs_info, ret, NULL);
		return ret;
	}

	/*
	 * step two, delete the device extents and the
	 * chunk tree entries
	 */
	ret = btrfs_remove_chunk(trans, chunk_offset);
	btrfs_end_transaction(trans);
	return ret;
}

static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *chunk_root = fs_info->chunk_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_chunk *chunk;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u64 chunk_type;
	bool retried = false;
	int failed = 0;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

again:
	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
	key.offset = (u64)-1;
	key.type = BTRFS_CHUNK_ITEM_KEY;

	while (1) {
		mutex_lock(&fs_info->delete_unused_bgs_mutex);
		ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
		if (ret < 0) {
			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
			goto error;
		}
		BUG_ON(ret == 0); /* Corruption */

		ret = btrfs_previous_item(chunk_root, path, key.objectid,
					  key.type);
		if (ret)
			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
		if (ret < 0)
			goto error;
		if (ret > 0)
			break;

		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		chunk = btrfs_item_ptr(leaf, path->slots[0],
				       struct btrfs_chunk);
		chunk_type = btrfs_chunk_type(leaf, chunk);
		btrfs_release_path(path);

		if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) {
			ret = btrfs_relocate_chunk(fs_info, found_key.offset);
			if (ret == -ENOSPC)
				failed++;
			else
				BUG_ON(ret);
		}
		mutex_unlock(&fs_info->delete_unused_bgs_mutex);

		if (found_key.offset == 0)
			break;
		key.offset = found_key.offset - 1;
	}
	ret = 0;
	if (failed && !retried) {
		failed = 0;
		retried = true;
		goto again;
	} else if (WARN_ON(failed && retried)) {
		ret = -ENOSPC;
	}
error:
	btrfs_free_path(path);
	return ret;
}

/*
 * return 1 : allocate a data chunk successfully,
 * return <0: errors during allocating a data chunk,
 * return 0 : no need to allocate a data chunk.
 */
static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info,
				      u64 chunk_offset)
{
	struct btrfs_block_group_cache *cache;
	u64 bytes_used;
	u64 chunk_type;

	cache = btrfs_lookup_block_group(fs_info, chunk_offset);
	ASSERT(cache);
	chunk_type = cache->flags;
	btrfs_put_block_group(cache);

	if (chunk_type & BTRFS_BLOCK_GROUP_DATA) {
		spin_lock(&fs_info->data_sinfo->lock);
		bytes_used = fs_info->data_sinfo->bytes_used;
		spin_unlock(&fs_info->data_sinfo->lock);

		if (!bytes_used) {
			struct btrfs_trans_handle *trans;
			int ret;

			trans = btrfs_join_transaction(fs_info->tree_root);
			if (IS_ERR(trans))
				return PTR_ERR(trans);

			ret = btrfs_force_chunk_alloc(trans,
						      BTRFS_BLOCK_GROUP_DATA);
			btrfs_end_transaction(trans);
			if (ret < 0)
				return ret;
			return 1;
		}
	}
	return 0;
}

static int insert_balance_item(struct btrfs_fs_info *fs_info,
			       struct btrfs_balance_control *bctl)
{
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_trans_handle *trans;
	struct btrfs_balance_item *item;
	struct btrfs_disk_balance_args disk_bargs;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	int ret, err;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);
	}

	key.objectid = BTRFS_BALANCE_OBJECTID;
	key.type = BTRFS_TEMPORARY_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(*item));
	if (ret)
		goto out;

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);

	memzero_extent_buffer(leaf, (unsigned long)item, sizeof(*item));

	btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->data);
	btrfs_set_balance_data(leaf, item, &disk_bargs);
	btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->meta);
	btrfs_set_balance_meta(leaf, item, &disk_bargs);
	btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->sys);
	btrfs_set_balance_sys(leaf, item, &disk_bargs);

	btrfs_set_balance_flags(leaf, item, bctl->flags);

	btrfs_mark_buffer_dirty(leaf);
out:
	btrfs_free_path(path);
	err = btrfs_commit_transaction(trans);
	if (err && !ret)
		ret = err;
	return ret;
}

static int del_balance_item(struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_trans_handle *trans;
	struct btrfs_path *path;
	struct btrfs_key key;
	int ret, err;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);
	}

	key.objectid = BTRFS_BALANCE_OBJECTID;
	key.type = BTRFS_TEMPORARY_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0)
		goto out;
	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	ret = btrfs_del_item(trans, root, path);
out:
	btrfs_free_path(path);
	err = btrfs_commit_transaction(trans);
	if (err && !ret)
		ret = err;
	return ret;
}

/*
 * This is a heuristic used to reduce the number of chunks balanced on
 * resume after balance was interrupted.
 */
static void update_balance_args(struct btrfs_balance_control *bctl)
{
	/*
	 * Turn on soft mode for chunk types that were being converted.
	 */
	if (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)
		bctl->data.flags |= BTRFS_BALANCE_ARGS_SOFT;
	if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)
		bctl->sys.flags |= BTRFS_BALANCE_ARGS_SOFT;
	if (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)
		bctl->meta.flags |= BTRFS_BALANCE_ARGS_SOFT;

	/*
	 * Turn on usage filter if it is not already in use.  The idea is
	 * that chunks that we have already balanced should be
	 * reasonably full.  Don't do it for chunks that are being
	 * converted - that will keep us from relocating unconverted
	 * (albeit full) chunks.
	 */
	if (!(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE) &&
	    !(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
	    !(bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
		bctl->data.flags |= BTRFS_BALANCE_ARGS_USAGE;
		bctl->data.usage = 90;
	}
	if (!(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE) &&
	    !(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
	    !(bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
		bctl->sys.flags |= BTRFS_BALANCE_ARGS_USAGE;
		bctl->sys.usage = 90;
	}
	if (!(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE) &&
	    !(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
	    !(bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
		bctl->meta.flags |= BTRFS_BALANCE_ARGS_USAGE;
		bctl->meta.usage = 90;
	}
}

/*
 * Clear the balance status in fs_info and delete the balance item from disk.
 */
static void reset_balance_state(struct btrfs_fs_info *fs_info)
{
	struct btrfs_balance_control *bctl = fs_info->balance_ctl;
	int ret;

	BUG_ON(!fs_info->balance_ctl);

	spin_lock(&fs_info->balance_lock);
	fs_info->balance_ctl = NULL;
	spin_unlock(&fs_info->balance_lock);

	kfree(bctl);
	ret = del_balance_item(fs_info);
	if (ret)
		btrfs_handle_fs_error(fs_info, ret, NULL);
}

/*
 * Balance filters.  Return 1 if chunk should be filtered out
 * (should not be balanced).
 */
static int chunk_profiles_filter(u64 chunk_type,
				 struct btrfs_balance_args *bargs)
{
	chunk_type = chunk_to_extended(chunk_type) &
				BTRFS_EXTENDED_PROFILE_MASK;

	if (bargs->profiles & chunk_type)
		return 0;

	return 1;
}

static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
				    struct btrfs_balance_args *bargs)
{
	struct btrfs_block_group_cache *cache;
	u64 chunk_used;
	u64 user_thresh_min;
	u64 user_thresh_max;
	int ret = 1;

	cache = btrfs_lookup_block_group(fs_info, chunk_offset);
	chunk_used = btrfs_block_group_used(&cache->item);

	if (bargs->usage_min == 0)
		user_thresh_min = 0;
	else
		user_thresh_min = div_factor_fine(cache->key.offset,
						  bargs->usage_min);

	if (bargs->usage_max == 0)
		user_thresh_max = 1;
	else if (bargs->usage_max > 100)
		user_thresh_max = cache->key.offset;
	else
		user_thresh_max = div_factor_fine(cache->key.offset,
						  bargs->usage_max);

	if (user_thresh_min <= chunk_used && chunk_used < user_thresh_max)
		ret = 0;

	btrfs_put_block_group(cache);
	return ret;
}

static int chunk_usage_filter(struct btrfs_fs_info *fs_info,
			      u64 chunk_offset, struct btrfs_balance_args *bargs)
{
	struct btrfs_block_group_cache *cache;
	u64 chunk_used, user_thresh;
	int ret = 1;

	cache = btrfs_lookup_block_group(fs_info, chunk_offset);
	chunk_used = btrfs_block_group_used(&cache->item);

	if (bargs->usage_min == 0)
		user_thresh = 1;
	else if (bargs->usage > 100)
		user_thresh = cache->key.offset;
	else
		user_thresh = div_factor_fine(cache->key.offset,
					      bargs->usage);

	if (chunk_used < user_thresh)
		ret = 0;

	btrfs_put_block_group(cache);
	return ret;
}
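
/*
 * Worked example for the usage filters above (illustrative): with
 * "usage=50" on a 1 GiB block group, div_factor_fine() computes the
 * threshold as 1 GiB * 50 / 100 = 512 MiB, so a chunk with less than
 * 512 MiB of used bytes passes the filter and gets balanced, while a
 * fuller chunk is skipped.
 */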

static int chunk_devid_filter(struct extent_buffer *leaf,
			      struct btrfs_chunk *chunk,
			      struct btrfs_balance_args *bargs)
{
	struct btrfs_stripe *stripe;
	int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
	int i;

	for (i = 0; i < num_stripes; i++) {
		stripe = btrfs_stripe_nr(chunk, i);
		if (btrfs_stripe_devid(leaf, stripe) == bargs->devid)
			return 0;
	}

	return 1;
}

static u64 calc_data_stripes(u64 type, int num_stripes)
{
	const int index = btrfs_bg_flags_to_raid_index(type);
	const int ncopies = btrfs_raid_array[index].ncopies;
	const int nparity = btrfs_raid_array[index].nparity;

	if (nparity)
		return num_stripes - nparity;
	else
		return num_stripes / ncopies;
}
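
/*
 * Examples for calc_data_stripes() (illustrative): a RAID6 chunk with
 * 6 stripes has nparity == 2, so 4 of them carry data; a RAID10 chunk
 * with 4 stripes has ncopies == 2 and no parity, so 4 / 2 = 2 stripes
 * worth of data; RAID0 keeps every stripe as data.
 */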

/* [pstart, pend) */
static int chunk_drange_filter(struct extent_buffer *leaf,
			       struct btrfs_chunk *chunk,
			       struct btrfs_balance_args *bargs)
{
	struct btrfs_stripe *stripe;
	int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
	u64 stripe_offset;
	u64 stripe_length;
	u64 type;
	int factor;
	int i;

	if (!(bargs->flags & BTRFS_BALANCE_ARGS_DEVID))
		return 0;

	type = btrfs_chunk_type(leaf, chunk);
	factor = calc_data_stripes(type, num_stripes);

	for (i = 0; i < num_stripes; i++) {
		stripe = btrfs_stripe_nr(chunk, i);
		if (btrfs_stripe_devid(leaf, stripe) != bargs->devid)
			continue;

		stripe_offset = btrfs_stripe_offset(leaf, stripe);
		stripe_length = btrfs_chunk_length(leaf, chunk);
		stripe_length = div_u64(stripe_length, factor);

		if (stripe_offset < bargs->pend &&
		    stripe_offset + stripe_length > bargs->pstart)
			return 0;
	}

	return 1;
}

/* [vstart, vend) */
static int chunk_vrange_filter(struct extent_buffer *leaf,
			       struct btrfs_chunk *chunk,
			       u64 chunk_offset,
			       struct btrfs_balance_args *bargs)
{
	if (chunk_offset < bargs->vend &&
	    chunk_offset + btrfs_chunk_length(leaf, chunk) > bargs->vstart)
		/* at least part of the chunk is inside this vrange */
		return 0;

	return 1;
}
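
/*
 * Overlap example for the vrange test above (illustrative): a chunk at
 * logical 2 GiB with length 1 GiB overlaps "vrange=2560M..4096M"
 * because 2G < 4096M (vend) and 2G + 1G > 2560M (vstart). The
 * half-open [vstart, vend) intervals let adjacent ranges compose
 * without matching any chunk twice.
 */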

static int chunk_stripes_range_filter(struct extent_buffer *leaf,
				      struct btrfs_chunk *chunk,
				      struct btrfs_balance_args *bargs)
{
	int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);

	if (bargs->stripes_min <= num_stripes
			&& num_stripes <= bargs->stripes_max)
		return 0;

	return 1;
}

static int chunk_soft_convert_filter(u64 chunk_type,
				     struct btrfs_balance_args *bargs)
{
	if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT))
		return 0;

	chunk_type = chunk_to_extended(chunk_type) &
				BTRFS_EXTENDED_PROFILE_MASK;

	if (bargs->target == chunk_type)
		return 1;

	return 0;
}

static int should_balance_chunk(struct extent_buffer *leaf,
				struct btrfs_chunk *chunk, u64 chunk_offset)
{
	struct btrfs_fs_info *fs_info = leaf->fs_info;
	struct btrfs_balance_control *bctl = fs_info->balance_ctl;
	struct btrfs_balance_args *bargs = NULL;
	u64 chunk_type = btrfs_chunk_type(leaf, chunk);

	/* type filter */
	if (!((chunk_type & BTRFS_BLOCK_GROUP_TYPE_MASK) &
	      (bctl->flags & BTRFS_BALANCE_TYPE_MASK))) {
		return 0;
	}

	if (chunk_type & BTRFS_BLOCK_GROUP_DATA)
		bargs = &bctl->data;
	else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
		bargs = &bctl->sys;
	else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA)
		bargs = &bctl->meta;

	/* profiles filter */
	if ((bargs->flags & BTRFS_BALANCE_ARGS_PROFILES) &&
	    chunk_profiles_filter(chunk_type, bargs)) {
		return 0;
	}

	/* usage filter */
	if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE) &&
	    chunk_usage_filter(fs_info, chunk_offset, bargs)) {
		return 0;
	} else if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
	    chunk_usage_range_filter(fs_info, chunk_offset, bargs)) {
		return 0;
	}

	/* devid filter */
	if ((bargs->flags & BTRFS_BALANCE_ARGS_DEVID) &&
	    chunk_devid_filter(leaf, chunk, bargs)) {
		return 0;
	}

	/* drange filter, makes sense only with devid filter */
	if ((bargs->flags & BTRFS_BALANCE_ARGS_DRANGE) &&
	    chunk_drange_filter(leaf, chunk, bargs)) {
		return 0;
	}

	/* vrange filter */
	if ((bargs->flags & BTRFS_BALANCE_ARGS_VRANGE) &&
	    chunk_vrange_filter(leaf, chunk, chunk_offset, bargs)) {
		return 0;
	}

	/* stripes filter */
	if ((bargs->flags & BTRFS_BALANCE_ARGS_STRIPES_RANGE) &&
	    chunk_stripes_range_filter(leaf, chunk, bargs)) {
		return 0;
	}

	/* soft profile changing mode */
	if ((bargs->flags & BTRFS_BALANCE_ARGS_SOFT) &&
	    chunk_soft_convert_filter(chunk_type, bargs)) {
		return 0;
	}

	/*
	 * limited by count, must be the last filter
	 */
	if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT)) {
		if (bargs->limit == 0)
			return 0;
		else
			bargs->limit--;
	} else if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT_RANGE)) {
		/*
		 * Same logic as the 'limit' filter; the minimum cannot be
		 * determined here because we do not have the global
		 * information about the count of all chunks that satisfy
		 * the filters.
		 */
		if (bargs->limit_max == 0)
			return 0;
		else
			bargs->limit_max--;
	}

	return 1;
}
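
/*
 * Filter pipeline example (illustrative): a balance started with
 * "-dconvert=raid1,soft,limit=10" reaches this function with the data
 * bargs carrying CONVERT, SOFT and LIMIT. A data chunk that is already
 * raid1 is rejected by the soft-convert filter; any other data chunk
 * passes, decrements the remaining limit, and gets relocated until ten
 * chunks have been processed.
 */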

static int __btrfs_balance(struct btrfs_fs_info *fs_info)
{
	struct btrfs_balance_control *bctl = fs_info->balance_ctl;
	struct btrfs_root *chunk_root = fs_info->chunk_root;
	u64 chunk_type;
	struct btrfs_chunk *chunk;
	struct btrfs_path *path = NULL;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *leaf;
	int slot;
	int ret;
	int enospc_errors = 0;
	bool counting = true;
	/*
	 * The single value limit and the min/max range limits share the same
	 * bytes in the balance args, so save the originals here and restore
	 * them after the counting pass below.
	 */
	u64 limit_data = bctl->data.limit;
	u64 limit_meta = bctl->meta.limit;
	u64 limit_sys = bctl->sys.limit;
	u32 count_data = 0;
	u32 count_meta = 0;
	u32 count_sys = 0;
	int chunk_reserved = 0;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto error;
	}

	/* zero out stat and all the counters */
	spin_lock(&fs_info->balance_lock);
	memset(&bctl->stat, 0, sizeof(bctl->stat));
	spin_unlock(&fs_info->balance_lock);
again:
	if (!counting) {
		/*
		 * Restore the limits consumed by the counting pass; see the
		 * note at the declarations above.
		 */
		bctl->data.limit = limit_data;
		bctl->meta.limit = limit_meta;
		bctl->sys.limit = limit_sys;
	}
	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
	key.offset = (u64)-1;
	key.type = BTRFS_CHUNK_ITEM_KEY;

	while (1) {
		if ((!counting && atomic_read(&fs_info->balance_pause_req)) ||
		    atomic_read(&fs_info->balance_cancel_req)) {
			ret = -ECANCELED;
			goto error;
		}

		mutex_lock(&fs_info->delete_unused_bgs_mutex);
		ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
		if (ret < 0) {
			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
			goto error;
		}

		/*
		 * this shouldn't happen, it means the last relocate
		 * failed
		 */
		if (ret == 0)
			BUG(); /* FIXME break ? */

		ret = btrfs_previous_item(chunk_root, path, 0,
					  BTRFS_CHUNK_ITEM_KEY);
		if (ret) {
			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
			ret = 0;
			break;
		}

		leaf = path->nodes[0];
		slot = path->slots[0];
		btrfs_item_key_to_cpu(leaf, &found_key, slot);

		if (found_key.objectid != key.objectid) {
			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
			break;
		}

		chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
		chunk_type = btrfs_chunk_type(leaf, chunk);

		if (!counting) {
			spin_lock(&fs_info->balance_lock);
			bctl->stat.considered++;
			spin_unlock(&fs_info->balance_lock);
		}

		ret = should_balance_chunk(leaf, chunk, found_key.offset);

		btrfs_release_path(path);
		if (!ret) {
			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
			goto loop;
		}

		if (counting) {
			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
			spin_lock(&fs_info->balance_lock);
			bctl->stat.expected++;
			spin_unlock(&fs_info->balance_lock);

			if (chunk_type & BTRFS_BLOCK_GROUP_DATA)
				count_data++;
			else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
				count_sys++;
			else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA)
				count_meta++;

			goto loop;
		}

		/*
		 * Apply limit_min filter, no need to check if the LIMITS
		 * filter is used, limit_min is 0 by default
		 */
		if (((chunk_type & BTRFS_BLOCK_GROUP_DATA) &&
					count_data < bctl->data.limit_min)
				|| ((chunk_type & BTRFS_BLOCK_GROUP_METADATA) &&
					count_meta < bctl->meta.limit_min)
				|| ((chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) &&
					count_sys < bctl->sys.limit_min)) {
			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
			goto loop;
		}

		if (!chunk_reserved) {
			/*
			 * We may be relocating the only data chunk we have,
			 * which could potentially end up with losing data's
			 * raid profile, so lets allocate an empty one in
			 * advance.
			 */
			ret = btrfs_may_alloc_data_chunk(fs_info,
							 found_key.offset);
			if (ret < 0) {
				mutex_unlock(&fs_info->delete_unused_bgs_mutex);
				goto error;
			} else if (ret == 1) {
				chunk_reserved = 1;
			}
		}

		ret = btrfs_relocate_chunk(fs_info, found_key.offset);
		mutex_unlock(&fs_info->delete_unused_bgs_mutex);
		if (ret == -ENOSPC) {
			enospc_errors++;
		} else if (ret == -ETXTBSY) {
			btrfs_info(fs_info,
	   "skipping relocation of block group %llu due to active swapfile",
				   found_key.offset);
			ret = 0;
		} else if (ret) {
			goto error;
		} else {
			spin_lock(&fs_info->balance_lock);
			bctl->stat.completed++;
			spin_unlock(&fs_info->balance_lock);
		}
loop:
		if (found_key.offset == 0)
			break;
		key.offset = found_key.offset - 1;
	}

	if (counting) {
		btrfs_release_path(path);
		counting = false;
		goto again;
	}
error:
	btrfs_free_path(path);
	if (enospc_errors) {
		btrfs_info(fs_info, "%d enospc errors during balance",
			   enospc_errors);
		if (!ret)
			ret = -ENOSPC;
	}

	return ret;
}

/**
 * alloc_profile_is_valid - see if a given profile is valid and reduced
 * @flags: profile to validate
 * @extended: if true @flags is treated as an extended profile
 */
static int alloc_profile_is_valid(u64 flags, int extended)
{
	u64 mask = (extended ? BTRFS_EXTENDED_PROFILE_MASK :
			       BTRFS_BLOCK_GROUP_PROFILE_MASK);

	flags &= ~BTRFS_BLOCK_GROUP_TYPE_MASK;

	/* 1) check that all other bits are zeroed */
	if (flags & ~mask)
		return 0;

	/* 2) see if profile is reduced */
	if (flags == 0)
		return !extended; /* "0" is valid for usual profiles */

	/* true if exactly one bit set */
	return is_power_of_2(flags);
}
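
/*
 * Examples (illustrative): BTRFS_BLOCK_GROUP_RAID1 alone is a valid,
 * reduced profile (exactly one bit set); RAID1|RAID10 is rejected
 * because two profile bits are set; a zero profile means SINGLE and is
 * only valid in the non-extended representation, where SINGLE has no
 * bit of its own.
 */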

static inline int balance_need_close(struct btrfs_fs_info *fs_info)
{
	/* cancel requested || normal exit path */
	return atomic_read(&fs_info->balance_cancel_req) ||
		(atomic_read(&fs_info->balance_pause_req) == 0 &&
		 atomic_read(&fs_info->balance_cancel_req) == 0);
}

/* Non-zero return value signifies invalidity */
static inline int validate_convert_profile(struct btrfs_balance_args *bctl_arg,
		u64 allowed)
{
	return ((bctl_arg->flags & BTRFS_BALANCE_ARGS_CONVERT) &&
		(!alloc_profile_is_valid(bctl_arg->target, 1) ||
		 (bctl_arg->target & ~allowed)));
}

/*
 * Fill @buf with textual description of balance filter flags @bargs, up to
 * @size_buf including the terminating null. The output may be trimmed if it
 * does not fit.
 */
static void describe_balance_args(struct btrfs_balance_args *bargs, char *buf,
				  u32 size_buf)
{
	int ret;
	u32 size_bp = size_buf;
	char *bp = buf;
	u64 flags = bargs->flags;
	char tmp_buf[128] = {'\0'};

	if (!flags)
		return;

#define CHECK_APPEND_NOARG(a)						\
	do {								\
		ret = snprintf(bp, size_bp, (a));			\
		if (ret < 0 || ret >= size_bp)				\
			goto out_overflow;				\
		size_bp -= ret;						\
		bp += ret;						\
	} while (0)

#define CHECK_APPEND_1ARG(a, v1)					\
	do {								\
		ret = snprintf(bp, size_bp, (a), (v1));			\
		if (ret < 0 || ret >= size_bp)				\
			goto out_overflow;				\
		size_bp -= ret;						\
		bp += ret;						\
	} while (0)

#define CHECK_APPEND_2ARG(a, v1, v2)					\
	do {								\
		ret = snprintf(bp, size_bp, (a), (v1), (v2));		\
		if (ret < 0 || ret >= size_bp)				\
			goto out_overflow;				\
		size_bp -= ret;						\
		bp += ret;						\
	} while (0)

	if (flags & BTRFS_BALANCE_ARGS_CONVERT)
		CHECK_APPEND_1ARG("convert=%s,",
				  btrfs_bg_type_to_raid_name(bargs->target));

	if (flags & BTRFS_BALANCE_ARGS_SOFT)
		CHECK_APPEND_NOARG("soft,");

	if (flags & BTRFS_BALANCE_ARGS_PROFILES) {
		btrfs_describe_block_groups(bargs->profiles, tmp_buf,
					    sizeof(tmp_buf));
		CHECK_APPEND_1ARG("profiles=%s,", tmp_buf);
	}

	if (flags & BTRFS_BALANCE_ARGS_USAGE)
		CHECK_APPEND_1ARG("usage=%llu,", bargs->usage);

	if (flags & BTRFS_BALANCE_ARGS_USAGE_RANGE)
		CHECK_APPEND_2ARG("usage=%u..%u,",
				  bargs->usage_min, bargs->usage_max);

	if (flags & BTRFS_BALANCE_ARGS_DEVID)
		CHECK_APPEND_1ARG("devid=%llu,", bargs->devid);

	if (flags & BTRFS_BALANCE_ARGS_DRANGE)
		CHECK_APPEND_2ARG("drange=%llu..%llu,",
				  bargs->pstart, bargs->pend);

	if (flags & BTRFS_BALANCE_ARGS_VRANGE)
		CHECK_APPEND_2ARG("vrange=%llu..%llu,",
				  bargs->vstart, bargs->vend);

	if (flags & BTRFS_BALANCE_ARGS_LIMIT)
		CHECK_APPEND_1ARG("limit=%llu,", bargs->limit);

	if (flags & BTRFS_BALANCE_ARGS_LIMIT_RANGE)
		CHECK_APPEND_2ARG("limit=%u..%u,",
				  bargs->limit_min, bargs->limit_max);

	if (flags & BTRFS_BALANCE_ARGS_STRIPES_RANGE)
		CHECK_APPEND_2ARG("stripes=%u..%u,",
				  bargs->stripes_min, bargs->stripes_max);

#undef CHECK_APPEND_2ARG
#undef CHECK_APPEND_1ARG
#undef CHECK_APPEND_NOARG

out_overflow:

	if (size_bp < size_buf)
		buf[size_buf - size_bp - 1] = '\0'; /* remove last , */
	else
		buf[0] = '\0';
}
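
/*
 * Output example (illustrative): for data args "usage below 50% on
 * device 1", the function above produces "usage=50,devid=1" - each
 * active flag appends "key=value," and the trailing comma is trimmed
 * at the end.
 */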

static void describe_balance_start_or_resume(struct btrfs_fs_info *fs_info)
{
	u32 size_buf = 1024;
	char tmp_buf[192] = {'\0'};
	char *buf;
	char *bp;
	u32 size_bp = size_buf;
	int ret;
	struct btrfs_balance_control *bctl = fs_info->balance_ctl;

	buf = kzalloc(size_buf, GFP_KERNEL);
	if (!buf)
		return;

	bp = buf;

#define CHECK_APPEND_1ARG(a, v1)					\
	do {								\
		ret = snprintf(bp, size_bp, (a), (v1));			\
		if (ret < 0 || ret >= size_bp)				\
			goto out_overflow;				\
		size_bp -= ret;						\
		bp += ret;						\
	} while (0)

	if (bctl->flags & BTRFS_BALANCE_FORCE)
		CHECK_APPEND_1ARG("%s", "-f ");

	if (bctl->flags & BTRFS_BALANCE_DATA) {
		describe_balance_args(&bctl->data, tmp_buf, sizeof(tmp_buf));
		CHECK_APPEND_1ARG("-d%s ", tmp_buf);
	}

	if (bctl->flags & BTRFS_BALANCE_METADATA) {
		describe_balance_args(&bctl->meta, tmp_buf, sizeof(tmp_buf));
		CHECK_APPEND_1ARG("-m%s ", tmp_buf);
	}

	if (bctl->flags & BTRFS_BALANCE_SYSTEM) {
		describe_balance_args(&bctl->sys, tmp_buf, sizeof(tmp_buf));
		CHECK_APPEND_1ARG("-s%s ", tmp_buf);
	}

#undef CHECK_APPEND_1ARG

out_overflow:

	if (size_bp < size_buf)
		buf[size_buf - size_bp - 1] = '\0'; /* remove last " " */
	btrfs_info(fs_info, "balance: %s %s",
		   (bctl->flags & BTRFS_BALANCE_RESUME) ?
		   "resume" : "start", buf);

	kfree(buf);
}
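
/*
 * Log example (illustrative): resuming an interrupted conversion might
 * emit a line such as
 *
 *	balance: resume -dconvert=raid1,soft -mconvert=raid1,soft
 *
 * which mirrors the command line options that would restart the same
 * operation.
 */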

/*
 * Should be called with balance mutex held
 */
int btrfs_balance(struct btrfs_fs_info *fs_info,
		  struct btrfs_balance_control *bctl,
		  struct btrfs_ioctl_balance_args *bargs)
{
	u64 meta_target, data_target;
	u64 allowed;
	int mixed = 0;
	int ret;
	u64 num_devices;
	unsigned seq;
	bool reducing_integrity;
	int i;

	if (btrfs_fs_closing(fs_info) ||
	    atomic_read(&fs_info->balance_pause_req) ||
	    atomic_read(&fs_info->balance_cancel_req)) {
		ret = -EINVAL;
		goto out;
	}

	allowed = btrfs_super_incompat_flags(fs_info->super_copy);
	if (allowed & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
		mixed = 1;

	/*
	 * In case of mixed groups both data and meta should be picked,
	 * and identical options should be given for both of them.
	 */
	allowed = BTRFS_BALANCE_DATA | BTRFS_BALANCE_METADATA;
	if (mixed && (bctl->flags & allowed)) {
		if (!(bctl->flags & BTRFS_BALANCE_DATA) ||
		    !(bctl->flags & BTRFS_BALANCE_METADATA) ||
		    memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) {
			btrfs_err(fs_info,
	  "balance: mixed groups data and metadata options must be the same");
			ret = -EINVAL;
			goto out;
		}
	}

	num_devices = btrfs_num_devices(fs_info);
	allowed = 0;
	for (i = 0; i < ARRAY_SIZE(btrfs_raid_array); i++)
		if (num_devices >= btrfs_raid_array[i].devs_min)
			allowed |= btrfs_raid_array[i].bg_flag;

	if (validate_convert_profile(&bctl->data, allowed)) {
		btrfs_err(fs_info,
			  "balance: invalid convert data profile %s",
			  btrfs_bg_type_to_raid_name(bctl->data.target));
		ret = -EINVAL;
		goto out;
	}
	if (validate_convert_profile(&bctl->meta, allowed)) {
		btrfs_err(fs_info,
			  "balance: invalid convert metadata profile %s",
			  btrfs_bg_type_to_raid_name(bctl->meta.target));
		ret = -EINVAL;
		goto out;
	}
	if (validate_convert_profile(&bctl->sys, allowed)) {
		btrfs_err(fs_info,
			  "balance: invalid convert system profile %s",
			  btrfs_bg_type_to_raid_name(bctl->sys.target));
		ret = -EINVAL;
		goto out;
	}

	/*
	 * Allow to reduce metadata or system integrity only if force set or
	 * target profile is valid
	 */
	allowed = 0;
	for (i = 0; i < ARRAY_SIZE(btrfs_raid_array); i++) {
		if (btrfs_raid_array[i].ncopies >= 2 ||
		    btrfs_raid_array[i].tolerated_failures >= 1)
			allowed |= btrfs_raid_array[i].bg_flag;
	}
	do {
		seq = read_seqbegin(&fs_info->profiles_lock);

		if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
		     (fs_info->avail_system_alloc_bits & allowed) &&
		     !(bctl->sys.target & allowed)) ||
		    ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
		     (fs_info->avail_metadata_alloc_bits & allowed) &&
		     !(bctl->meta.target & allowed)))
			reducing_integrity = true;
		else
			reducing_integrity = false;

		/* if we're not converting, the target field is uninitialized */
		meta_target = (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
			bctl->meta.target : fs_info->avail_metadata_alloc_bits;
		data_target = (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
			bctl->data.target : fs_info->avail_data_alloc_bits;
	} while (read_seqretry(&fs_info->profiles_lock, seq));

	if (reducing_integrity) {
		if (bctl->flags & BTRFS_BALANCE_FORCE) {
			btrfs_info(fs_info,
				   "balance: force reducing metadata integrity");
		} else {
			btrfs_err(fs_info,
	  "balance: reduces metadata integrity, use --force if you want this");
			ret = -EINVAL;
			goto out;
		}
	}

	if (btrfs_get_num_tolerated_disk_barrier_failures(meta_target) <
		btrfs_get_num_tolerated_disk_barrier_failures(data_target)) {
		btrfs_warn(fs_info,
	"balance: metadata profile %s has lower redundancy than data profile %s",
			   btrfs_bg_type_to_raid_name(meta_target),
			   btrfs_bg_type_to_raid_name(data_target));
	}

	if (fs_info->send_in_progress) {
		btrfs_warn_rl(fs_info,
"cannot run balance while send operations are in progress (%d in progress)",
			      fs_info->send_in_progress);
		ret = -EAGAIN;
		goto out;
	}

	ret = insert_balance_item(fs_info, bctl);
	if (ret && ret != -EEXIST)
		goto out;

	if (!(bctl->flags & BTRFS_BALANCE_RESUME)) {
		BUG_ON(ret == -EEXIST);
		BUG_ON(fs_info->balance_ctl);
		spin_lock(&fs_info->balance_lock);
		fs_info->balance_ctl = bctl;
		spin_unlock(&fs_info->balance_lock);
	} else {
		BUG_ON(ret != -EEXIST);
		spin_lock(&fs_info->balance_lock);
		update_balance_args(bctl);
		spin_unlock(&fs_info->balance_lock);
	}

	ASSERT(!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
	set_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);
	describe_balance_start_or_resume(fs_info);
	mutex_unlock(&fs_info->balance_mutex);

	ret = __btrfs_balance(fs_info);

	mutex_lock(&fs_info->balance_mutex);
	if (ret == -ECANCELED && atomic_read(&fs_info->balance_pause_req))
		btrfs_info(fs_info, "balance: paused");
	else if (ret == -ECANCELED && atomic_read(&fs_info->balance_cancel_req))
		btrfs_info(fs_info, "balance: canceled");
	else
		btrfs_info(fs_info, "balance: ended with status: %d", ret);

	clear_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags);

	if (bargs) {
		memset(bargs, 0, sizeof(*bargs));
		btrfs_update_ioctl_balance_args(fs_info, bargs);
	}

	if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
	    balance_need_close(fs_info)) {
		reset_balance_state(fs_info);
		clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
	}

	wake_up(&fs_info->balance_wait_q);

	return ret;
out:
	if (bctl->flags & BTRFS_BALANCE_RESUME)
		reset_balance_state(fs_info);
	else
		kfree(bctl);
	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);

	return ret;
}

static int balance_kthread(void *data)
{
	struct btrfs_fs_info *fs_info = data;
	int ret = 0;

	mutex_lock(&fs_info->balance_mutex);
	if (fs_info->balance_ctl)
		ret = btrfs_balance(fs_info, fs_info->balance_ctl, NULL);
	mutex_unlock(&fs_info->balance_mutex);

	return ret;
}

int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
{
	struct task_struct *tsk;

	mutex_lock(&fs_info->balance_mutex);
	if (!fs_info->balance_ctl) {
		mutex_unlock(&fs_info->balance_mutex);
		return 0;
	}
	mutex_unlock(&fs_info->balance_mutex);

	if (btrfs_test_opt(fs_info, SKIP_BALANCE)) {
		btrfs_info(fs_info, "balance: resume skipped");
		return 0;
	}

	/*
	 * A ro->rw remount sequence should continue with the paused balance
	 * regardless of who pauses it, system or the user as of now, so set
	 * the resume flag.
	 */
	spin_lock(&fs_info->balance_lock);
	fs_info->balance_ctl->flags |= BTRFS_BALANCE_RESUME;
	spin_unlock(&fs_info->balance_lock);

	tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
	return PTR_ERR_OR_ZERO(tsk);
}

int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
{
	struct btrfs_balance_control *bctl;
	struct btrfs_balance_item *item;
	struct btrfs_disk_balance_args disk_bargs;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = BTRFS_BALANCE_OBJECTID;
	key.type = BTRFS_TEMPORARY_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	if (ret > 0) { /* no balance item, nothing to recover */
		ret = 0;
		goto out;
	}

	bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
	if (!bctl) {
		ret = -ENOMEM;
		goto out;
	}

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);

	bctl->flags = btrfs_balance_flags(leaf, item);
	bctl->flags |= BTRFS_BALANCE_RESUME;

	btrfs_balance_data(leaf, item, &disk_bargs);
	btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs);
	btrfs_balance_meta(leaf, item, &disk_bargs);
	btrfs_disk_balance_args_to_cpu(&bctl->meta, &disk_bargs);
	btrfs_balance_sys(leaf, item, &disk_bargs);
	btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);

	/*
	 * This should never happen, as the paused balance state is recovered
	 * during mount without any chance of other exclusive ops to collide.
	 *
	 * This gives the exclusive op status to balance and keeps in paused
	 * state until user intervention (cancel or umount). If the ownership
	 * cannot be assigned, show a message but do not fail. The balance
	 * is in a paused state and must have fs_info::balance_ctl properly
	 * set up.
	 */
	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags))
		btrfs_warn(fs_info,
	"balance: cannot set exclusive op status, resume manually");

	mutex_lock(&fs_info->balance_mutex);
	BUG_ON(fs_info->balance_ctl);
	spin_lock(&fs_info->balance_lock);
	fs_info->balance_ctl = bctl;
	spin_unlock(&fs_info->balance_lock);
	mutex_unlock(&fs_info->balance_mutex);
out:
	btrfs_free_path(path);
	return ret;
}

int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
{
	int ret = 0;

	mutex_lock(&fs_info->balance_mutex);
	if (!fs_info->balance_ctl) {
		mutex_unlock(&fs_info->balance_mutex);
		return -ENOTCONN;
	}

	if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
		atomic_inc(&fs_info->balance_pause_req);
		mutex_unlock(&fs_info->balance_mutex);

		wait_event(fs_info->balance_wait_q,
			   !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));

		mutex_lock(&fs_info->balance_mutex);
		/* we are good with balance_ctl ripped off from under us */
		BUG_ON(test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
		atomic_dec(&fs_info->balance_pause_req);
	} else {
		ret = -ENOTCONN;
	}

	mutex_unlock(&fs_info->balance_mutex);
	return ret;
}

int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
{
	mutex_lock(&fs_info->balance_mutex);
	if (!fs_info->balance_ctl) {
		mutex_unlock(&fs_info->balance_mutex);
		return -ENOTCONN;
	}

	/*
	 * A paused balance with the item stored on disk can be resumed at
	 * mount time if the mount is read-write. Otherwise it's still paused
	 * and we must not allow cancelling as it deletes the item.
	 */
	if (sb_rdonly(fs_info->sb)) {
		mutex_unlock(&fs_info->balance_mutex);
		return -EROFS;
	}

	atomic_inc(&fs_info->balance_cancel_req);
	/*
	 * if we are somewhere in the middle of the balance operation, we need
	 * to either fail or wait for it to finish
	 */
	if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
		mutex_unlock(&fs_info->balance_mutex);
		wait_event(fs_info->balance_wait_q,
			   !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
		mutex_lock(&fs_info->balance_mutex);
	} else {
		mutex_unlock(&fs_info->balance_mutex);
		/*
		 * Lock released to allow other waiters to continue, we'll
		 * reexamine the status again.
		 */
		mutex_lock(&fs_info->balance_mutex);

		if (fs_info->balance_ctl) {
			reset_balance_state(fs_info);
			clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
			btrfs_info(fs_info, "balance: canceled");
		}
	}

	BUG_ON(fs_info->balance_ctl ||
		test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
	atomic_dec(&fs_info->balance_cancel_req);
	mutex_unlock(&fs_info->balance_mutex);
	return 0;
}

static int btrfs_uuid_scan_kthread(void *data)
{
	struct btrfs_fs_info *fs_info = data;
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_key key;
	struct btrfs_path *path = NULL;
	int ret = 0;
	struct extent_buffer *eb;
	int slot;
	struct btrfs_root_item root_item;
	u32 item_size;
	struct btrfs_trans_handle *trans = NULL;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	key.objectid = 0;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = 0;

	while (1) {
		ret = btrfs_search_forward(root, &key, path,
					   BTRFS_OLDEST_GENERATION);
		if (ret) {
			if (ret > 0)
				ret = 0;
			break;
		}

		if (key.type != BTRFS_ROOT_ITEM_KEY ||
		    (key.objectid < BTRFS_FIRST_FREE_OBJECTID &&
		     key.objectid != BTRFS_FS_TREE_OBJECTID) ||
		    key.objectid > BTRFS_LAST_FREE_OBJECTID)
			goto skip;

		eb = path->nodes[0];
		slot = path->slots[0];
		item_size = btrfs_item_size_nr(eb, slot);
		if (item_size < sizeof(root_item))
			goto skip;

		read_extent_buffer(eb, &root_item,
				   btrfs_item_ptr_offset(eb, slot),
				   (int)sizeof(root_item));
		if (btrfs_root_refs(&root_item) == 0)
			goto skip;

		if (!btrfs_is_empty_uuid(root_item.uuid) ||
		    !btrfs_is_empty_uuid(root_item.received_uuid)) {
			if (trans)
				goto update_tree;

			btrfs_release_path(path);
			/*
			 * 1 - subvol uuid item
			 * 1 - received_subvol uuid item
			 */
			trans = btrfs_start_transaction(fs_info->uuid_root, 2);
			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
				break;
			}
			continue;
		} else {
			goto skip;
		}
update_tree:
		if (!btrfs_is_empty_uuid(root_item.uuid)) {
			ret = btrfs_uuid_tree_add(trans, root_item.uuid,
						  BTRFS_UUID_KEY_SUBVOL,
						  key.objectid);
			if (ret < 0) {
				btrfs_warn(fs_info, "uuid_tree_add failed %d",
					   ret);
				break;
			}
		}

		if (!btrfs_is_empty_uuid(root_item.received_uuid)) {
			ret = btrfs_uuid_tree_add(trans,
						  root_item.received_uuid,
						 BTRFS_UUID_KEY_RECEIVED_SUBVOL,
						  key.objectid);
			if (ret < 0) {
				btrfs_warn(fs_info, "uuid_tree_add failed %d",
					   ret);
				break;
			}
		}

skip:
		if (trans) {
			ret = btrfs_end_transaction(trans);
			trans = NULL;
			if (ret)
				break;
		}

		btrfs_release_path(path);
		if (key.offset < (u64)-1) {
			key.offset++;
		} else if (key.type < BTRFS_ROOT_ITEM_KEY) {
			key.offset = 0;
			key.type = BTRFS_ROOT_ITEM_KEY;
		} else if (key.objectid < (u64)-1) {
			key.offset = 0;
			key.type = BTRFS_ROOT_ITEM_KEY;
			key.objectid++;
		} else {
			break;
		}
		cond_resched();
	}

out:
	btrfs_free_path(path);
	if (trans && !IS_ERR(trans))
		btrfs_end_transaction(trans);
	if (ret)
		btrfs_warn(fs_info, "btrfs_uuid_scan_kthread failed %d", ret);
	else
		set_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags);
	up(&fs_info->uuid_tree_rescan_sem);
	return 0;
}

/*
 * Callback for btrfs_uuid_tree_iterate().
 * returns:
 * 0	check succeeded, the entry is not outdated.
 * < 0	if an error occurred.
 * > 0	if the check failed, which means the caller shall remove the entry.
 */
static int btrfs_check_uuid_tree_entry(struct btrfs_fs_info *fs_info,
				       u8 *uuid, u8 type, u64 subid)
{
	struct btrfs_key key;
	int ret = 0;
	struct btrfs_root *subvol_root;

	if (type != BTRFS_UUID_KEY_SUBVOL &&
	    type != BTRFS_UUID_KEY_RECEIVED_SUBVOL)
		goto out;

	key.objectid = subid;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;
	subvol_root = btrfs_read_fs_root_no_name(fs_info, &key);
	if (IS_ERR(subvol_root)) {
		ret = PTR_ERR(subvol_root);
		if (ret == -ENOENT)
			ret = 1;
		goto out;
	}

	switch (type) {
	case BTRFS_UUID_KEY_SUBVOL:
		if (memcmp(uuid, subvol_root->root_item.uuid, BTRFS_UUID_SIZE))
			ret = 1;
		break;
	case BTRFS_UUID_KEY_RECEIVED_SUBVOL:
		if (memcmp(uuid, subvol_root->root_item.received_uuid,
			   BTRFS_UUID_SIZE))
			ret = 1;
		break;
	}

out:
	return ret;
}

static int btrfs_uuid_rescan_kthread(void *data)
{
	struct btrfs_fs_info *fs_info = (struct btrfs_fs_info *)data;
	int ret;

	/*
	 * 1st step is to iterate through the existing UUID tree and
	 * to delete all entries that contain outdated data.
	 * 2nd step is to add all missing entries to the UUID tree.
	 */
	ret = btrfs_uuid_tree_iterate(fs_info, btrfs_check_uuid_tree_entry);
	if (ret < 0) {
		btrfs_warn(fs_info, "iterating uuid_tree failed %d", ret);
		up(&fs_info->uuid_tree_rescan_sem);
		return ret;
	}
	return btrfs_uuid_scan_kthread(data);
}
4601
4602int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
4603{
4604 struct btrfs_trans_handle *trans;
4605 struct btrfs_root *tree_root = fs_info->tree_root;
4606 struct btrfs_root *uuid_root;
4607 struct task_struct *task;
4608 int ret;
4609
4610
4611
4612
4613
4614 trans = btrfs_start_transaction(tree_root, 2);
4615 if (IS_ERR(trans))
4616 return PTR_ERR(trans);
4617
4618 uuid_root = btrfs_create_tree(trans, BTRFS_UUID_TREE_OBJECTID);
4619 if (IS_ERR(uuid_root)) {
4620 ret = PTR_ERR(uuid_root);
4621 btrfs_abort_transaction(trans, ret);
4622 btrfs_end_transaction(trans);
4623 return ret;
4624 }
4625
4626 fs_info->uuid_root = uuid_root;
4627
4628 ret = btrfs_commit_transaction(trans);
4629 if (ret)
4630 return ret;
4631
4632 down(&fs_info->uuid_tree_rescan_sem);
4633 task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid");
4634 if (IS_ERR(task)) {
4635
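/* fs_info->update_uuid_tree_gen remains 0 in all error cases */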
4636 btrfs_warn(fs_info, "failed to start uuid_scan task");
4637 up(&fs_info->uuid_tree_rescan_sem);
4638 return PTR_ERR(task);
4639 }
4640
4641 return 0;
4642}
4643
4644int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info)
4645{
4646 struct task_struct *task;
4647
4648 down(&fs_info->uuid_tree_rescan_sem);
4649 task = kthread_run(btrfs_uuid_rescan_kthread, fs_info, "btrfs-uuid");
4650 if (IS_ERR(task)) {
4651
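/* fs_info->update_uuid_tree_gen remains 0 in all error cases */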
4652 btrfs_warn(fs_info, "failed to start uuid_rescan task");
4653 up(&fs_info->uuid_tree_rescan_sem);
4654 return PTR_ERR(task);
4655 }
4656
4657 return 0;
4658}
4659
/*
 * Shrinking a device means finding all of the device extents past the new
 * size, relocating their chunks and then shrinking the device item and the
 * super block's total_bytes to match.
 */
4665int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
4666{
4667 struct btrfs_fs_info *fs_info = device->fs_info;
4668 struct btrfs_root *root = fs_info->dev_root;
4669 struct btrfs_trans_handle *trans;
4670 struct btrfs_dev_extent *dev_extent = NULL;
4671 struct btrfs_path *path;
4672 u64 length;
4673 u64 chunk_offset;
4674 int ret;
4675 int slot;
4676 int failed = 0;
4677 bool retried = false;
4678 struct extent_buffer *l;
4679 struct btrfs_key key;
4680 struct btrfs_super_block *super_copy = fs_info->super_copy;
4681 u64 old_total = btrfs_super_total_bytes(super_copy);
4682 u64 old_size = btrfs_device_get_total_bytes(device);
4683 u64 diff;
4684 u64 start;
4685
4686 new_size = round_down(new_size, fs_info->sectorsize);
4687 start = new_size;
4688 diff = round_down(old_size - new_size, fs_info->sectorsize);
4689
4690 if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
4691 return -EINVAL;
4692
4693 path = btrfs_alloc_path();
4694 if (!path)
4695 return -ENOMEM;
4696
4697 path->reada = READA_BACK;
4698
4699 trans = btrfs_start_transaction(root, 0);
4700 if (IS_ERR(trans)) {
4701 btrfs_free_path(path);
4702 return PTR_ERR(trans);
4703 }
4704
4705 mutex_lock(&fs_info->chunk_mutex);
4706
4707 btrfs_device_set_total_bytes(device, new_size);
4708 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
4709 device->fs_devices->total_rw_bytes -= diff;
4710 atomic64_sub(diff, &fs_info->free_chunk_space);
4711 }
4712
/*
 * If there are pending chunk allocations past the new size, commit the
 * transaction so the loop below sees them on disk; otherwise the
 * transaction is no longer needed.
 */
4718 if (contains_pending_extent(device, &start, diff)) {
4719 mutex_unlock(&fs_info->chunk_mutex);
4720 ret = btrfs_commit_transaction(trans);
4721 if (ret)
4722 goto done;
4723 } else {
4724 mutex_unlock(&fs_info->chunk_mutex);
4725 btrfs_end_transaction(trans);
4726 }
4727
4728again:
4729 key.objectid = device->devid;
4730 key.offset = (u64)-1;
4731 key.type = BTRFS_DEV_EXTENT_KEY;
4732
4733 do {
4734 mutex_lock(&fs_info->delete_unused_bgs_mutex);
4735 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4736 if (ret < 0) {
4737 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4738 goto done;
4739 }
4740
4741 ret = btrfs_previous_item(root, path, 0, key.type);
4742 if (ret)
4743 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4744 if (ret < 0)
4745 goto done;
4746 if (ret) {
4747 ret = 0;
4748 btrfs_release_path(path);
4749 break;
4750 }
4751
4752 l = path->nodes[0];
4753 slot = path->slots[0];
4754 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
4755
4756 if (key.objectid != device->devid) {
4757 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4758 btrfs_release_path(path);
4759 break;
4760 }
4761
4762 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
4763 length = btrfs_dev_extent_length(l, dev_extent);
4764
4765 if (key.offset + length <= new_size) {
4766 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4767 btrfs_release_path(path);
4768 break;
4769 }
4770
4771 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
4772 btrfs_release_path(path);
4773
/*
 * We may be relocating the only data chunk we have,
 * which could potentially end up with losing data's
 * raid profile, so lets allocate an empty one in
 * advance.
 */
4780 ret = btrfs_may_alloc_data_chunk(fs_info, chunk_offset);
4781 if (ret < 0) {
4782 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4783 goto done;
4784 }
4785
4786 ret = btrfs_relocate_chunk(fs_info, chunk_offset);
4787 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4788 if (ret == -ENOSPC) {
4789 failed++;
4790 } else if (ret) {
4791 if (ret == -ETXTBSY) {
4792 btrfs_warn(fs_info,
4793 "could not shrink block group %llu due to active swapfile",
4794 chunk_offset);
4795 }
4796 goto done;
4797 }
4798 } while (key.offset-- > 0);
4799
4800 if (failed && !retried) {
4801 failed = 0;
4802 retried = true;
4803 goto again;
4804 } else if (failed && retried) {
4805 ret = -ENOSPC;
4806 goto done;
4807 }
4808
/* Shrinking succeeded, else we would be at "done" */
4810 trans = btrfs_start_transaction(root, 0);
4811 if (IS_ERR(trans)) {
4812 ret = PTR_ERR(trans);
4813 goto done;
4814 }
4815
4816 mutex_lock(&fs_info->chunk_mutex);
4817 btrfs_device_set_disk_total_bytes(device, new_size);
4818 if (list_empty(&device->post_commit_list))
4819 list_add_tail(&device->post_commit_list,
4820 &trans->transaction->dev_update_list);
4821
4822 WARN_ON(diff > old_total);
4823 btrfs_set_super_total_bytes(super_copy,
4824 round_down(old_total - diff, fs_info->sectorsize));
4825 mutex_unlock(&fs_info->chunk_mutex);
4826
/* Now btrfs_update_device() will change the on-disk size */
4828 ret = btrfs_update_device(trans, device);
4829 if (ret < 0) {
4830 btrfs_abort_transaction(trans, ret);
4831 btrfs_end_transaction(trans);
4832 } else {
4833 ret = btrfs_commit_transaction(trans);
4834 }
4835done:
4836 btrfs_free_path(path);
4837 if (ret) {
4838 mutex_lock(&fs_info->chunk_mutex);
4839 btrfs_device_set_total_bytes(device, old_size);
4840 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
4841 device->fs_devices->total_rw_bytes += diff;
4842 atomic64_add(diff, &fs_info->free_chunk_space);
4843 mutex_unlock(&fs_info->chunk_mutex);
4844 }
4845 return ret;
4846}
4847
4848static int btrfs_add_system_chunk(struct btrfs_fs_info *fs_info,
4849 struct btrfs_key *key,
4850 struct btrfs_chunk *chunk, int item_size)
4851{
4852 struct btrfs_super_block *super_copy = fs_info->super_copy;
4853 struct btrfs_disk_key disk_key;
4854 u32 array_size;
4855 u8 *ptr;
4856
4857 mutex_lock(&fs_info->chunk_mutex);
4858 array_size = btrfs_super_sys_array_size(super_copy);
4859 if (array_size + item_size + sizeof(disk_key)
4860 > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
4861 mutex_unlock(&fs_info->chunk_mutex);
4862 return -EFBIG;
4863 }
4864
4865 ptr = super_copy->sys_chunk_array + array_size;
4866 btrfs_cpu_key_to_disk(&disk_key, key);
4867 memcpy(ptr, &disk_key, sizeof(disk_key));
4868 ptr += sizeof(disk_key);
4869 memcpy(ptr, chunk, item_size);
4870 item_size += sizeof(disk_key);
4871 btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
4872 mutex_unlock(&fs_info->chunk_mutex);
4873
4874 return 0;
4875}
4876
/*
 * Sort the devices in descending order by max_avail, then by total_avail.
 */
4880static int btrfs_cmp_device_info(const void *a, const void *b)
4881{
4882 const struct btrfs_device_info *di_a = a;
4883 const struct btrfs_device_info *di_b = b;
4884
4885 if (di_a->max_avail > di_b->max_avail)
4886 return -1;
4887 if (di_a->max_avail < di_b->max_avail)
4888 return 1;
4889 if (di_a->total_avail > di_b->total_avail)
4890 return -1;
4891 if (di_a->total_avail < di_b->total_avail)
4892 return 1;
4893 return 0;
4894}
4895
4896static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
4897{
4898 if (!(type & BTRFS_BLOCK_GROUP_RAID56_MASK))
4899 return;
4900
4901 btrfs_set_fs_incompat(info, RAID56);
4902}
4903
4904static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
4905 u64 start, u64 type)
4906{
4907 struct btrfs_fs_info *info = trans->fs_info;
4908 struct btrfs_fs_devices *fs_devices = info->fs_devices;
4909 struct btrfs_device *device;
4910 struct map_lookup *map = NULL;
4911 struct extent_map_tree *em_tree;
4912 struct extent_map *em;
4913 struct btrfs_device_info *devices_info = NULL;
4914 u64 total_avail;
4915 int num_stripes;
4916 int data_stripes;
4917
4918 int sub_stripes;
4919 int dev_stripes;
4920 int devs_max;
4921 int devs_min;
4922 int devs_increment;
4923 int ncopies;
4924 int nparity;
4925
4926 int ret;
4927 u64 max_stripe_size;
4928 u64 max_chunk_size;
4929 u64 stripe_size;
4930 u64 chunk_size;
4931 int ndevs;
4932 int i;
4933 int j;
4934 int index;
4935
4936 BUG_ON(!alloc_profile_is_valid(type, 0));
4937
4938 if (list_empty(&fs_devices->alloc_list)) {
4939 if (btrfs_test_opt(info, ENOSPC_DEBUG))
4940 btrfs_debug(info, "%s: no writable device", __func__);
4941 return -ENOSPC;
4942 }
4943
4944 index = btrfs_bg_flags_to_raid_index(type);
4945
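/* Pull the allocation parameters for this profile from btrfs_raid_array */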
4946 sub_stripes = btrfs_raid_array[index].sub_stripes;
4947 dev_stripes = btrfs_raid_array[index].dev_stripes;
4948 devs_max = btrfs_raid_array[index].devs_max;
4949 if (!devs_max)
4950 devs_max = BTRFS_MAX_DEVS(info);
4951 devs_min = btrfs_raid_array[index].devs_min;
4952 devs_increment = btrfs_raid_array[index].devs_increment;
4953 ncopies = btrfs_raid_array[index].ncopies;
4954 nparity = btrfs_raid_array[index].nparity;
4955
4956 if (type & BTRFS_BLOCK_GROUP_DATA) {
4957 max_stripe_size = SZ_1G;
4958 max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE;
4959 } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
4960
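/* For larger filesystems, use larger metadata chunks */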
4961 if (fs_devices->total_rw_bytes > 50ULL * SZ_1G)
4962 max_stripe_size = SZ_1G;
4963 else
4964 max_stripe_size = SZ_256M;
4965 max_chunk_size = max_stripe_size;
4966 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
4967 max_stripe_size = SZ_32M;
4968 max_chunk_size = 2 * max_stripe_size;
4969 } else {
4970 btrfs_err(info, "invalid chunk type 0x%llx requested",
4971 type);
4972 BUG();
4973 }
4974
/* We don't want a chunk larger than 10% of writable space */
4976 max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
4977 max_chunk_size);
4978
4979 devices_info = kcalloc(fs_devices->rw_devices, sizeof(*devices_info),
4980 GFP_NOFS);
4981 if (!devices_info)
4982 return -ENOMEM;
4983
/*
 * In the first pass through the devices list, we gather information
 * about the available holes on each device.
 */
4988 ndevs = 0;
4989 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
4990 u64 max_avail;
4991 u64 dev_offset;
4992
4993 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
4994 WARN(1, KERN_ERR
4995 "BTRFS: read-only device in alloc_list\n");
4996 continue;
4997 }
4998
4999 if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
5000 &device->dev_state) ||
5001 test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
5002 continue;
5003
5004 if (device->total_bytes > device->bytes_used)
5005 total_avail = device->total_bytes - device->bytes_used;
5006 else
5007 total_avail = 0;
5008
/* If there is no space on this device, skip it */
5010 if (total_avail == 0)
5011 continue;
5012
5013 ret = find_free_dev_extent(device,
5014 max_stripe_size * dev_stripes,
5015 &dev_offset, &max_avail);
5016 if (ret && ret != -ENOSPC)
5017 goto error;
5018
5019 if (ret == 0)
5020 max_avail = max_stripe_size * dev_stripes;
5021
5022 if (max_avail < BTRFS_STRIPE_LEN * dev_stripes) {
5023 if (btrfs_test_opt(info, ENOSPC_DEBUG))
5024 btrfs_debug(info,
5025 "%s: devid %llu has no free space, have=%llu want=%u",
5026 __func__, device->devid, max_avail,
5027 BTRFS_STRIPE_LEN * dev_stripes);
5028 continue;
5029 }
5030
5031 if (ndevs == fs_devices->rw_devices) {
5032 WARN(1, "%s: found more than %llu devices\n",
5033 __func__, fs_devices->rw_devices);
5034 break;
5035 }
5036 devices_info[ndevs].dev_offset = dev_offset;
5037 devices_info[ndevs].max_avail = max_avail;
5038 devices_info[ndevs].total_avail = total_avail;
5039 devices_info[ndevs].dev = device;
5040 ++ndevs;
5041 }
5042
/*
 * Now sort the devices by hole size / available space.
 */
5046 sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
5047 btrfs_cmp_device_info, NULL);
5048
/* Round down to number of usable stripes */
5050 ndevs = round_down(ndevs, devs_increment);
5051
5052 if (ndevs < devs_min) {
5053 ret = -ENOSPC;
5054 if (btrfs_test_opt(info, ENOSPC_DEBUG)) {
5055 btrfs_debug(info,
5056 "%s: not enough devices with free space: have=%d minimum required=%d",
5057 __func__, ndevs, devs_min);
5058 }
5059 goto error;
5060 }
5061
5062 ndevs = min(ndevs, devs_max);
5063
/*
 * The primary goal is to maximize the number of stripes, so use as
 * many devices as possible, even if the stripes are not maximum sized.
 *
 * The DUP profile stores more than one stripe per device, the
 * max_avail is the total size so we have to adjust.
 */
5071 stripe_size = div_u64(devices_info[ndevs - 1].max_avail, dev_stripes);
5072 num_stripes = ndevs * dev_stripes;
5073
/*
 * This will have to be fixed for RAID1 and RAID10 over
 * more drives.
 */
5078 data_stripes = (num_stripes - nparity) / ncopies;
5079
/*
 * Use the number of data stripes to figure out how big this chunk is
 * really going to be in terms of logical address space, and compare that
 * answer with the max chunk size. If it's higher, we must reduce
 * stripe_size.
 */
5086 if (stripe_size * data_stripes > max_chunk_size) {
/*
 * Reduce stripe_size, round it up to a 16MB boundary again and
 * then use it, unless it ends up being even bigger than the
 * previous value we had already.
 */
5092 stripe_size = min(round_up(div_u64(max_chunk_size,
5093 data_stripes), SZ_16M),
5094 stripe_size);
5095 }
5096
/* Align to BTRFS_STRIPE_LEN */
5098 stripe_size = round_down(stripe_size, BTRFS_STRIPE_LEN);
5099
5100 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
5101 if (!map) {
5102 ret = -ENOMEM;
5103 goto error;
5104 }
5105 map->num_stripes = num_stripes;
5106
5107 for (i = 0; i < ndevs; ++i) {
5108 for (j = 0; j < dev_stripes; ++j) {
5109 int s = i * dev_stripes + j;
5110 map->stripes[s].dev = devices_info[i].dev;
5111 map->stripes[s].physical = devices_info[i].dev_offset +
5112 j * stripe_size;
5113 }
5114 }
5115 map->stripe_len = BTRFS_STRIPE_LEN;
5116 map->io_align = BTRFS_STRIPE_LEN;
5117 map->io_width = BTRFS_STRIPE_LEN;
5118 map->type = type;
5119 map->sub_stripes = sub_stripes;
5120
5121 chunk_size = stripe_size * data_stripes;
5122
5123 trace_btrfs_chunk_alloc(info, map, start, chunk_size);
5124
5125 em = alloc_extent_map();
5126 if (!em) {
5127 kfree(map);
5128 ret = -ENOMEM;
5129 goto error;
5130 }
5131 set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
5132 em->map_lookup = map;
5133 em->start = start;
5134 em->len = chunk_size;
5135 em->block_start = 0;
5136 em->block_len = em->len;
5137 em->orig_block_len = stripe_size;
5138
5139 em_tree = &info->mapping_tree;
5140 write_lock(&em_tree->lock);
5141 ret = add_extent_mapping(em_tree, em, 0);
5142 if (ret) {
5143 write_unlock(&em_tree->lock);
5144 free_extent_map(em);
5145 goto error;
5146 }
5147 write_unlock(&em_tree->lock);
5148
5149 ret = btrfs_make_block_group(trans, 0, type, start, chunk_size);
5150 if (ret)
5151 goto error_del_extent;
5152
5153 for (i = 0; i < map->num_stripes; i++) {
5154 struct btrfs_device *dev = map->stripes[i].dev;
5155
5156 btrfs_device_set_bytes_used(dev, dev->bytes_used + stripe_size);
5157 if (list_empty(&dev->post_commit_list))
5158 list_add_tail(&dev->post_commit_list,
5159 &trans->transaction->dev_update_list);
5160 }
5161
5162 atomic64_sub(stripe_size * map->num_stripes, &info->free_chunk_space);
5163
5164 free_extent_map(em);
5165 check_raid56_incompat_flag(info, type);
5166
5167 kfree(devices_info);
5168 return 0;
5169
5170error_del_extent:
5171 write_lock(&em_tree->lock);
5172 remove_extent_mapping(em_tree, em);
5173 write_unlock(&em_tree->lock);
/* One for our allocation */
free_extent_map(em);
/* One for the tree reference */
free_extent_map(em);
5179error:
5180 kfree(devices_info);
5181 return ret;
5182}
5183
5184int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
5185 u64 chunk_offset, u64 chunk_size)
5186{
5187 struct btrfs_fs_info *fs_info = trans->fs_info;
5188 struct btrfs_root *extent_root = fs_info->extent_root;
5189 struct btrfs_root *chunk_root = fs_info->chunk_root;
5190 struct btrfs_key key;
5191 struct btrfs_device *device;
5192 struct btrfs_chunk *chunk;
5193 struct btrfs_stripe *stripe;
5194 struct extent_map *em;
5195 struct map_lookup *map;
5196 size_t item_size;
5197 u64 dev_offset;
5198 u64 stripe_size;
5199 int i = 0;
5200 int ret = 0;
5201
5202 em = btrfs_get_chunk_map(fs_info, chunk_offset, chunk_size);
5203 if (IS_ERR(em))
5204 return PTR_ERR(em);
5205
5206 map = em->map_lookup;
5207 item_size = btrfs_chunk_item_size(map->num_stripes);
5208 stripe_size = em->orig_block_len;
5209
5210 chunk = kzalloc(item_size, GFP_NOFS);
5211 if (!chunk) {
5212 ret = -ENOMEM;
5213 goto out;
5214 }
5215
/*
 * Take the device list mutex to prevent races with the final phase of
 * a device replace operation that replaces the device object associated
 * with the map's stripes, because the device object's id can change
 * at any time during that final phase of the device replace operation
 * (dev-replace.c:btrfs_dev_replace_finishing()).
 */
5223 mutex_lock(&fs_info->fs_devices->device_list_mutex);
5224 for (i = 0; i < map->num_stripes; i++) {
5225 device = map->stripes[i].dev;
5226 dev_offset = map->stripes[i].physical;
5227
5228 ret = btrfs_update_device(trans, device);
5229 if (ret)
5230 break;
5231 ret = btrfs_alloc_dev_extent(trans, device, chunk_offset,
5232 dev_offset, stripe_size);
5233 if (ret)
5234 break;
5235 }
5236 if (ret) {
5237 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
5238 goto out;
5239 }
5240
5241 stripe = &chunk->stripe;
5242 for (i = 0; i < map->num_stripes; i++) {
5243 device = map->stripes[i].dev;
5244 dev_offset = map->stripes[i].physical;
5245
5246 btrfs_set_stack_stripe_devid(stripe, device->devid);
5247 btrfs_set_stack_stripe_offset(stripe, dev_offset);
5248 memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
5249 stripe++;
5250 }
5251 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
5252
5253 btrfs_set_stack_chunk_length(chunk, chunk_size);
5254 btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
5255 btrfs_set_stack_chunk_stripe_len(chunk, map->stripe_len);
5256 btrfs_set_stack_chunk_type(chunk, map->type);
5257 btrfs_set_stack_chunk_num_stripes(chunk, map->num_stripes);
5258 btrfs_set_stack_chunk_io_align(chunk, map->stripe_len);
5259 btrfs_set_stack_chunk_io_width(chunk, map->stripe_len);
5260 btrfs_set_stack_chunk_sector_size(chunk, fs_info->sectorsize);
5261 btrfs_set_stack_chunk_sub_stripes(chunk, map->sub_stripes);
5262
5263 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
5264 key.type = BTRFS_CHUNK_ITEM_KEY;
5265 key.offset = chunk_offset;
5266
5267 ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size);
5268 if (ret == 0 && map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
/*
 * TODO: Cleanup of inserted chunk root in case of
 * failure.
 */
5273 ret = btrfs_add_system_chunk(fs_info, &key, chunk, item_size);
5274 }
5275
5276out:
5277 kfree(chunk);
5278 free_extent_map(em);
5279 return ret;
5280}
5281
/*
 * Chunk allocation falls into two parts. The first part does work
 * that makes the new allocated chunk usable, but does not do any operation
 * that modifies the chunk tree. The second part does the work that
 * requires modifying the chunk tree. This division is important for the
 * bootstrap process of adding storage to a seed btrfs.
 */
5289int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type)
5290{
5291 u64 chunk_offset;
5292
5293 lockdep_assert_held(&trans->fs_info->chunk_mutex);
5294 chunk_offset = find_next_chunk(trans->fs_info);
5295 return __btrfs_alloc_chunk(trans, chunk_offset, type);
5296}
5297
5298static noinline int init_first_rw_device(struct btrfs_trans_handle *trans)
5299{
5300 struct btrfs_fs_info *fs_info = trans->fs_info;
5301 u64 chunk_offset;
5302 u64 sys_chunk_offset;
5303 u64 alloc_profile;
5304 int ret;
5305
5306 chunk_offset = find_next_chunk(fs_info);
5307 alloc_profile = btrfs_metadata_alloc_profile(fs_info);
5308 ret = __btrfs_alloc_chunk(trans, chunk_offset, alloc_profile);
5309 if (ret)
5310 return ret;
5311
5312 sys_chunk_offset = find_next_chunk(fs_info);
5313 alloc_profile = btrfs_system_alloc_profile(fs_info);
5314 ret = __btrfs_alloc_chunk(trans, sys_chunk_offset, alloc_profile);
5315 return ret;
5316}
5317
5318static inline int btrfs_chunk_max_errors(struct map_lookup *map)
5319{
5320 const int index = btrfs_bg_flags_to_raid_index(map->type);
5321
5322 return btrfs_raid_array[index].tolerated_failures;
5323}
5324
5325int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset)
5326{
5327 struct extent_map *em;
5328 struct map_lookup *map;
5329 int readonly = 0;
5330 int miss_ndevs = 0;
5331 int i;
5332
5333 em = btrfs_get_chunk_map(fs_info, chunk_offset, 1);
5334 if (IS_ERR(em))
5335 return 1;
5336
5337 map = em->map_lookup;
5338 for (i = 0; i < map->num_stripes; i++) {
5339 if (test_bit(BTRFS_DEV_STATE_MISSING,
5340 &map->stripes[i].dev->dev_state)) {
5341 miss_ndevs++;
5342 continue;
5343 }
5344 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE,
5345 &map->stripes[i].dev->dev_state)) {
5346 readonly = 1;
5347 goto end;
5348 }
5349 }
5350
/*
 * If the number of missing devices is larger than max errors, we can
 * not write the data into that chunk successfully, so set it readonly.
 */
5356 if (miss_ndevs > btrfs_chunk_max_errors(map))
5357 readonly = 1;
5358end:
5359 free_extent_map(em);
5360 return readonly;
5361}
5362
5363void btrfs_mapping_tree_free(struct extent_map_tree *tree)
5364{
5365 struct extent_map *em;
5366
5367 while (1) {
5368 write_lock(&tree->lock);
5369 em = lookup_extent_mapping(tree, 0, (u64)-1);
5370 if (em)
5371 remove_extent_mapping(tree, em);
5372 write_unlock(&tree->lock);
5373 if (!em)
5374 break;
/* Once for us */
free_extent_map(em);
/* Once for the tree */
free_extent_map(em);
5379 }
5380}
5381
5382int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
5383{
5384 struct extent_map *em;
5385 struct map_lookup *map;
5386 int ret;
5387
5388 em = btrfs_get_chunk_map(fs_info, logical, len);
5389 if (IS_ERR(em))
/*
 * We could return errors for these cases, but that could get ugly and
 * we'd probably do the same thing which is just not do anything else
 * and exit, so return 1 so the callers don't try to use other copies.
 */
5396 return 1;
5397
5398 map = em->map_lookup;
5399 if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1_MASK))
5400 ret = map->num_stripes;
5401 else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
5402 ret = map->sub_stripes;
5403 else if (map->type & BTRFS_BLOCK_GROUP_RAID5)
5404 ret = 2;
5405 else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
/*
 * There could be two corrupted data stripes, we need
 * to loop retry in order to rebuild the correct data.
 *
 * Fail a stripe at a time on every retry except the
 * stripe under reconstruction.
 */
5413 ret = map->num_stripes;
5414 else
5415 ret = 1;
5416 free_extent_map(em);
5417
5418 down_read(&fs_info->dev_replace.rwsem);
5419 if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace) &&
5420 fs_info->dev_replace.tgtdev)
5421 ret++;
5422 up_read(&fs_info->dev_replace.rwsem);
5423
5424 return ret;
5425}
5426
5427unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
5428 u64 logical)
5429{
5430 struct extent_map *em;
5431 struct map_lookup *map;
5432 unsigned long len = fs_info->sectorsize;
5433
5434 em = btrfs_get_chunk_map(fs_info, logical, len);
5435
5436 if (!WARN_ON(IS_ERR(em))) {
5437 map = em->map_lookup;
5438 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
5439 len = map->stripe_len * nr_data_stripes(map);
5440 free_extent_map(em);
5441 }
5442 return len;
5443}
5444
5445int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
5446{
5447 struct extent_map *em;
5448 struct map_lookup *map;
5449 int ret = 0;
5450
5451 em = btrfs_get_chunk_map(fs_info, logical, len);
5452
if (!WARN_ON(IS_ERR(em))) {
5454 map = em->map_lookup;
5455 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
5456 ret = 1;
5457 free_extent_map(em);
5458 }
5459 return ret;
5460}
5461
5462static int find_live_mirror(struct btrfs_fs_info *fs_info,
5463 struct map_lookup *map, int first,
5464 int dev_replace_is_ongoing)
5465{
5466 int i;
5467 int num_stripes;
5468 int preferred_mirror;
5469 int tolerance;
5470 struct btrfs_device *srcdev;
5471
5472 ASSERT((map->type &
5473 (BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10)));
5474
5475 if (map->type & BTRFS_BLOCK_GROUP_RAID10)
5476 num_stripes = map->sub_stripes;
5477 else
5478 num_stripes = map->num_stripes;
5479
5480 preferred_mirror = first + current->pid % num_stripes;
5481
5482 if (dev_replace_is_ongoing &&
5483 fs_info->dev_replace.cont_reading_from_srcdev_mode ==
5484 BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID)
5485 srcdev = fs_info->dev_replace.srcdev;
5486 else
5487 srcdev = NULL;
5488
/*
 * Try to avoid the drive that is the source drive for a
 * dev-replace procedure, only choose it if no other non-missing
 * mirror is available.
 */
5494 for (tolerance = 0; tolerance < 2; tolerance++) {
5495 if (map->stripes[preferred_mirror].dev->bdev &&
5496 (tolerance || map->stripes[preferred_mirror].dev != srcdev))
5497 return preferred_mirror;
5498 for (i = first; i < first + num_stripes; i++) {
5499 if (map->stripes[i].dev->bdev &&
5500 (tolerance || map->stripes[i].dev != srcdev))
5501 return i;
5502 }
5503 }
5504
/*
 * We couldn't find one that doesn't fail. Just return something
 * and the io error handling code will clean up eventually.
 */
5508 return preferred_mirror;
5509}
5510
5511static inline int parity_smaller(u64 a, u64 b)
5512{
5513 return a > b;
5514}
5515
/* Bubble-sort the stripe set to put the parity/syndrome stripes last */
5517static void sort_parity_stripes(struct btrfs_bio *bbio, int num_stripes)
5518{
5519 struct btrfs_bio_stripe s;
5520 int i;
5521 u64 l;
5522 int again = 1;
5523
5524 while (again) {
5525 again = 0;
5526 for (i = 0; i < num_stripes - 1; i++) {
5527 if (parity_smaller(bbio->raid_map[i],
5528 bbio->raid_map[i+1])) {
5529 s = bbio->stripes[i];
5530 l = bbio->raid_map[i];
5531 bbio->stripes[i] = bbio->stripes[i+1];
5532 bbio->raid_map[i] = bbio->raid_map[i+1];
5533 bbio->stripes[i+1] = s;
5534 bbio->raid_map[i+1] = l;
5535
5536 again = 1;
5537 }
5538 }
5539 }
5540}
5541
5542static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes)
5543{
5544 struct btrfs_bio *bbio = kzalloc(
/* The size of btrfs_bio */
sizeof(struct btrfs_bio) +
/* Plus the variable array for the stripes */
sizeof(struct btrfs_bio_stripe) * (total_stripes) +
/* Plus the variable array for the tgtdev */
sizeof(int) * (real_stripes) +
/*
 * Plus the raid_map, which includes both the tgtdev
 * num_stripes as well as parity stripes.
 */
sizeof(u64) * (total_stripes),
5556 GFP_NOFS|__GFP_NOFAIL);
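/* __GFP_NOFAIL: this allocation can not fail, so no NULL check is needed */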
5557
5558 atomic_set(&bbio->error, 0);
5559 refcount_set(&bbio->refs, 1);
5560
5561 return bbio;
5562}
5563
5564void btrfs_get_bbio(struct btrfs_bio *bbio)
5565{
5566 WARN_ON(!refcount_read(&bbio->refs));
5567 refcount_inc(&bbio->refs);
5568}
5569
5570void btrfs_put_bbio(struct btrfs_bio *bbio)
5571{
5572 if (!bbio)
5573 return;
5574 if (refcount_dec_and_test(&bbio->refs))
5575 kfree(bbio);
5576}
5577
/*
 * Please note that, discard won't be sent to target device of device
 * replace.
 */
5583static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info,
5584 u64 logical, u64 length,
5585 struct btrfs_bio **bbio_ret)
5586{
5587 struct extent_map *em;
5588 struct map_lookup *map;
5589 struct btrfs_bio *bbio;
5590 u64 offset;
5591 u64 stripe_nr;
5592 u64 stripe_nr_end;
5593 u64 stripe_end_offset;
5594 u64 stripe_cnt;
5595 u64 stripe_len;
5596 u64 stripe_offset;
5597 u64 num_stripes;
5598 u32 stripe_index;
5599 u32 factor = 0;
5600 u32 sub_stripes = 0;
5601 u64 stripes_per_dev = 0;
5602 u32 remaining_stripes = 0;
5603 u32 last_stripe = 0;
5604 int ret = 0;
5605 int i;
5606
/* Discard always returns a bbio */
5608 ASSERT(bbio_ret);
5609
5610 em = btrfs_get_chunk_map(fs_info, logical, length);
5611 if (IS_ERR(em))
5612 return PTR_ERR(em);
5613
5614 map = em->map_lookup;
5615
5616 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
5617 ret = -EOPNOTSUPP;
5618 goto out;
5619 }
5620
5621 offset = logical - em->start;
5622 length = min_t(u64, em->len - offset, length);
5623
5624 stripe_len = map->stripe_len;
5625
/*
 * stripe_nr counts the total number of stripes we have to stride
 * to get to this block.
 */
5629 stripe_nr = div64_u64(offset, stripe_len);
5630
/* stripe_offset is the offset of this block in its stripe */
5632 stripe_offset = offset - stripe_nr * stripe_len;
5633
5634 stripe_nr_end = round_up(offset + length, map->stripe_len);
5635 stripe_nr_end = div64_u64(stripe_nr_end, map->stripe_len);
5636 stripe_cnt = stripe_nr_end - stripe_nr;
5637 stripe_end_offset = stripe_nr_end * map->stripe_len -
5638 (offset + length);
5639
/*
 * After this, stripe_nr is the number of stripes on this
 * device we have to walk to find the data, and stripe_index is
 * the number of our device in the stripe array.
 */
5644 num_stripes = 1;
5645 stripe_index = 0;
5646 if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
5647 BTRFS_BLOCK_GROUP_RAID10)) {
5648 if (map->type & BTRFS_BLOCK_GROUP_RAID0)
5649 sub_stripes = 1;
5650 else
5651 sub_stripes = map->sub_stripes;
5652
5653 factor = map->num_stripes / sub_stripes;
5654 num_stripes = min_t(u64, map->num_stripes,
5655 sub_stripes * stripe_cnt);
5656 stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
5657 stripe_index *= sub_stripes;
5658 stripes_per_dev = div_u64_rem(stripe_cnt, factor,
5659 &remaining_stripes);
5660 div_u64_rem(stripe_nr_end - 1, factor, &last_stripe);
5661 last_stripe *= sub_stripes;
5662 } else if (map->type & (BTRFS_BLOCK_GROUP_RAID1_MASK |
5663 BTRFS_BLOCK_GROUP_DUP)) {
5664 num_stripes = map->num_stripes;
5665 } else {
5666 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
5667 &stripe_index);
5668 }
5669
5670 bbio = alloc_btrfs_bio(num_stripes, 0);
5671 if (!bbio) {
5672 ret = -ENOMEM;
5673 goto out;
5674 }
5675
5676 for (i = 0; i < num_stripes; i++) {
5677 bbio->stripes[i].physical =
5678 map->stripes[stripe_index].physical +
5679 stripe_offset + stripe_nr * map->stripe_len;
5680 bbio->stripes[i].dev = map->stripes[stripe_index].dev;
5681
5682 if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
5683 BTRFS_BLOCK_GROUP_RAID10)) {
5684 bbio->stripes[i].length = stripes_per_dev *
5685 map->stripe_len;
5686
5687 if (i / sub_stripes < remaining_stripes)
5688 bbio->stripes[i].length +=
5689 map->stripe_len;
/*
 * Special for the first stripe and
 * the last stripe:
 *
 * |-------|...|-------|
 *     |----------|
 *    off     end_off
 */
5699 if (i < sub_stripes)
5700 bbio->stripes[i].length -=
5701 stripe_offset;
5702
5703 if (stripe_index >= last_stripe &&
5704 stripe_index <= (last_stripe +
5705 sub_stripes - 1))
5706 bbio->stripes[i].length -=
5707 stripe_end_offset;
5708
5709 if (i == sub_stripes - 1)
5710 stripe_offset = 0;
5711 } else {
5712 bbio->stripes[i].length = length;
5713 }
5714
5715 stripe_index++;
5716 if (stripe_index == map->num_stripes) {
5717 stripe_index = 0;
5718 stripe_nr++;
5719 }
5720 }
5721
5722 *bbio_ret = bbio;
5723 bbio->map_type = map->type;
5724 bbio->num_stripes = num_stripes;
5725out:
5726 free_extent_map(em);
5727 return ret;
5728}
5729
/*
 * In dev-replace case, for repair case (that's the only case where the mirror
 * is selected explicitly when calling btrfs_map_block), blocks left of the
 * left cursor can also be read from the target drive.
 *
 * For REQ_GET_READ_MIRRORS, the target drive is added as the last one to the
 * array of stripes.
 * For READ, it also needs to be supported using the same mirror number.
 *
 * If the requested block is not left of the left cursor, EIO is returned. This
 * can happen because btrfs_num_copies() returns one more in the dev-replace
 * case.
 */
5743static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info,
5744 u64 logical, u64 length,
5745 u64 srcdev_devid, int *mirror_num,
5746 u64 *physical)
5747{
5748 struct btrfs_bio *bbio = NULL;
5749 int num_stripes;
5750 int index_srcdev = 0;
5751 int found = 0;
5752 u64 physical_of_found = 0;
5753 int i;
5754 int ret = 0;
5755
5756 ret = __btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
5757 logical, &length, &bbio, 0, 0);
5758 if (ret) {
5759 ASSERT(bbio == NULL);
5760 return ret;
5761 }
5762
5763 num_stripes = bbio->num_stripes;
5764 if (*mirror_num > num_stripes) {
/*
 * BTRFS_MAP_GET_READ_MIRRORS does not contain this mirror,
 * that means that the requested area is not left of the left
 * cursor.
 */
5770 btrfs_put_bbio(bbio);
5771 return -EIO;
5772 }
5773
/*
 * Process the rest of the function using the mirror_num of the source
 * drive. Therefore look it up first. At the end, patch the device
 * pointer to the one of the target drive.
 */
5779 for (i = 0; i < num_stripes; i++) {
5780 if (bbio->stripes[i].dev->devid != srcdev_devid)
5781 continue;
5782
/*
 * In case of DUP, in order to keep it simple, only add
 * the mirror with the lowest physical address.
 */
5787 if (found &&
5788 physical_of_found <= bbio->stripes[i].physical)
5789 continue;
5790
5791 index_srcdev = i;
5792 found = 1;
5793 physical_of_found = bbio->stripes[i].physical;
5794 }
5795
5796 btrfs_put_bbio(bbio);
5797
5798 ASSERT(found);
5799 if (!found)
5800 return -EIO;
5801
5802 *mirror_num = index_srcdev + 1;
5803 *physical = physical_of_found;
5804 return ret;
5805}
5806
5807static void handle_ops_on_dev_replace(enum btrfs_map_op op,
5808 struct btrfs_bio **bbio_ret,
5809 struct btrfs_dev_replace *dev_replace,
5810 int *num_stripes_ret, int *max_errors_ret)
5811{
5812 struct btrfs_bio *bbio = *bbio_ret;
5813 u64 srcdev_devid = dev_replace->srcdev->devid;
5814 int tgtdev_indexes = 0;
5815 int num_stripes = *num_stripes_ret;
5816 int max_errors = *max_errors_ret;
5817 int i;
5818
5819 if (op == BTRFS_MAP_WRITE) {
5820 int index_where_to_add;
5821
/*
 * Duplicate the write operations while the dev replace
 * procedure is running. Since the copying of the old disk to
 * the new disk takes place at run time while the filesystem is
 * mounted writable, the regular write operations to the old
 * disk have to be duplicated to go to the new disk as well.
 *
 * Note that device->missing is handled by the caller, and that
 * the write to the old disk is already set up in the stripes
 * array.
 */
5833 index_where_to_add = num_stripes;
5834 for (i = 0; i < num_stripes; i++) {
5835 if (bbio->stripes[i].dev->devid == srcdev_devid) {
/* Write to new disk, too */
5837 struct btrfs_bio_stripe *new =
5838 bbio->stripes + index_where_to_add;
5839 struct btrfs_bio_stripe *old =
5840 bbio->stripes + i;
5841
5842 new->physical = old->physical;
5843 new->length = old->length;
5844 new->dev = dev_replace->tgtdev;
5845 bbio->tgtdev_map[i] = index_where_to_add;
5846 index_where_to_add++;
5847 max_errors++;
5848 tgtdev_indexes++;
5849 }
5850 }
5851 num_stripes = index_where_to_add;
5852 } else if (op == BTRFS_MAP_GET_READ_MIRRORS) {
5853 int index_srcdev = 0;
5854 int found = 0;
5855 u64 physical_of_found = 0;
5856
/*
 * During the dev-replace procedure, the target drive can also
 * be used to read data in case it is needed to repair a corrupt
 * block elsewhere. This is possible if the requested area is
 * left of the left cursor. In this area, the target drive is a
 * full copy of the source drive.
 */
5864 for (i = 0; i < num_stripes; i++) {
5865 if (bbio->stripes[i].dev->devid == srcdev_devid) {
/*
 * In case of DUP, in order to keep it simple,
 * only add the mirror with the lowest physical
 * address.
 */
5871 if (found &&
5872 physical_of_found <=
5873 bbio->stripes[i].physical)
5874 continue;
5875 index_srcdev = i;
5876 found = 1;
5877 physical_of_found = bbio->stripes[i].physical;
5878 }
5879 }
5880 if (found) {
5881 struct btrfs_bio_stripe *tgtdev_stripe =
5882 bbio->stripes + num_stripes;
5883
5884 tgtdev_stripe->physical = physical_of_found;
5885 tgtdev_stripe->length =
5886 bbio->stripes[index_srcdev].length;
5887 tgtdev_stripe->dev = dev_replace->tgtdev;
5888 bbio->tgtdev_map[index_srcdev] = num_stripes;
5889
5890 tgtdev_indexes++;
5891 num_stripes++;
5892 }
5893 }
5894
5895 *num_stripes_ret = num_stripes;
5896 *max_errors_ret = max_errors;
5897 bbio->num_tgtdevs = tgtdev_indexes;
5898 *bbio_ret = bbio;
5899}
5900
5901static bool need_full_stripe(enum btrfs_map_op op)
5902{
5903 return (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS);
5904}
5905
/*
 * btrfs_get_io_geometry - calculates the geometry of a particular
 * (address, len) tuple. This information is used to calculate how big a
 * particular bio can get before it straddles a stripe.
 *
 * @fs_info - the filesystem
 * @op      - type of operation - write or read
 * @logical - address that we want to figure out the geometry of
 * @len     - the length of IO we are going to perform, starting at @logical
 * @io_geom - pointer used to return values
 *
 * Returns < 0 in case a chunk for the given logical address cannot be found,
 * usually shouldn't happen unless @logical is corrupted, 0 otherwise.
 */
5920int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
5921 u64 logical, u64 len, struct btrfs_io_geometry *io_geom)
5922{
5923 struct extent_map *em;
5924 struct map_lookup *map;
5925 u64 offset;
5926 u64 stripe_offset;
5927 u64 stripe_nr;
5928 u64 stripe_len;
5929 u64 raid56_full_stripe_start = (u64)-1;
5930 int data_stripes;
5931 int ret = 0;
5932
5933 ASSERT(op != BTRFS_MAP_DISCARD);
5934
5935 em = btrfs_get_chunk_map(fs_info, logical, len);
5936 if (IS_ERR(em))
5937 return PTR_ERR(em);
5938
5939 map = em->map_lookup;
/* Offset of this logical address in the chunk */
offset = logical - em->start;
/* Len of a stripe in a chunk */
stripe_len = map->stripe_len;
/* Stripe where this block falls in */
stripe_nr = div64_u64(offset, stripe_len);
/* Offset of stripe in the chunk */
5947 stripe_offset = stripe_nr * stripe_len;
5948 if (offset < stripe_offset) {
5949 btrfs_crit(fs_info,
5950"stripe math has gone wrong, stripe_offset=%llu offset=%llu start=%llu logical=%llu stripe_len=%llu",
5951 stripe_offset, offset, em->start, logical, stripe_len);
5952 ret = -EINVAL;
5953 goto out;
5954 }
5955
/* stripe_offset is the offset of this block in its stripe */
5957 stripe_offset = offset - stripe_offset;
5958 data_stripes = nr_data_stripes(map);
5959
5960 if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
5961 u64 max_len = stripe_len - stripe_offset;
5962
/*
 * In case of RAID56, we need to know the stripe aligned start.
 */
5966 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
5967 unsigned long full_stripe_len = stripe_len * data_stripes;
5968 raid56_full_stripe_start = offset;
5969
/*
 * Allow a write of a full stripe, but make sure we
 * don't allow straddling of stripes.
 */
5974 raid56_full_stripe_start = div64_u64(raid56_full_stripe_start,
5975 full_stripe_len);
5976 raid56_full_stripe_start *= full_stripe_len;
5977
/*
 * For writes to RAID[56], allow a full stripeset across
 * all disks. For other RAID types and for RAID[56]
 * reads, just allow a single stripe (on a single disk).
 */
5983 if (op == BTRFS_MAP_WRITE) {
5984 max_len = stripe_len * data_stripes -
5985 (offset - raid56_full_stripe_start);
5986 }
5987 }
5988 len = min_t(u64, em->len - offset, max_len);
5989 } else {
5990 len = em->len - offset;
5991 }
5992
5993 io_geom->len = len;
5994 io_geom->offset = offset;
5995 io_geom->stripe_len = stripe_len;
5996 io_geom->stripe_nr = stripe_nr;
5997 io_geom->stripe_offset = stripe_offset;
5998 io_geom->raid56_stripe_offset = raid56_full_stripe_start;
5999
6000out:
/* Once for us (from btrfs_get_chunk_map() above) */
6002 free_extent_map(em);
6003 return ret;
6004}
6005
6006static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
6007 enum btrfs_map_op op,
6008 u64 logical, u64 *length,
6009 struct btrfs_bio **bbio_ret,
6010 int mirror_num, int need_raid_map)
6011{
6012 struct extent_map *em;
6013 struct map_lookup *map;
6014 u64 offset;
6015 u64 stripe_offset;
6016 u64 stripe_nr;
6017 u64 stripe_len;
6018 u32 stripe_index;
6019 int data_stripes;
6020 int i;
6021 int ret = 0;
6022 int num_stripes;
6023 int max_errors = 0;
6024 int tgtdev_indexes = 0;
6025 struct btrfs_bio *bbio = NULL;
6026 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
6027 int dev_replace_is_ongoing = 0;
6028 int num_alloc_stripes;
6029 int patch_the_first_stripe_for_dev_replace = 0;
6030 u64 physical_to_patch_in_first_stripe = 0;
6031 u64 raid56_full_stripe_start = (u64)-1;
6032 struct btrfs_io_geometry geom;
6033
6034 ASSERT(bbio_ret);
6035
6036 if (op == BTRFS_MAP_DISCARD)
6037 return __btrfs_map_block_for_discard(fs_info, logical,
6038 *length, bbio_ret);
6039
6040 ret = btrfs_get_io_geometry(fs_info, op, logical, *length, &geom);
6041 if (ret < 0)
6042 return ret;
6043
6044 em = btrfs_get_chunk_map(fs_info, logical, *length);
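/* btrfs_get_io_geometry() above already found this chunk, so this can not fail */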
ASSERT(!IS_ERR(em));
6046 map = em->map_lookup;
6047
6048 *length = geom.len;
6049 offset = geom.offset;
6050 stripe_len = geom.stripe_len;
6051 stripe_nr = geom.stripe_nr;
6052 stripe_offset = geom.stripe_offset;
6053 raid56_full_stripe_start = geom.raid56_stripe_offset;
6054 data_stripes = nr_data_stripes(map);
6055
6056 down_read(&dev_replace->rwsem);
6057 dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
6058
/*
 * Hold the semaphore for read during the whole operation, write is
 * requested at commit time but must wait.
 */
6062 if (!dev_replace_is_ongoing)
6063 up_read(&dev_replace->rwsem);
6064
6065 if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
6066 !need_full_stripe(op) && dev_replace->tgtdev != NULL) {
6067 ret = get_extra_mirror_from_replace(fs_info, logical, *length,
6068 dev_replace->srcdev->devid,
6069 &mirror_num,
6070 &physical_to_patch_in_first_stripe);
6071 if (ret)
6072 goto out;
6073 else
6074 patch_the_first_stripe_for_dev_replace = 1;
6075 } else if (mirror_num > map->num_stripes) {
6076 mirror_num = 0;
6077 }
6078
6079 num_stripes = 1;
6080 stripe_index = 0;
6081 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
6082 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
6083 &stripe_index);
6084 if (!need_full_stripe(op))
6085 mirror_num = 1;
6086 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
6087 if (need_full_stripe(op))
6088 num_stripes = map->num_stripes;
6089 else if (mirror_num)
6090 stripe_index = mirror_num - 1;
6091 else {
6092 stripe_index = find_live_mirror(fs_info, map, 0,
6093 dev_replace_is_ongoing);
6094 mirror_num = stripe_index + 1;
6095 }
6096
6097 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
6098 if (need_full_stripe(op)) {
6099 num_stripes = map->num_stripes;
6100 } else if (mirror_num) {
6101 stripe_index = mirror_num - 1;
6102 } else {
6103 mirror_num = 1;
6104 }
6105
6106 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
6107 u32 factor = map->num_stripes / map->sub_stripes;
6108
6109 stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
6110 stripe_index *= map->sub_stripes;
6111
6112 if (need_full_stripe(op))
6113 num_stripes = map->sub_stripes;
6114 else if (mirror_num)
6115 stripe_index += mirror_num - 1;
6116 else {
6117 int old_stripe_index = stripe_index;
6118 stripe_index = find_live_mirror(fs_info, map,
6119 stripe_index,
6120 dev_replace_is_ongoing);
6121 mirror_num = stripe_index - old_stripe_index + 1;
6122 }
6123
6124 } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
6125 if (need_raid_map && (need_full_stripe(op) || mirror_num > 1)) {
/* Push stripe_nr back to the start of the full stripe */
6127 stripe_nr = div64_u64(raid56_full_stripe_start,
6128 stripe_len * data_stripes);
6129
/* RAID[56] write or recovery. Return all stripes */
6131 num_stripes = map->num_stripes;
6132 max_errors = nr_parity_stripes(map);
6133
6134 *length = map->stripe_len;
6135 stripe_index = 0;
6136 stripe_offset = 0;
6137 } else {
/*
 * Mirror #0 or #1 means the original data block.
 * Mirror #2 is RAID5 parity block.
 * Mirror #3 is RAID6 Q block.
 */
6143 stripe_nr = div_u64_rem(stripe_nr,
6144 data_stripes, &stripe_index);
6145 if (mirror_num > 1)
6146 stripe_index = data_stripes + mirror_num - 2;
6147
/* We distribute the parity blocks across stripes */
6149 div_u64_rem(stripe_nr + stripe_index, map->num_stripes,
6150 &stripe_index);
6151 if (!need_full_stripe(op) && mirror_num <= 1)
6152 mirror_num = 1;
6153 }
6154 } else {
6155
/*
 * After this, stripe_nr is the number of stripes on this
 * device we have to walk to find the data, and stripe_index is
 * the number of our device in the stripe array.
 */
6160 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
6161 &stripe_index);
6162 mirror_num = stripe_index + 1;
6163 }
6164 if (stripe_index >= map->num_stripes) {
6165 btrfs_crit(fs_info,
6166 "stripe index math went horribly wrong, got stripe_index=%u, num_stripes=%u",
6167 stripe_index, map->num_stripes);
6168 ret = -EINVAL;
6169 goto out;
6170 }
6171
6172 num_alloc_stripes = num_stripes;
6173 if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) {
6174 if (op == BTRFS_MAP_WRITE)
6175 num_alloc_stripes <<= 1;
6176 if (op == BTRFS_MAP_GET_READ_MIRRORS)
6177 num_alloc_stripes++;
6178 tgtdev_indexes = num_stripes;
6179 }
6180
6181 bbio = alloc_btrfs_bio(num_alloc_stripes, tgtdev_indexes);
6182 if (!bbio) {
6183 ret = -ENOMEM;
6184 goto out;
6185 }
6186 if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL)
6187 bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes);
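/* tgtdev_map was allocated in alloc_btrfs_bio(), right after the stripes array */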
6188
/* Build raid_map */
6190 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map &&
6191 (need_full_stripe(op) || mirror_num > 1)) {
6192 u64 tmp;
6193 unsigned rot;
6194
6195 bbio->raid_map = (u64 *)((void *)bbio->stripes +
6196 sizeof(struct btrfs_bio_stripe) *
6197 num_alloc_stripes +
6198 sizeof(int) * tgtdev_indexes);
6199
/* Work out the disk rotation on this stripe-set */
div_u64_rem(stripe_nr, num_stripes, &rot);

/* Fill in the logical address of each stripe */
6204 tmp = stripe_nr * data_stripes;
6205 for (i = 0; i < data_stripes; i++)
6206 bbio->raid_map[(i+rot) % num_stripes] =
6207 em->start + (tmp + i) * map->stripe_len;
6208
bbio->raid_map[(i+rot) % num_stripes] = RAID5_P_STRIPE;
6210 if (map->type & BTRFS_BLOCK_GROUP_RAID6)
6211 bbio->raid_map[(i+rot+1) % num_stripes] =
6212 RAID6_Q_STRIPE;
6213 }
6214
6215
6216 for (i = 0; i < num_stripes; i++) {
6217 bbio->stripes[i].physical =
6218 map->stripes[stripe_index].physical +
6219 stripe_offset +
6220 stripe_nr * map->stripe_len;
6221 bbio->stripes[i].dev =
6222 map->stripes[stripe_index].dev;
6223 stripe_index++;
6224 }
6225
6226 if (need_full_stripe(op))
6227 max_errors = btrfs_chunk_max_errors(map);
6228
6229 if (bbio->raid_map)
6230 sort_parity_stripes(bbio, num_stripes);
6231
6232 if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL &&
6233 need_full_stripe(op)) {
6234 handle_ops_on_dev_replace(op, &bbio, dev_replace, &num_stripes,
6235 &max_errors);
6236 }
6237
6238 *bbio_ret = bbio;
6239 bbio->map_type = map->type;
6240 bbio->num_stripes = num_stripes;
6241 bbio->max_errors = max_errors;
6242 bbio->mirror_num = mirror_num;
6243
/*
 * This is the case that REQ_READ && dev_replace_is_ongoing &&
 * mirror_num == num_stripes + 1 && dev_replace target drive is
 * available as a mirror.
 */
6249 if (patch_the_first_stripe_for_dev_replace && num_stripes > 0) {
6250 WARN_ON(num_stripes > 1);
6251 bbio->stripes[0].dev = dev_replace->tgtdev;
6252 bbio->stripes[0].physical = physical_to_patch_in_first_stripe;
6253 bbio->mirror_num = map->num_stripes + 1;
6254 }
6255out:
6256 if (dev_replace_is_ongoing) {
6257 lockdep_assert_held(&dev_replace->rwsem);
/* Unlock and let waiting writers proceed */
6259 up_read(&dev_replace->rwsem);
6260 }
6261 free_extent_map(em);
6262 return ret;
6263}
6264
6265int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
6266 u64 logical, u64 *length,
6267 struct btrfs_bio **bbio_ret, int mirror_num)
6268{
6269 return __btrfs_map_block(fs_info, op, logical, length, bbio_ret,
6270 mirror_num, 0);
6271}
6272
/* For Scrub/replace */
6274int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
6275 u64 logical, u64 *length,
6276 struct btrfs_bio **bbio_ret)
6277{
6278 return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, 0, 1);
6279}
6280
6281int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
6282 u64 physical, u64 **logical, int *naddrs, int *stripe_len)
6283{
6284 struct extent_map *em;
6285 struct map_lookup *map;
6286 u64 *buf;
6287 u64 bytenr;
6288 u64 length;
6289 u64 stripe_nr;
6290 u64 rmap_len;
6291 int i, j, nr = 0;
6292
6293 em = btrfs_get_chunk_map(fs_info, chunk_start, 1);
6294 if (IS_ERR(em))
6295 return -EIO;
6296
6297 map = em->map_lookup;
6298 length = em->len;
6299 rmap_len = map->stripe_len;
6300
6301 if (map->type & BTRFS_BLOCK_GROUP_RAID10)
6302 length = div_u64(length, map->num_stripes / map->sub_stripes);
6303 else if (map->type & BTRFS_BLOCK_GROUP_RAID0)
6304 length = div_u64(length, map->num_stripes);
6305 else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
6306 length = div_u64(length, nr_data_stripes(map));
6307 rmap_len = map->stripe_len * nr_data_stripes(map);
6308 }
6309
6310 buf = kcalloc(map->num_stripes, sizeof(u64), GFP_NOFS);
6311 BUG_ON(!buf);
6312
6313 for (i = 0; i < map->num_stripes; i++) {
6314 if (map->stripes[i].physical > physical ||
6315 map->stripes[i].physical + length <= physical)
6316 continue;
6317
6318 stripe_nr = physical - map->stripes[i].physical;
6319 stripe_nr = div64_u64(stripe_nr, map->stripe_len);
6320
6321 if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
6322 stripe_nr = stripe_nr * map->num_stripes + i;
6323 stripe_nr = div_u64(stripe_nr, map->sub_stripes);
6324 } else if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
6325 stripe_nr = stripe_nr * map->num_stripes + i;
6326 }
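/*
 * else if RAID[56], multiply by nr_data_stripes().
 * Alternatively, just use rmap_len below instead of
 * map->stripe_len.
 */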
6327
6328
6329
6330 bytenr = chunk_start + stripe_nr * rmap_len;
6331 WARN_ON(nr >= map->num_stripes);
6332 for (j = 0; j < nr; j++) {
6333 if (buf[j] == bytenr)
6334 break;
6335 }
6336 if (j == nr) {
6337 WARN_ON(nr >= map->num_stripes);
6338 buf[nr++] = bytenr;
6339 }
6340 }
6341
6342 *logical = buf;
6343 *naddrs = nr;
6344 *stripe_len = rmap_len;
6345
6346 free_extent_map(em);
6347 return 0;
6348}
6349
6350static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio)
6351{
6352 bio->bi_private = bbio->private;
6353 bio->bi_end_io = bbio->end_io;
6354 bio_endio(bio);
6355
6356 btrfs_put_bbio(bbio);
6357}
6358
6359static void btrfs_end_bio(struct bio *bio)
6360{
6361 struct btrfs_bio *bbio = bio->bi_private;
6362 int is_orig_bio = 0;
6363
6364 if (bio->bi_status) {
6365 atomic_inc(&bbio->error);
6366 if (bio->bi_status == BLK_STS_IOERR ||
6367 bio->bi_status == BLK_STS_TARGET) {
6368 unsigned int stripe_index =
6369 btrfs_io_bio(bio)->stripe_index;
6370 struct btrfs_device *dev;
6371
6372 BUG_ON(stripe_index >= bbio->num_stripes);
6373 dev = bbio->stripes[stripe_index].dev;
6374 if (dev->bdev) {
6375 if (bio_op(bio) == REQ_OP_WRITE)
6376 btrfs_dev_stat_inc_and_print(dev,
6377 BTRFS_DEV_STAT_WRITE_ERRS);
6378 else if (!(bio->bi_opf & REQ_RAHEAD))
6379 btrfs_dev_stat_inc_and_print(dev,
6380 BTRFS_DEV_STAT_READ_ERRS);
6381 if (bio->bi_opf & REQ_PREFLUSH)
6382 btrfs_dev_stat_inc_and_print(dev,
6383 BTRFS_DEV_STAT_FLUSH_ERRS);
6384 }
6385 }
6386 }
6387
6388 if (bio == bbio->orig_bio)
6389 is_orig_bio = 1;
6390
6391 btrfs_bio_counter_dec(bbio->fs_info);
6392
6393 if (atomic_dec_and_test(&bbio->stripes_pending)) {
6394 if (!is_orig_bio) {
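/* Drop the cloned bio and finish the original one */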
6395 bio_put(bio);
6396 bio = bbio->orig_bio;
6397 }
6398
6399 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
/*
 * Only send an error to the higher layers if it is beyond
 * the tolerance of the btrfs bio.
 */
6403 if (atomic_read(&bbio->error) > bbio->max_errors) {
6404 bio->bi_status = BLK_STS_IOERR;
6405 } else {
/*
 * This bio is actually up to date, we didn't
 * go over the max number of errors.
 */
6410 bio->bi_status = BLK_STS_OK;
6411 }
6412
6413 btrfs_end_bbio(bbio, bio);
6414 } else if (!is_orig_bio) {
6415 bio_put(bio);
6416 }
6417}
6418
6419
/*
 * See run_scheduled_bios for a description of why bios are collected for
 * async submit.
 *
 * This will add one bio to the pending list for a device and make sure
 * the work struct is scheduled.
 */
6426static noinline void btrfs_schedule_bio(struct btrfs_device *device,
6427 struct bio *bio)
6428{
6429 struct btrfs_fs_info *fs_info = device->fs_info;
6430 int should_queue = 1;
6431 struct btrfs_pending_bios *pending_bios;
6432
/* Don't bother with additional async steps for reads, right now */
6434 if (bio_op(bio) == REQ_OP_READ) {
6435 btrfsic_submit_bio(bio);
6436 return;
6437 }
6438
6439 WARN_ON(bio->bi_next);
6440 bio->bi_next = NULL;
6441
6442 spin_lock(&device->io_lock);
6443 if (op_is_sync(bio->bi_opf))
6444 pending_bios = &device->pending_sync_bios;
6445 else
6446 pending_bios = &device->pending_bios;
6447
6448 if (pending_bios->tail)
6449 pending_bios->tail->bi_next = bio;
6450
6451 pending_bios->tail = bio;
6452 if (!pending_bios->head)
6453 pending_bios->head = bio;
6454 if (device->running_pending)
6455 should_queue = 0;
6456
6457 spin_unlock(&device->io_lock);
6458
6459 if (should_queue)
6460 btrfs_queue_work(fs_info->submit_workers, &device->work);
6461}
6462
6463static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio,
6464 u64 physical, int dev_nr, int async)
6465{
6466 struct btrfs_device *dev = bbio->stripes[dev_nr].dev;
6467 struct btrfs_fs_info *fs_info = bbio->fs_info;
6468
6469 bio->bi_private = bbio;
6470 btrfs_io_bio(bio)->stripe_index = dev_nr;
6471 bio->bi_end_io = btrfs_end_bio;
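/* bi_sector is expressed in 512-byte units */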
6472 bio->bi_iter.bi_sector = physical >> 9;
6473 btrfs_debug_in_rcu(fs_info,
6474 "btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
6475 bio_op(bio), bio->bi_opf, (u64)bio->bi_iter.bi_sector,
6476 (u_long)dev->bdev->bd_dev, rcu_str_deref(dev->name), dev->devid,
6477 bio->bi_iter.bi_size);
6478 bio_set_dev(bio, dev->bdev);
6479
6480 btrfs_bio_counter_inc_noblocked(fs_info);
6481
6482 if (async)
6483 btrfs_schedule_bio(dev, bio);
6484 else
6485 btrfsic_submit_bio(bio);
6486}
6487
6488static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
6489{
6490 atomic_inc(&bbio->error);
6491 if (atomic_dec_and_test(&bbio->stripes_pending)) {
/* Should be the original bio */
6493 WARN_ON(bio != bbio->orig_bio);
6494
6495 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
6496 bio->bi_iter.bi_sector = logical >> 9;
6497 if (atomic_read(&bbio->error) > bbio->max_errors)
6498 bio->bi_status = BLK_STS_IOERR;
6499 else
6500 bio->bi_status = BLK_STS_OK;
6501 btrfs_end_bbio(bbio, bio);
6502 }
6503}
6504
6505blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
6506 int mirror_num, int async_submit)
6507{
6508 struct btrfs_device *dev;
6509 struct bio *first_bio = bio;
6510 u64 logical = (u64)bio->bi_iter.bi_sector << 9;
6511 u64 length = 0;
6512 u64 map_length;
6513 int ret;
6514 int dev_nr;
6515 int total_devs;
6516 struct btrfs_bio *bbio = NULL;
6517
6518 length = bio->bi_iter.bi_size;
6519 map_length = length;
6520
6521 btrfs_bio_counter_inc_blocked(fs_info);
6522 ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical,
6523 &map_length, &bbio, mirror_num, 1);
6524 if (ret) {
6525 btrfs_bio_counter_dec(fs_info);
6526 return errno_to_blk_status(ret);
6527 }
6528
6529 total_devs = bbio->num_stripes;
6530 bbio->orig_bio = first_bio;
6531 bbio->private = first_bio->bi_private;
6532 bbio->end_io = first_bio->bi_end_io;
6533 bbio->fs_info = fs_info;
6534 atomic_set(&bbio->stripes_pending, bbio->num_stripes);
6535
6536 if ((bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
6537 ((bio_op(bio) == REQ_OP_WRITE) || (mirror_num > 1))) {
/*
 * In this case, map_length has been set to the length of a single
 * stripe; not the whole write.
 */
6540 if (bio_op(bio) == REQ_OP_WRITE) {
6541 ret = raid56_parity_write(fs_info, bio, bbio,
6542 map_length);
6543 } else {
6544 ret = raid56_parity_recover(fs_info, bio, bbio,
6545 map_length, mirror_num, 1);
6546 }
6547
6548 btrfs_bio_counter_dec(fs_info);
6549 return errno_to_blk_status(ret);
6550 }
6551
6552 if (map_length < length) {
6553 btrfs_crit(fs_info,
6554 "mapping failed logical %llu bio len %llu len %llu",
6555 logical, length, map_length);
6556 BUG();
6557 }
6558
6559 for (dev_nr = 0; dev_nr < total_devs; dev_nr++) {
6560 dev = bbio->stripes[dev_nr].dev;
6561 if (!dev || !dev->bdev || test_bit(BTRFS_DEV_STATE_MISSING,
6562 &dev->dev_state) ||
6563 (bio_op(first_bio) == REQ_OP_WRITE &&
6564 !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
6565 bbio_error(bbio, first_bio, logical);
6566 continue;
6567 }
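/* Clone the bio for every stripe but the last one, which reuses the original */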
6568
6569 if (dev_nr < total_devs - 1)
6570 bio = btrfs_bio_clone(first_bio);
6571 else
6572 bio = first_bio;
6573
6574 submit_stripe_bio(bbio, bio, bbio->stripes[dev_nr].physical,
6575 dev_nr, async_submit);
6576 }
6577 btrfs_bio_counter_dec(fs_info);
6578 return BLK_STS_OK;
6579}
6580
6581
/*
 * Find a device specified by @devid or @uuid in the list of @fs_devices, or
 * return NULL.
 *
 * If devid and uuid are both specified, the match must be exact, otherwise
 * only devid is used.
 *
 * If @seed is true, traverse through the seed devices.
 */
6590struct btrfs_device *btrfs_find_device(struct btrfs_fs_devices *fs_devices,
6591 u64 devid, u8 *uuid, u8 *fsid,
6592 bool seed)
6593{
6594 struct btrfs_device *device;
6595
6596 while (fs_devices) {
6597 if (!fsid ||
6598 !memcmp(fs_devices->metadata_uuid, fsid, BTRFS_FSID_SIZE)) {
6599 list_for_each_entry(device, &fs_devices->devices,
6600 dev_list) {
6601 if (device->devid == devid &&
6602 (!uuid || memcmp(device->uuid, uuid,
6603 BTRFS_UUID_SIZE) == 0))
6604 return device;
6605 }
6606 }
6607 if (seed)
6608 fs_devices = fs_devices->seed;
6609 else
6610 return NULL;
6611 }
6612 return NULL;
6613}
6614
6615static struct btrfs_device *add_missing_dev(struct btrfs_fs_devices *fs_devices,
6616 u64 devid, u8 *dev_uuid)
6617{
6618 struct btrfs_device *device;
6619
6620 device = btrfs_alloc_device(NULL, &devid, dev_uuid);
6621 if (IS_ERR(device))
6622 return device;
6623
6624 list_add(&device->dev_list, &fs_devices->devices);
6625 device->fs_devices = fs_devices;
6626 fs_devices->num_devices++;
6627
6628 set_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
6629 fs_devices->missing_devices++;
6630
6631 return device;
6632}
6633
6634
/**
 * btrfs_alloc_device - allocate struct btrfs_device
 * @fs_info:	used only for generating a new devid, can be NULL if
 *		devid is provided (i.e. @devid != NULL).
 * @devid:	a pointer to devid for this device.  If NULL a new devid
 *		is generated.
 * @uuid:	a pointer to UUID for this device.  If NULL a new UUID
 *		is generated.
 *
 * Return: a pointer to a new &struct btrfs_device on success; ERR_PTR()
 * on error.  Returned struct is not linked onto any lists and must be
 * destroyed with btrfs_free_device.
 */
6647struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
6648 const u64 *devid,
6649 const u8 *uuid)
6650{
6651 struct btrfs_device *dev;
6652 u64 tmp;
6653
6654 if (WARN_ON(!devid && !fs_info))
6655 return ERR_PTR(-EINVAL);
6656
6657 dev = __alloc_device();
6658 if (IS_ERR(dev))
6659 return dev;
6660
6661 if (devid)
6662 tmp = *devid;
6663 else {
6664 int ret;
6665
6666 ret = find_next_devid(fs_info, &tmp);
6667 if (ret) {
6668 btrfs_free_device(dev);
6669 return ERR_PTR(ret);
6670 }
6671 }
6672 dev->devid = tmp;
6673
6674 if (uuid)
6675 memcpy(dev->uuid, uuid, BTRFS_UUID_SIZE);
6676 else
6677 generate_random_uuid(dev->uuid);
6678
6679 btrfs_init_work(&dev->work, btrfs_submit_helper,
6680 pending_bios_fn, NULL, NULL);
6681
6682 return dev;
6683}
6684
6685static void btrfs_report_missing_device(struct btrfs_fs_info *fs_info,
6686 u64 devid, u8 *uuid, bool error)
6687{
6688 if (error)
6689 btrfs_err_rl(fs_info, "devid %llu uuid %pU is missing",
6690 devid, uuid);
6691 else
6692 btrfs_warn_rl(fs_info, "devid %llu uuid %pU is missing",
6693 devid, uuid);
6694}
6695
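/*
 * Return the physical size of a stripe, given the logical chunk length and
 * how many of the chunk's stripes carry data.
 */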
6696static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes)
6697{
6698 int index = btrfs_bg_flags_to_raid_index(type);
6699 int ncopies = btrfs_raid_array[index].ncopies;
6700 int data_stripes;
6701
6702 switch (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
6703 case BTRFS_BLOCK_GROUP_RAID5:
6704 data_stripes = num_stripes - 1;
6705 break;
6706 case BTRFS_BLOCK_GROUP_RAID6:
6707 data_stripes = num_stripes - 2;
6708 break;
6709 default:
6710 data_stripes = num_stripes / ncopies;
6711 break;
6712 }
6713 return div_u64(chunk_len, data_stripes);
6714}
6715
6716static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
6717 struct btrfs_chunk *chunk)
6718{
6719 struct btrfs_fs_info *fs_info = leaf->fs_info;
6720 struct extent_map_tree *map_tree = &fs_info->mapping_tree;
6721 struct map_lookup *map;
6722 struct extent_map *em;
6723 u64 logical;
6724 u64 length;
6725 u64 devid;
6726 u8 uuid[BTRFS_UUID_SIZE];
6727 int num_stripes;
6728 int ret;
6729 int i;
6730
6731 logical = key->offset;
6732 length = btrfs_chunk_length(leaf, chunk);
6733 num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
6734
/*
 * Only need to verify chunk item if we're reading from sys chunk array,
 * as chunk item in tree block is already verified by tree-checker.
 */
6739 if (leaf->start == BTRFS_SUPER_INFO_OFFSET) {
6740 ret = btrfs_check_chunk_valid(leaf, chunk, logical);
6741 if (ret)
6742 return ret;
6743 }
6744
6745 read_lock(&map_tree->lock);
6746 em = lookup_extent_mapping(map_tree, logical, 1);
6747 read_unlock(&map_tree->lock);
6748
/* Already mapped? */
6750 if (em && em->start <= logical && em->start + em->len > logical) {
6751 free_extent_map(em);
6752 return 0;
6753 } else if (em) {
6754 free_extent_map(em);
6755 }
6756
6757 em = alloc_extent_map();
6758 if (!em)
6759 return -ENOMEM;
6760 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
6761 if (!map) {
6762 free_extent_map(em);
6763 return -ENOMEM;
6764 }
6765
6766 set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
6767 em->map_lookup = map;
6768 em->start = logical;
6769 em->len = length;
6770 em->orig_start = 0;
6771 em->block_start = 0;
6772 em->block_len = em->len;
6773
6774 map->num_stripes = num_stripes;
6775 map->io_width = btrfs_chunk_io_width(leaf, chunk);
6776 map->io_align = btrfs_chunk_io_align(leaf, chunk);
6777 map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
6778 map->type = btrfs_chunk_type(leaf, chunk);
6779 map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
6780 map->verified_stripes = 0;
6781 em->orig_block_len = calc_stripe_length(map->type, em->len,
6782 map->num_stripes);
6783 for (i = 0; i < num_stripes; i++) {
6784 map->stripes[i].physical =
6785 btrfs_stripe_offset_nr(leaf, chunk, i);
6786 devid = btrfs_stripe_devid_nr(leaf, chunk, i);
6787 read_extent_buffer(leaf, uuid, (unsigned long)
6788 btrfs_stripe_dev_uuid_nr(chunk, i),
6789 BTRFS_UUID_SIZE);
6790 map->stripes[i].dev = btrfs_find_device(fs_info->fs_devices,
6791 devid, uuid, NULL, true);
6792 if (!map->stripes[i].dev &&
6793 !btrfs_test_opt(fs_info, DEGRADED)) {
6794 free_extent_map(em);
6795 btrfs_report_missing_device(fs_info, devid, uuid, true);
6796 return -ENOENT;
6797 }
6798 if (!map->stripes[i].dev) {
6799 map->stripes[i].dev =
6800 add_missing_dev(fs_info->fs_devices, devid,
6801 uuid);
6802 if (IS_ERR(map->stripes[i].dev)) {
6803 free_extent_map(em);
6804 btrfs_err(fs_info,
6805 "failed to init missing dev %llu: %ld",
6806 devid, PTR_ERR(map->stripes[i].dev));
6807 return PTR_ERR(map->stripes[i].dev);
6808 }
6809 btrfs_report_missing_device(fs_info, devid, uuid, false);
6810 }
6811 set_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
6812 &(map->stripes[i].dev->dev_state));
6813
6814 }
6815
6816 write_lock(&map_tree->lock);
6817 ret = add_extent_mapping(map_tree, em, 0);
6818 write_unlock(&map_tree->lock);
6819 if (ret < 0) {
6820 btrfs_err(fs_info,
6821 "failed to add chunk map, start=%llu len=%llu: %d",
6822 em->start, em->len, ret);
6823 }
6824 free_extent_map(em);
6825
6826 return ret;
6827}
static void fill_device_from_item(struct extent_buffer *leaf,
				  struct btrfs_dev_item *dev_item,
				  struct btrfs_device *device)
{
	unsigned long ptr;

	device->devid = btrfs_device_id(leaf, dev_item);
	device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item);
	device->total_bytes = device->disk_total_bytes;
	device->commit_total_bytes = device->disk_total_bytes;
	device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
	device->commit_bytes_used = device->bytes_used;
	device->type = btrfs_device_type(leaf, dev_item);
	device->io_align = btrfs_device_io_align(leaf, dev_item);
	device->io_width = btrfs_device_io_width(leaf, dev_item);
	device->sector_size = btrfs_device_sector_size(leaf, dev_item);
	WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID);
	clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);

	ptr = btrfs_device_uuid(dev_item);
	read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
}
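
/*
 * Find the fs_devices of the seed filesystem with the given @fsid.  If it is
 * not yet opened, clone the scanned device list and open it (or create a
 * placeholder list on a DEGRADED mount), then chain it onto the sprout's
 * seed list.
 */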
static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info,
						  u8 *fsid)
{
	struct btrfs_fs_devices *fs_devices;
	int ret;

	lockdep_assert_held(&uuid_mutex);
	ASSERT(fsid);

	fs_devices = fs_info->fs_devices->seed;
	while (fs_devices) {
		if (!memcmp(fs_devices->fsid, fsid, BTRFS_FSID_SIZE))
			return fs_devices;

		fs_devices = fs_devices->seed;
	}

	fs_devices = find_fsid(fsid, NULL);
	if (!fs_devices) {
		if (!btrfs_test_opt(fs_info, DEGRADED))
			return ERR_PTR(-ENOENT);

		fs_devices = alloc_fs_devices(fsid, NULL);
		if (IS_ERR(fs_devices))
			return fs_devices;

		fs_devices->seeding = 1;
		fs_devices->opened = 1;
		return fs_devices;
	}

	fs_devices = clone_fs_devices(fs_devices);
	if (IS_ERR(fs_devices))
		return fs_devices;

	ret = open_fs_devices(fs_devices, FMODE_READ, fs_info->bdev_holder);
	if (ret) {
		free_fs_devices(fs_devices);
		fs_devices = ERR_PTR(ret);
		goto out;
	}

	if (!fs_devices->seeding) {
		close_fs_devices(fs_devices);
		free_fs_devices(fs_devices);
		fs_devices = ERR_PTR(-EINVAL);
		goto out;
	}

	fs_devices->seed = fs_info->fs_devices->seed;
	fs_info->fs_devices->seed = fs_devices;
out:
	return fs_devices;
}
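
/*
 * Read one device item from the chunk tree and attach it to the matching
 * in-memory device, creating a "missing" placeholder on degraded mounts.
 */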
static int read_one_dev(struct extent_buffer *leaf,
			struct btrfs_dev_item *dev_item)
{
	struct btrfs_fs_info *fs_info = leaf->fs_info;
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	struct btrfs_device *device;
	u64 devid;
	u8 fs_uuid[BTRFS_FSID_SIZE];
	u8 dev_uuid[BTRFS_UUID_SIZE];

	devid = btrfs_device_id(leaf, dev_item);
	read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
			   BTRFS_UUID_SIZE);
	read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
			   BTRFS_FSID_SIZE);

	if (memcmp(fs_uuid, fs_devices->metadata_uuid, BTRFS_FSID_SIZE)) {
		fs_devices = open_seed_devices(fs_info, fs_uuid);
		if (IS_ERR(fs_devices))
			return PTR_ERR(fs_devices);
	}

	device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
				   fs_uuid, true);
	if (!device) {
		if (!btrfs_test_opt(fs_info, DEGRADED)) {
			btrfs_report_missing_device(fs_info, devid,
							dev_uuid, true);
			return -ENOENT;
		}

		device = add_missing_dev(fs_devices, devid, dev_uuid);
		if (IS_ERR(device)) {
			btrfs_err(fs_info,
				"failed to add missing dev %llu: %ld",
				devid, PTR_ERR(device));
			return PTR_ERR(device);
		}
		btrfs_report_missing_device(fs_info, devid, dev_uuid, false);
	} else {
		if (!device->bdev) {
			if (!btrfs_test_opt(fs_info, DEGRADED)) {
				btrfs_report_missing_device(fs_info,
						devid, dev_uuid, true);
				return -ENOENT;
			}
			btrfs_report_missing_device(fs_info, devid,
						    dev_uuid, false);
		}

		if (!device->bdev &&
		    !test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) {
			/*
			 * This happens when a device that was properly setup
			 * in the device info lists suddenly goes bad.
			 * device->bdev is NULL, and so we have to set
			 * the missing device state here.
			 */
			device->fs_devices->missing_devices++;
			set_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
		}

		/* Move the device to its own fs_devices */
		if (device->fs_devices != fs_devices) {
			ASSERT(test_bit(BTRFS_DEV_STATE_MISSING,
							&device->dev_state));

			list_move(&device->dev_list, &fs_devices->devices);
			device->fs_devices->num_devices--;
			fs_devices->num_devices++;

			device->fs_devices->missing_devices--;
			fs_devices->missing_devices++;

			device->fs_devices = fs_devices;
		}
	}

	if (device->fs_devices != fs_info->fs_devices) {
		BUG_ON(test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state));
		if (device->generation !=
		    btrfs_device_generation(leaf, dev_item))
			return -EINVAL;
	}

	fill_device_from_item(leaf, dev_item, device);
	set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
	   !test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
		device->fs_devices->total_rw_bytes += device->total_bytes;
		atomic64_add(device->total_bytes - device->bytes_used,
				&fs_info->free_chunk_space);
	}
	return 0;
}
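
/*
 * Read the chunk items embedded in the superblock's sys_chunk_array and set
 * up the initial SYSTEM chunk mappings needed to read the chunk tree itself.
 */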
int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *root = fs_info->tree_root;
	struct btrfs_super_block *super_copy = fs_info->super_copy;
	struct extent_buffer *sb;
	struct btrfs_disk_key *disk_key;
	struct btrfs_chunk *chunk;
	u8 *array_ptr;
	unsigned long sb_array_offset;
	int ret = 0;
	u32 num_stripes;
	u32 array_size;
	u32 len = 0;
	u32 cur_offset;
	u64 type;
	struct btrfs_key key;

	ASSERT(BTRFS_SUPER_INFO_SIZE <= fs_info->nodesize);
	/*
	 * This will create an extent buffer of nodesize, superblock size is
	 * fixed to BTRFS_SUPER_INFO_SIZE. If nodesize > sb size, this will
	 * overallocate but we can keep it as-is, only the first page is used.
	 */
	sb = btrfs_find_create_tree_block(fs_info, BTRFS_SUPER_INFO_OFFSET);
	if (IS_ERR(sb))
		return PTR_ERR(sb);
	set_extent_buffer_uptodate(sb);
	btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
	/*
	 * The sb extent buffer is artificial and just used to read the
	 * system array.  set_extent_buffer_uptodate() does not properly mark
	 * all of its pages up-to-date when the page is larger: the extent
	 * does not cover the whole page and consequently
	 * check_page_uptodate() does not find all the page's extents
	 * up-to-date (the hole beyond sb), and write_extent_buffer() then
	 * triggers a WARN_ON.
	 *
	 * Regular short extents go through the mark_extent_buffer_dirty /
	 * writeback cycle, but sb spans only this function.  Add an explicit
	 * SetPageUptodate call to silence the following warning:
	 *
	 *	BUG_ON(!PageUptodate(page));
	 */
	if (PAGE_SIZE > BTRFS_SUPER_INFO_SIZE)
		SetPageUptodate(sb->pages[0]);

	write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
	array_size = btrfs_super_sys_array_size(super_copy);

	array_ptr = super_copy->sys_chunk_array;
	sb_array_offset = offsetof(struct btrfs_super_block, sys_chunk_array);
	cur_offset = 0;

	while (cur_offset < array_size) {
		disk_key = (struct btrfs_disk_key *)array_ptr;
		len = sizeof(*disk_key);
		if (cur_offset + len > array_size)
			goto out_short_read;

		btrfs_disk_key_to_cpu(&key, disk_key);

		array_ptr += len;
		sb_array_offset += len;
		cur_offset += len;

		if (key.type == BTRFS_CHUNK_ITEM_KEY) {
			chunk = (struct btrfs_chunk *)sb_array_offset;
			/*
			 * At least one btrfs_chunk with one stripe must be
			 * present, exact stripe count check comes afterwards
			 */
			len = btrfs_chunk_item_size(1);
			if (cur_offset + len > array_size)
				goto out_short_read;

			num_stripes = btrfs_chunk_num_stripes(sb, chunk);
			if (!num_stripes) {
				btrfs_err(fs_info,
			"invalid number of stripes %u in sys_array at offset %u",
					num_stripes, cur_offset);
				ret = -EIO;
				break;
			}

			type = btrfs_chunk_type(sb, chunk);
			if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) {
				btrfs_err(fs_info,
			"invalid chunk type %llu in sys_array at offset %u",
					type, cur_offset);
				ret = -EIO;
				break;
			}

			len = btrfs_chunk_item_size(num_stripes);
			if (cur_offset + len > array_size)
				goto out_short_read;

			ret = read_one_chunk(&key, sb, chunk);
			if (ret)
				break;
		} else {
			btrfs_err(fs_info,
			   "unexpected item type %u in sys_array at offset %u",
				  (u32)key.type, cur_offset);
			ret = -EIO;
			break;
		}
		array_ptr += len;
		sb_array_offset += len;
		cur_offset += len;
	}
	clear_extent_buffer_uptodate(sb);
	free_extent_buffer_stale(sb);
	return ret;

out_short_read:
	btrfs_err(fs_info, "sys_array too short to read %u bytes at offset %u",
			len, cur_offset);
	clear_extent_buffer_uptodate(sb);
	free_extent_buffer_stale(sb);
	return -EIO;
}

/*
 * Check if all chunks in the fs are OK for read-write degraded mount
 *
 * If the @failing_dev is specified, it's accounted as missing.
 *
 * Return true if all chunks meet the minimal RW mount requirements.
 * Return false if any chunk doesn't meet the minimal RW mount requirements.
 */
bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
					struct btrfs_device *failing_dev)
{
	struct extent_map_tree *map_tree = &fs_info->mapping_tree;
	struct extent_map *em;
	u64 next_start = 0;
	bool ret = true;

	read_lock(&map_tree->lock);
	em = lookup_extent_mapping(map_tree, 0, (u64)-1);
	read_unlock(&map_tree->lock);
	/* No chunk at all? Return false anyway */
	if (!em) {
		ret = false;
		goto out;
	}
	while (em) {
		struct map_lookup *map;
		int missing = 0;
		int max_tolerated;
		int i;

		map = em->map_lookup;
		max_tolerated =
			btrfs_get_num_tolerated_disk_barrier_failures(
					map->type);
		for (i = 0; i < map->num_stripes; i++) {
			struct btrfs_device *dev = map->stripes[i].dev;

			if (!dev || !dev->bdev ||
			    test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
			    dev->last_flush_error)
				missing++;
			else if (failing_dev && failing_dev == dev)
				missing++;
		}
		if (missing > max_tolerated) {
			if (!failing_dev)
				btrfs_warn(fs_info,
	"chunk %llu missing %d devices, max tolerance is %d for writable mount",
				   em->start, missing, max_tolerated);
			free_extent_map(em);
			ret = false;
			goto out;
		}
		next_start = extent_map_end(em);
		free_extent_map(em);

		read_lock(&map_tree->lock);
		em = lookup_extent_mapping(map_tree, next_start,
					   (u64)(-1) - next_start);
		read_unlock(&map_tree->lock);
	}
out:
	return ret;
}
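
/*
 * Read all device items and chunk items from the chunk tree at mount time and
 * build the in-memory device list and chunk mappings.
 */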
int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *root = fs_info->chunk_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	int ret;
	int slot;
	u64 total_dev = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/*
	 * uuid_mutex is needed only if we are mounting a sprout FS
	 * otherwise we don't need it.
	 */
	mutex_lock(&uuid_mutex);
	mutex_lock(&fs_info->chunk_mutex);

	/*
	 * Read all device items, and then all the chunk items. All
	 * device items are found before any chunk item (their object id
	 * is smaller than the lowest possible object id for a chunk
	 * item - BTRFS_FIRST_CHUNK_TREE_OBJECTID).
	 */
	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.offset = 0;
	key.type = 0;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto error;
	while (1) {
		leaf = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret == 0)
				continue;
			if (ret < 0)
				goto error;
			break;
		}
		btrfs_item_key_to_cpu(leaf, &found_key, slot);
		if (found_key.type == BTRFS_DEV_ITEM_KEY) {
			struct btrfs_dev_item *dev_item;
			dev_item = btrfs_item_ptr(leaf, slot,
						  struct btrfs_dev_item);
			ret = read_one_dev(leaf, dev_item);
			if (ret)
				goto error;
			total_dev++;
		} else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
			struct btrfs_chunk *chunk;
			chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
			ret = read_one_chunk(&found_key, leaf, chunk);
			if (ret)
				goto error;
		}
		path->slots[0]++;
	}

	/*
	 * After loading chunk tree, we've got all device information,
	 * do another round of validation checks.
	 */
	if (total_dev != fs_info->fs_devices->total_devices) {
		btrfs_err(fs_info,
	   "super_num_devices %llu mismatch with num_devices %llu found here",
			  btrfs_super_num_devices(fs_info->super_copy),
			  total_dev);
		ret = -EINVAL;
		goto error;
	}
	if (btrfs_super_total_bytes(fs_info->super_copy) <
	    fs_info->fs_devices->total_rw_bytes) {
		btrfs_err(fs_info,
	"super_total_bytes %llu mismatch with fs_devices total_rw_bytes %llu",
			  btrfs_super_total_bytes(fs_info->super_copy),
			  fs_info->fs_devices->total_rw_bytes);
		ret = -EINVAL;
		goto error;
	}
	ret = 0;
error:
	mutex_unlock(&fs_info->chunk_mutex);
	mutex_unlock(&uuid_mutex);

	btrfs_free_path(path);
	return ret;
}
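
/*
 * Point every known device, including those of seed filesystems, at the
 * fs_info of the mounted filesystem.
 */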
void btrfs_init_devices_late(struct btrfs_fs_info *fs_info)
{
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	struct btrfs_device *device;

	while (fs_devices) {
		mutex_lock(&fs_devices->device_list_mutex);
		list_for_each_entry(device, &fs_devices->devices, dev_list)
			device->fs_info = fs_info;
		mutex_unlock(&fs_devices->device_list_mutex);

		fs_devices = fs_devices->seed;
	}
}

static void __btrfs_reset_dev_stats(struct btrfs_device *dev)
{
	int i;

	for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
		btrfs_dev_stat_reset(dev, i);
}
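
/*
 * Load the persistent device error statistics from the device tree at mount
 * time; devices without a dev_stats item start with all-zero counters.
 */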
int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info)
{
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_root *dev_root = fs_info->dev_root;
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	struct extent_buffer *eb;
	int slot;
	int ret = 0;
	struct btrfs_device *device;
	struct btrfs_path *path = NULL;
	int i;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	mutex_lock(&fs_devices->device_list_mutex);
	list_for_each_entry(device, &fs_devices->devices, dev_list) {
		int item_size;
		struct btrfs_dev_stats_item *ptr;

		key.objectid = BTRFS_DEV_STATS_OBJECTID;
		key.type = BTRFS_PERSISTENT_ITEM_KEY;
		key.offset = device->devid;
		ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);
		if (ret) {
			/* No stats item yet, start from zeroed counters */
			__btrfs_reset_dev_stats(device);
			device->dev_stats_valid = 1;
			btrfs_release_path(path);
			continue;
		}
		slot = path->slots[0];
		eb = path->nodes[0];
		btrfs_item_key_to_cpu(eb, &found_key, slot);
		item_size = btrfs_item_size_nr(eb, slot);

		ptr = btrfs_item_ptr(eb, slot,
				     struct btrfs_dev_stats_item);

		for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
			if (item_size >= (1 + i) * sizeof(__le64))
				btrfs_dev_stat_set(device, i,
					btrfs_dev_stats_value(eb, ptr, i));
			else
				btrfs_dev_stat_reset(device, i);
		}

		device->dev_stats_valid = 1;
		btrfs_dev_stat_print_on_load(device);
		btrfs_release_path(path);
	}
	mutex_unlock(&fs_devices->device_list_mutex);

out:
	btrfs_free_path(path);
	return ret < 0 ? ret : 0;
}
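
/*
 * Write the in-memory error counters of @device into its dev_stats item,
 * replacing an existing item that is too small to hold all counters.
 */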
static int update_dev_stat_item(struct btrfs_trans_handle *trans,
				struct btrfs_device *device)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_root *dev_root = fs_info->dev_root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *eb;
	struct btrfs_dev_stats_item *ptr;
	int ret;
	int i;

	key.objectid = BTRFS_DEV_STATS_OBJECTID;
	key.type = BTRFS_PERSISTENT_ITEM_KEY;
	key.offset = device->devid;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
	if (ret < 0) {
		btrfs_warn_in_rcu(fs_info,
			"error %d while searching for dev_stats item for device %s",
			      ret, rcu_str_deref(device->name));
		goto out;
	}

	if (ret == 0 &&
	    btrfs_item_size_nr(path->nodes[0], path->slots[0]) < sizeof(*ptr)) {
		/* need to delete old one and insert a new one */
		ret = btrfs_del_item(trans, dev_root, path);
		if (ret != 0) {
			btrfs_warn_in_rcu(fs_info,
				"delete too small dev_stats item for device %s failed %d",
				      rcu_str_deref(device->name), ret);
			goto out;
		}
		ret = 1;
	}

	if (ret == 1) {
		/* need to insert a new item */
		btrfs_release_path(path);
		ret = btrfs_insert_empty_item(trans, dev_root, path,
					      &key, sizeof(*ptr));
		if (ret < 0) {
			btrfs_warn_in_rcu(fs_info,
				"insert dev_stats item for device %s failed %d",
				rcu_str_deref(device->name), ret);
			goto out;
		}
	}

	eb = path->nodes[0];
	ptr = btrfs_item_ptr(eb, path->slots[0], struct btrfs_dev_stats_item);
	for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
		btrfs_set_dev_stats_value(eb, ptr, i,
					  btrfs_dev_stat_read(device, i));
	btrfs_mark_buffer_dirty(eb);

out:
	btrfs_free_path(path);
	return ret;
}

/*
 * Called from commit_transaction.  Writes all changed device stats to disk.
 */
int btrfs_run_dev_stats(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	struct btrfs_device *device;
	int stats_cnt;
	int ret = 0;

	mutex_lock(&fs_devices->device_list_mutex);
	list_for_each_entry(device, &fs_devices->devices, dev_list) {
		stats_cnt = atomic_read(&device->dev_stats_ccnt);
		if (!device->dev_stats_valid || stats_cnt == 0)
			continue;

		/*
		 * There is a LOAD-LOAD control dependency between the value of
		 * dev_stats_ccnt and updating the on-disk values which requires
		 * reading the in-memory counters. Such control dependencies
		 * require explicit read memory barriers.
		 *
		 * This memory barrier pairs with smp_mb__before_atomic in
		 * btrfs_dev_stat_inc/btrfs_dev_stat_set and with the full
		 * barrier implied by atomic_xchg in
		 * btrfs_dev_stats_read_and_reset
		 */
		smp_rmb();

		ret = update_dev_stat_item(trans, device);
		if (!ret)
			atomic_sub(stats_cnt, &device->dev_stats_ccnt);
	}
	mutex_unlock(&fs_devices->device_list_mutex);

	return ret;
}

void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index)
{
	btrfs_dev_stat_inc(dev, index);
	btrfs_dev_stat_print_on_error(dev);
}

static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
{
	if (!dev->dev_stats_valid)
		return;
	btrfs_err_rl_in_rcu(dev->fs_info,
		"bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u",
			   rcu_str_deref(dev->name),
			   btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
			   btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
			   btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
			   btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS),
			   btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS));
}

static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
{
	int i;

	for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
		if (btrfs_dev_stat_read(dev, i) != 0)
			break;
	if (i == BTRFS_DEV_STAT_VALUES_MAX)
		return; /* all values == 0, suppress message */

	btrfs_info_in_rcu(dev->fs_info,
		"bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u",
	       rcu_str_deref(dev->name),
	       btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
	       btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
	       btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
	       btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS),
	       btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS));
}
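
/*
 * Copy the device error counters out for the dev-stats ioctl, optionally
 * resetting them when BTRFS_DEV_STATS_RESET is set in @stats->flags.
 */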
int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
			struct btrfs_ioctl_get_dev_stats *stats)
{
	struct btrfs_device *dev;
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	int i;

	mutex_lock(&fs_devices->device_list_mutex);
	dev = btrfs_find_device(fs_info->fs_devices, stats->devid, NULL, NULL,
				true);
	mutex_unlock(&fs_devices->device_list_mutex);

	if (!dev) {
		btrfs_warn(fs_info, "get dev_stats failed, device not found");
		return -ENODEV;
	} else if (!dev->dev_stats_valid) {
		btrfs_warn(fs_info, "get dev_stats failed, not yet valid");
		return -ENODEV;
	} else if (stats->flags & BTRFS_DEV_STATS_RESET) {
		for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
			if (stats->nr_items > i)
				stats->values[i] =
					btrfs_dev_stat_read_and_reset(dev, i);
			else
				btrfs_dev_stat_reset(dev, i);
		}
	} else {
		for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
			if (stats->nr_items > i)
				stats->values[i] = btrfs_dev_stat_read(dev, i);
	}
	if (stats->nr_items > BTRFS_DEV_STAT_VALUES_MAX)
		stats->nr_items = BTRFS_DEV_STAT_VALUES_MAX;
	return 0;
}
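
/*
 * Wipe the btrfs magic from every superblock copy on @bdev so the device is
 * no longer detected as a btrfs member device.
 */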
void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path)
{
	struct buffer_head *bh;
	struct btrfs_super_block *disk_super;
	int copy_num;

	if (!bdev)
		return;

	for (copy_num = 0; copy_num < BTRFS_SUPER_MIRROR_MAX;
		copy_num++) {

		if (btrfs_read_dev_one_super(bdev, copy_num, &bh))
			continue;

		disk_super = (struct btrfs_super_block *)bh->b_data;

		memset(&disk_super->magic, 0, sizeof(disk_super->magic));
		set_buffer_dirty(bh);
		sync_dirty_buffer(bh);
		brelse(bh);
	}

	/* Notify udev that device has changed */
	btrfs_kobject_uevent(bdev, KOBJ_CHANGE);

	/* Update ctime/mtime for device path for libblkid */
	update_dev_time(device_path);
}

/*
 * Update the size related members of the devices that changed in this
 * transaction.  This is delayed to the transaction commit, otherwise we could
 * get errors while writing out the superblocks.
 *
 * Must be invoked during transaction commit.
 */
void btrfs_commit_device_sizes(struct btrfs_transaction *trans)
{
	struct btrfs_device *curr, *next;

	ASSERT(trans->state == TRANS_STATE_COMMIT_DOING);

	if (list_empty(&trans->dev_update_list))
		return;

	/*
	 * We don't need the device_list_mutex here.  This list is owned by the
	 * transaction and the transaction must complete before the device is
	 * released.
	 */
	mutex_lock(&trans->fs_info->chunk_mutex);
	list_for_each_entry_safe(curr, next, &trans->dev_update_list,
				 post_commit_list) {
		list_del_init(&curr->post_commit_list);
		curr->commit_total_bytes = curr->disk_total_bytes;
		curr->commit_bytes_used = curr->bytes_used;
	}
	mutex_unlock(&trans->fs_info->chunk_mutex);
}

void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info)
{
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;

	while (fs_devices) {
		fs_devices->fs_info = fs_info;
		fs_devices = fs_devices->seed;
	}
}

void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info)
{
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;

	while (fs_devices) {
		fs_devices->fs_info = NULL;
		fs_devices = fs_devices->seed;
	}
}

/*
 * Return how many copies of the data the given block group profile stores,
 * e.g. 2 for DUP, RAID1 and RAID10, 1 for SINGLE, RAID0 and the parity
 * profiles.
 */
int btrfs_bg_type_to_factor(u64 flags)
{
	const int index = btrfs_bg_flags_to_raid_index(flags);

	return btrfs_raid_array[index].ncopies;
}
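
/*
 * Cross check one dev extent against its chunk mapping: the extent must map
 * to a stripe of the chunk at @chunk_offset, match the stripe length and
 * stay within the device boundary.
 */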
static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
				 u64 chunk_offset, u64 devid,
				 u64 physical_offset, u64 physical_len)
{
	struct extent_map_tree *em_tree = &fs_info->mapping_tree;
	struct extent_map *em;
	struct map_lookup *map;
	struct btrfs_device *dev;
	u64 stripe_len;
	bool found = false;
	int ret = 0;
	int i;

	read_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, chunk_offset, 1);
	read_unlock(&em_tree->lock);

	if (!em) {
		btrfs_err(fs_info,
"dev extent physical offset %llu on devid %llu doesn't have corresponding chunk",
			  physical_offset, devid);
		ret = -EUCLEAN;
		goto out;
	}

	map = em->map_lookup;
	stripe_len = calc_stripe_length(map->type, em->len, map->num_stripes);
	if (physical_len != stripe_len) {
		btrfs_err(fs_info,
"dev extent physical offset %llu on devid %llu length doesn't match chunk %llu, have %llu expect %llu",
			  physical_offset, devid, em->start, physical_len,
			  stripe_len);
		ret = -EUCLEAN;
		goto out;
	}

	for (i = 0; i < map->num_stripes; i++) {
		if (map->stripes[i].dev->devid == devid &&
		    map->stripes[i].physical == physical_offset) {
			found = true;
			if (map->verified_stripes >= map->num_stripes) {
				btrfs_err(fs_info,
				"too many dev extents for chunk %llu found",
					  em->start);
				ret = -EUCLEAN;
				goto out;
			}
			map->verified_stripes++;
			break;
		}
	}
	if (!found) {
		btrfs_err(fs_info,
	"dev extent physical offset %llu devid %llu has no corresponding chunk",
			physical_offset, devid);
		ret = -EUCLEAN;
	}

	/* Make sure no dev extent is beyond device boundary */
	dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
	if (!dev) {
		btrfs_err(fs_info, "failed to find devid %llu", devid);
		ret = -EUCLEAN;
		goto out;
	}

	/* It's possible this device is a dummy for seed device */
	if (dev->disk_total_bytes == 0) {
		dev = btrfs_find_device(fs_info->fs_devices->seed, devid, NULL,
					NULL, false);
		if (!dev) {
			btrfs_err(fs_info, "failed to find seed devid %llu",
				  devid);
			ret = -EUCLEAN;
			goto out;
		}
	}

	if (physical_offset + physical_len > dev->disk_total_bytes) {
		btrfs_err(fs_info,
"dev extent devid %llu physical offset %llu len %llu is beyond device boundary %llu",
			  devid, physical_offset, physical_len,
			  dev->disk_total_bytes);
		ret = -EUCLEAN;
		goto out;
	}
out:
	free_extent_map(em);
	return ret;
}
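
/*
 * Make sure every chunk has as many verified dev extents as it has stripes.
 */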
static int verify_chunk_dev_extent_mapping(struct btrfs_fs_info *fs_info)
{
	struct extent_map_tree *em_tree = &fs_info->mapping_tree;
	struct extent_map *em;
	struct rb_node *node;
	int ret = 0;

	read_lock(&em_tree->lock);
	for (node = rb_first_cached(&em_tree->map); node; node = rb_next(node)) {
		em = rb_entry(node, struct extent_map, rb_node);
		if (em->map_lookup->num_stripes !=
		    em->map_lookup->verified_stripes) {
			btrfs_err(fs_info,
			"chunk %llu has missing dev extent, have %d expect %d",
				  em->start, em->map_lookup->verified_stripes,
				  em->map_lookup->num_stripes);
			ret = -EUCLEAN;
			goto out;
		}
	}
out:
	read_unlock(&em_tree->lock);
	return ret;
}

/*
 * Ensure that all dev extents are mapped to correct chunk, otherwise
 * later chunk allocation/free would cause unexpected behavior.
 *
 * NOTE: This will iterate through the whole device tree, which should be of
 * the same size level as the chunk tree.  This slightly increases mount time.
 */
int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
{
	struct btrfs_path *path;
	struct btrfs_root *root = fs_info->dev_root;
	struct btrfs_key key;
	u64 prev_devid = 0;
	u64 prev_dev_ext_end = 0;
	int ret = 0;

	key.objectid = 1;
	key.type = BTRFS_DEV_EXTENT_KEY;
	key.offset = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = READA_FORWARD;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
		ret = btrfs_next_item(root, path);
		if (ret < 0)
			goto out;
		/* No dev extents at all? Not good */
		if (ret > 0) {
			ret = -EUCLEAN;
			goto out;
		}
	}
	while (1) {
		struct extent_buffer *leaf = path->nodes[0];
		struct btrfs_dev_extent *dext;
		int slot = path->slots[0];
		u64 chunk_offset;
		u64 physical_offset;
		u64 physical_len;
		u64 devid;

		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.type != BTRFS_DEV_EXTENT_KEY)
			break;
		devid = key.objectid;
		physical_offset = key.offset;

		dext = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent);
		chunk_offset = btrfs_dev_extent_chunk_offset(leaf, dext);
		physical_len = btrfs_dev_extent_length(leaf, dext);

		/* Check if this dev extent overlaps with the previous one */
		if (devid == prev_devid && physical_offset < prev_dev_ext_end) {
			btrfs_err(fs_info,
"dev extent devid %llu physical offset %llu overlap with previous dev extent end %llu",
				  devid, physical_offset, prev_dev_ext_end);
			ret = -EUCLEAN;
			goto out;
		}

		ret = verify_one_dev_extent(fs_info, chunk_offset, devid,
					    physical_offset, physical_len);
		if (ret < 0)
			goto out;
		prev_devid = devid;
		prev_dev_ext_end = physical_offset + physical_len;

		ret = btrfs_next_item(root, path);
		if (ret < 0)
			goto out;
		if (ret > 0) {
			ret = 0;
			break;
		}
	}

	/* Ensure all chunks have corresponding dev extents */
	ret = verify_chunk_dev_extent_mapping(fs_info);
out:
	btrfs_free_path(path);
	return ret;
}

/*
 * Check whether the given block group or device is pinned by any inode being
 * used as a swapfile.
 */
bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr)
{
	struct btrfs_swapfile_pin *sp;
	struct rb_node *node;

	spin_lock(&fs_info->swapfile_pins_lock);
	node = fs_info->swapfile_pins.rb_node;
	while (node) {
		sp = rb_entry(node, struct btrfs_swapfile_pin, node);
		if (ptr < sp->ptr)
			node = node->rb_left;
		else if (ptr > sp->ptr)
			node = node->rb_right;
		else
			break;
	}
	spin_unlock(&fs_info->swapfile_pins_lock);
	return node != NULL;
}