#include <linux/sched.h>
#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/iocontext.h>
#include <linux/capability.h>
#include <linux/ratelimit.h>
#include <linux/kthread.h>
#include <linux/raid/pq.h>
#include <linux/semaphore.h>
#include <linux/uuid.h>
#include <linux/list_sort.h>
#include <asm/div64.h>
#include "ctree.h"
#include "extent_map.h"
#include "disk-io.h"
#include "transaction.h"
#include "print-tree.h"
#include "volumes.h"
#include "raid56.h"
#include "async-thread.h"
#include "check-integrity.h"
#include "rcu-string.h"
#include "math.h"
#include "dev-replace.h"
#include "sysfs.h"
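
/*
 * Per-profile constraints consulted by the chunk allocator and by balance.
 * sub_stripes/dev_stripes describe the stripe layout within a chunk,
 * devs_min/devs_max bound how many devices a chunk may span (0 means "as
 * many as available"), tolerated_failures is how many devices may be lost
 * without losing data, and ncopies is how many copies of each byte the
 * profile keeps.  Indexed by the BTRFS_RAID_* enum, e.g.
 * btrfs_raid_array[BTRFS_RAID_RAID1].ncopies == 2.
 */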
const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
	[BTRFS_RAID_RAID10] = {
		.sub_stripes = 2,
		.dev_stripes = 1,
		.devs_max = 0,
		.devs_min = 4,
		.tolerated_failures = 1,
		.devs_increment = 2,
		.ncopies = 2,
	},
	[BTRFS_RAID_RAID1] = {
		.sub_stripes = 1,
		.dev_stripes = 1,
		.devs_max = 2,
		.devs_min = 2,
		.tolerated_failures = 1,
		.devs_increment = 2,
		.ncopies = 2,
	},
	[BTRFS_RAID_DUP] = {
		.sub_stripes = 1,
		.dev_stripes = 2,
		.devs_max = 1,
		.devs_min = 1,
		.tolerated_failures = 0,
		.devs_increment = 1,
		.ncopies = 2,
	},
	[BTRFS_RAID_RAID0] = {
		.sub_stripes = 1,
		.dev_stripes = 1,
		.devs_max = 0,
		.devs_min = 2,
		.tolerated_failures = 0,
		.devs_increment = 1,
		.ncopies = 1,
	},
	[BTRFS_RAID_SINGLE] = {
		.sub_stripes = 1,
		.dev_stripes = 1,
		.devs_max = 1,
		.devs_min = 1,
		.tolerated_failures = 0,
		.devs_increment = 1,
		.ncopies = 1,
	},
	[BTRFS_RAID_RAID5] = {
		.sub_stripes = 1,
		.dev_stripes = 1,
		.devs_max = 0,
		.devs_min = 2,
		.tolerated_failures = 1,
		.devs_increment = 1,
		.ncopies = 2,
	},
	[BTRFS_RAID_RAID6] = {
		.sub_stripes = 1,
		.dev_stripes = 1,
		.devs_max = 0,
		.devs_min = 3,
		.tolerated_failures = 2,
		.devs_increment = 1,
		.ncopies = 3,
	},
};

const u64 btrfs_raid_group[BTRFS_NR_RAID_TYPES] = {
	[BTRFS_RAID_RAID10] = BTRFS_BLOCK_GROUP_RAID10,
	[BTRFS_RAID_RAID1]  = BTRFS_BLOCK_GROUP_RAID1,
	[BTRFS_RAID_DUP]    = BTRFS_BLOCK_GROUP_DUP,
	[BTRFS_RAID_RAID0]  = BTRFS_BLOCK_GROUP_RAID0,
	[BTRFS_RAID_SINGLE] = 0,
	[BTRFS_RAID_RAID5]  = BTRFS_BLOCK_GROUP_RAID5,
	[BTRFS_RAID_RAID6]  = BTRFS_BLOCK_GROUP_RAID6,
};

const int btrfs_raid_mindev_error[BTRFS_NR_RAID_TYPES] = {
	[BTRFS_RAID_RAID10] = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET,
	[BTRFS_RAID_RAID1]  = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET,
	[BTRFS_RAID_DUP]    = 0,
	[BTRFS_RAID_RAID0]  = 0,
	[BTRFS_RAID_SINGLE] = 0,
	[BTRFS_RAID_RAID5]  = BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET,
	[BTRFS_RAID_RAID6]  = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,
};

static int init_first_rw_device(struct btrfs_trans_handle *trans,
				struct btrfs_fs_info *fs_info);
static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info);
static void __btrfs_reset_dev_stats(struct btrfs_device *dev);
static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
			     enum btrfs_map_op op,
			     u64 logical, u64 *length,
			     struct btrfs_bio **bbio_ret,
			     int mirror_num, int need_raid_map);
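
/*
 * Locking overview (a summary of how the code below uses these locks):
 *
 * uuid_mutex protects the global fs_uuids list and the fs_devices
 * structures while devices are scanned, opened and closed.
 *
 * fs_devices->device_list_mutex protects the per-filesystem device list
 * (fs_devices->devices) against concurrent add/remove.
 *
 * fs_info->chunk_mutex protects device fields used by the chunk allocator
 * (alloc_list, total_bytes/bytes_used updates).
 *
 * fs_info->volume_mutex serializes the volume operations themselves:
 * device add, remove, replace and resize.
 */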
DEFINE_MUTEX(uuid_mutex);
static LIST_HEAD(fs_uuids);

struct list_head *btrfs_get_fs_uuids(void)
{
	return &fs_uuids;
}
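
/*
 * Allocate a new btrfs_fs_devices structure and initialize its lists and
 * mutex.  If @fsid is not NULL it is copied into the new structure.
 * Returns ERR_PTR(-ENOMEM) on allocation failure.
 */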
static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid)
{
	struct btrfs_fs_devices *fs_devs;

	fs_devs = kzalloc(sizeof(*fs_devs), GFP_KERNEL);
	if (!fs_devs)
		return ERR_PTR(-ENOMEM);

	mutex_init(&fs_devs->device_list_mutex);

	INIT_LIST_HEAD(&fs_devs->devices);
	INIT_LIST_HEAD(&fs_devs->resized_devices);
	INIT_LIST_HEAD(&fs_devs->alloc_list);
	INIT_LIST_HEAD(&fs_devs->list);
	if (fsid)
		memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE);

	return fs_devs;
}

static void free_device(struct btrfs_device *device)
{
	rcu_string_free(device->name);
	bio_put(device->flush_bio);
	kfree(device);
}

static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
{
	struct btrfs_device *device;

	WARN_ON(fs_devices->opened);
	while (!list_empty(&fs_devices->devices)) {
		device = list_entry(fs_devices->devices.next,
				    struct btrfs_device, dev_list);
		list_del(&device->dev_list);
		free_device(device);
	}
	kfree(fs_devices);
}

static void btrfs_kobject_uevent(struct block_device *bdev,
				 enum kobject_action action)
{
	int ret;

	ret = kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, action);
	if (ret)
		pr_warn("BTRFS: Sending event '%d' to kobject: '%s' (%p): failed\n",
			action,
			kobject_name(&disk_to_dev(bdev->bd_disk)->kobj),
			&disk_to_dev(bdev->bd_disk)->kobj);
}

void __exit btrfs_cleanup_fs_uuids(void)
{
	struct btrfs_fs_devices *fs_devices;

	while (!list_empty(&fs_uuids)) {
		fs_devices = list_entry(fs_uuids.next,
					struct btrfs_fs_devices, list);
		list_del(&fs_devices->list);
		free_fs_devices(fs_devices);
	}
}
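
/*
 * Allocate and pre-initialize a btrfs_device: list heads, io_lock, the
 * readahead radix trees, and a preallocated flush bio so that a barrier
 * can always be submitted for this device.  Returns ERR_PTR(-ENOMEM) on
 * failure.
 */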
static struct btrfs_device *__alloc_device(void)
{
	struct btrfs_device *dev;

	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
	if (!dev)
		return ERR_PTR(-ENOMEM);

	dev->flush_bio = bio_alloc_bioset(GFP_KERNEL, 0, NULL);
	if (!dev->flush_bio) {
		kfree(dev);
		return ERR_PTR(-ENOMEM);
	}

	INIT_LIST_HEAD(&dev->dev_list);
	INIT_LIST_HEAD(&dev->dev_alloc_list);
	INIT_LIST_HEAD(&dev->resized_list);

	spin_lock_init(&dev->io_lock);

	atomic_set(&dev->reada_in_flight, 0);
	atomic_set(&dev->dev_stats_ccnt, 0);
	btrfs_device_data_ordered_init(dev);
	INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
	INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);

	return dev;
}
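
/*
 * Find a device with the given @devid (and, if @uuid is not NULL, a
 * matching device uuid) in @fs_devices.  Returns NULL when no such device
 * is registered.
 */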
static struct btrfs_device *find_device(struct btrfs_fs_devices *fs_devices,
					u64 devid, const u8 *uuid)
{
	struct list_head *head = &fs_devices->devices;
	struct btrfs_device *dev;

	list_for_each_entry(dev, head, dev_list) {
		if (dev->devid == devid &&
		    (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) {
			return dev;
		}
	}
	return NULL;
}

static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
{
	struct btrfs_fs_devices *fs_devices;

	list_for_each_entry(fs_devices, &fs_uuids, list) {
		if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
			return fs_devices;
	}
	return NULL;
}

static int
btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
		      int flush, struct block_device **bdev,
		      struct buffer_head **bh)
{
	int ret;

	*bdev = blkdev_get_by_path(device_path, flags, holder);

	if (IS_ERR(*bdev)) {
		ret = PTR_ERR(*bdev);
		goto error;
	}

	if (flush)
		filemap_write_and_wait((*bdev)->bd_inode->i_mapping);
	ret = set_blocksize(*bdev, BTRFS_BDEV_BLOCKSIZE);
	if (ret) {
		blkdev_put(*bdev, flags);
		goto error;
	}
	invalidate_bdev(*bdev);
	*bh = btrfs_read_dev_super(*bdev);
	if (IS_ERR(*bh)) {
		ret = PTR_ERR(*bh);
		blkdev_put(*bdev, flags);
		goto error;
	}

	return 0;

error:
	*bdev = NULL;
	*bh = NULL;
	return ret;
}

static void requeue_list(struct btrfs_pending_bios *pending_bios,
			 struct bio *head, struct bio *tail)
{
	struct bio *old_head;

	old_head = pending_bios->head;
	pending_bios->head = head;
	if (pending_bios->tail)
		tail->bi_next = old_head;
	else
		pending_bios->tail = tail;
}
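
/*
 * Submit the bios that have been queued up for @device.  Collecting the
 * pending bios on a per-device list (instead of letting every writer
 * submit directly) gives the block layer a much better chance to merge
 * them.  To keep one device from starving the worker, the loop makes some
 * progress and then requeues itself (via the submit_workers queue) when
 * sync bios are waiting or the backing device is congested.
 */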
415static noinline void run_scheduled_bios(struct btrfs_device *device)
416{
417 struct btrfs_fs_info *fs_info = device->fs_info;
418 struct bio *pending;
419 struct backing_dev_info *bdi;
420 struct btrfs_pending_bios *pending_bios;
421 struct bio *tail;
422 struct bio *cur;
423 int again = 0;
424 unsigned long num_run;
425 unsigned long batch_run = 0;
426 unsigned long last_waited = 0;
427 int force_reg = 0;
428 int sync_pending = 0;
429 struct blk_plug plug;
437 blk_start_plug(&plug);
438
439 bdi = device->bdev->bd_bdi;
440
441loop:
442 spin_lock(&device->io_lock);
443
444loop_lock:
445 num_run = 0;
452 if (!force_reg && device->pending_sync_bios.head) {
453 pending_bios = &device->pending_sync_bios;
454 force_reg = 1;
455 } else {
456 pending_bios = &device->pending_bios;
457 force_reg = 0;
458 }
459
460 pending = pending_bios->head;
461 tail = pending_bios->tail;
462 WARN_ON(pending && !tail);
472 if (device->pending_sync_bios.head == NULL &&
473 device->pending_bios.head == NULL) {
474 again = 0;
475 device->running_pending = 0;
476 } else {
477 again = 1;
478 device->running_pending = 1;
479 }
480
481 pending_bios->head = NULL;
482 pending_bios->tail = NULL;
483
484 spin_unlock(&device->io_lock);
485
486 while (pending) {
487
488 rmb();
489
490
491
492 if ((num_run > 32 &&
493 pending_bios != &device->pending_sync_bios &&
494 device->pending_sync_bios.head) ||
495 (num_run > 64 && pending_bios == &device->pending_sync_bios &&
496 device->pending_bios.head)) {
497 spin_lock(&device->io_lock);
498 requeue_list(pending_bios, pending, tail);
499 goto loop_lock;
500 }
501
502 cur = pending;
503 pending = pending->bi_next;
504 cur->bi_next = NULL;
505
506 BUG_ON(atomic_read(&cur->__bi_cnt) == 0);
516 if (pending_bios == &device->pending_sync_bios) {
517 sync_pending = 1;
518 } else if (sync_pending) {
519 blk_finish_plug(&plug);
520 blk_start_plug(&plug);
521 sync_pending = 0;
522 }
523
524 btrfsic_submit_bio(cur);
525 num_run++;
526 batch_run++;
527
528 cond_resched();
535 if (pending && bdi_write_congested(bdi) && batch_run > 8 &&
536 fs_info->fs_devices->open_devices > 1) {
537 struct io_context *ioc;
538
539 ioc = current->io_context;
540
550 if (ioc && ioc->nr_batch_requests > 0 &&
551 time_before(jiffies, ioc->last_waited + HZ/50UL) &&
552 (last_waited == 0 ||
553 ioc->last_waited == last_waited)) {
560 last_waited = ioc->last_waited;
561 cond_resched();
562 continue;
563 }
564 spin_lock(&device->io_lock);
565 requeue_list(pending_bios, pending, tail);
566 device->running_pending = 1;
567
568 spin_unlock(&device->io_lock);
569 btrfs_queue_work(fs_info->submit_workers,
570 &device->work);
571 goto done;
572 }
573 }
574
575 cond_resched();
576 if (again)
577 goto loop;
578
579 spin_lock(&device->io_lock);
580 if (device->pending_bios.head || device->pending_sync_bios.head)
581 goto loop_lock;
582 spin_unlock(&device->io_lock);
583
584done:
585 blk_finish_plug(&plug);
586}
587
588static void pending_bios_fn(struct btrfs_work *work)
589{
590 struct btrfs_device *device;
591
592 device = container_of(work, struct btrfs_device, work);
593 run_scheduled_bios(device);
594}
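
/*
 * Remove unopened devices whose path matches @path (every stale unopened
 * device when @path is NULL) from the registered filesystems, skipping
 * @skip_dev.  An fs_devices that loses its last device is removed from
 * sysfs and freed as well.
 */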
604static void btrfs_free_stale_devices(const char *path,
605 struct btrfs_device *skip_dev)
606{
607 struct btrfs_fs_devices *fs_devs, *tmp_fs_devs;
608 struct btrfs_device *dev, *tmp_dev;
609
610 list_for_each_entry_safe(fs_devs, tmp_fs_devs, &fs_uuids, list) {
611
612 if (fs_devs->opened)
613 continue;
614
615 list_for_each_entry_safe(dev, tmp_dev,
616 &fs_devs->devices, dev_list) {
617 int not_found = 0;
618
619 if (skip_dev && skip_dev == dev)
620 continue;
621 if (path && !dev->name)
622 continue;
623
624 rcu_read_lock();
625 if (path)
626 not_found = strcmp(rcu_str_deref(dev->name),
627 path);
628 rcu_read_unlock();
629 if (not_found)
630 continue;
631
632
633 if (fs_devs->num_devices == 1) {
634 btrfs_sysfs_remove_fsid(fs_devs);
635 list_del(&fs_devs->list);
636 free_fs_devices(fs_devs);
637 break;
638 } else {
639 fs_devs->num_devices--;
640 list_del(&dev->dev_list);
641 free_device(dev);
642 }
643 }
644 }
645}
646
647static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
648 struct btrfs_device *device, fmode_t flags,
649 void *holder)
650{
651 struct request_queue *q;
652 struct block_device *bdev;
653 struct buffer_head *bh;
654 struct btrfs_super_block *disk_super;
655 u64 devid;
656 int ret;
657
658 if (device->bdev)
659 return -EINVAL;
660 if (!device->name)
661 return -EINVAL;
662
663 ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1,
664 &bdev, &bh);
665 if (ret)
666 return ret;
667
668 disk_super = (struct btrfs_super_block *)bh->b_data;
669 devid = btrfs_stack_device_id(&disk_super->dev_item);
670 if (devid != device->devid)
671 goto error_brelse;
672
673 if (memcmp(device->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE))
674 goto error_brelse;
675
676 device->generation = btrfs_super_generation(disk_super);
677
678 if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) {
679 clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
680 fs_devices->seeding = 1;
681 } else {
682 if (bdev_read_only(bdev))
683 clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
684 else
685 set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
686 }
687
688 q = bdev_get_queue(bdev);
689 if (!blk_queue_nonrot(q))
690 fs_devices->rotating = 1;
691
692 device->bdev = bdev;
693 clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
694 device->mode = flags;
695
696 fs_devices->open_devices++;
697 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
698 device->devid != BTRFS_DEV_REPLACE_DEVID) {
699 fs_devices->rw_devices++;
700 list_add_tail(&device->dev_alloc_list, &fs_devices->alloc_list);
701 }
702 brelse(bh);
703
704 return 0;
705
706error_brelse:
707 brelse(bh);
708 blkdev_put(bdev, flags);
709
710 return -EINVAL;
711}
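
/*
 * Add a device to the list of filesystems registered under its fsid,
 * creating the btrfs_fs_devices entry on first sight.  Called with a
 * superblock that has already been read and validated.  Returns the
 * (possibly pre-existing) device, or an ERR_PTR: -EBUSY when a new device
 * appears for an already opened filesystem, -EEXIST when an unopened
 * filesystem already knows this device under a newer generation than the
 * scanned copy, or -ENOMEM.
 */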
720static noinline struct btrfs_device *device_list_add(const char *path,
721 struct btrfs_super_block *disk_super)
722{
723 struct btrfs_device *device;
724 struct btrfs_fs_devices *fs_devices;
725 struct rcu_string *name;
726 u64 found_transid = btrfs_super_generation(disk_super);
727 u64 devid = btrfs_stack_device_id(&disk_super->dev_item);
728
729 fs_devices = find_fsid(disk_super->fsid);
730 if (!fs_devices) {
731 fs_devices = alloc_fs_devices(disk_super->fsid);
732 if (IS_ERR(fs_devices))
733 return ERR_CAST(fs_devices);
734
735 list_add(&fs_devices->list, &fs_uuids);
736
737 device = NULL;
738 } else {
739 device = find_device(fs_devices, devid,
740 disk_super->dev_item.uuid);
741 }
742
743 if (!device) {
744 if (fs_devices->opened)
745 return ERR_PTR(-EBUSY);
746
747 device = btrfs_alloc_device(NULL, &devid,
748 disk_super->dev_item.uuid);
749 if (IS_ERR(device)) {
750
751 return device;
752 }
753
754 name = rcu_string_strdup(path, GFP_NOFS);
755 if (!name) {
756 free_device(device);
757 return ERR_PTR(-ENOMEM);
758 }
759 rcu_assign_pointer(device->name, name);
760
761 mutex_lock(&fs_devices->device_list_mutex);
762 list_add_rcu(&device->dev_list, &fs_devices->devices);
763 fs_devices->num_devices++;
764 mutex_unlock(&fs_devices->device_list_mutex);
765
766 device->fs_devices = fs_devices;
767 btrfs_free_stale_devices(path, device);
768
769 if (disk_super->label[0])
770 pr_info("BTRFS: device label %s devid %llu transid %llu %s\n",
771 disk_super->label, devid, found_transid, path);
772 else
773 pr_info("BTRFS: device fsid %pU devid %llu transid %llu %s\n",
774 disk_super->fsid, devid, found_transid, path);
775
776 } else if (!device->name || strcmp(device->name->str, path)) {
803 if (!fs_devices->opened && found_transid < device->generation) {
811 return ERR_PTR(-EEXIST);
812 }
813
814 name = rcu_string_strdup(path, GFP_NOFS);
815 if (!name)
816 return ERR_PTR(-ENOMEM);
817 rcu_string_free(device->name);
818 rcu_assign_pointer(device->name, name);
819 if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) {
820 fs_devices->missing_devices--;
821 clear_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
822 }
823 }
831 if (!fs_devices->opened)
832 device->generation = found_transid;
833
834 fs_devices->total_devices = btrfs_super_num_devices(disk_super);
835
836 return device;
837}
838
839static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
840{
841 struct btrfs_fs_devices *fs_devices;
842 struct btrfs_device *device;
843 struct btrfs_device *orig_dev;
844
845 fs_devices = alloc_fs_devices(orig->fsid);
846 if (IS_ERR(fs_devices))
847 return fs_devices;
848
849 mutex_lock(&orig->device_list_mutex);
850 fs_devices->total_devices = orig->total_devices;
851
852
853 list_for_each_entry(orig_dev, &orig->devices, dev_list) {
854 struct rcu_string *name;
855
856 device = btrfs_alloc_device(NULL, &orig_dev->devid,
857 orig_dev->uuid);
858 if (IS_ERR(device))
859 goto error;
860
861
862
863
864
865 if (orig_dev->name) {
866 name = rcu_string_strdup(orig_dev->name->str,
867 GFP_KERNEL);
868 if (!name) {
869 free_device(device);
870 goto error;
871 }
872 rcu_assign_pointer(device->name, name);
873 }
874
875 list_add(&device->dev_list, &fs_devices->devices);
876 device->fs_devices = fs_devices;
877 fs_devices->num_devices++;
878 }
879 mutex_unlock(&orig->device_list_mutex);
880 return fs_devices;
881error:
882 mutex_unlock(&orig->device_list_mutex);
883 free_fs_devices(fs_devices);
884 return ERR_PTR(-ENOMEM);
885}
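
/*
 * After the chunk and device trees have been read we know which devids
 * actually belong to this filesystem; close and drop any device that was
 * scanned but is not referenced by the metadata (honouring @step and the
 * replace target device), and remember the most recent device as
 * latest_bdev.
 */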
891void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step)
892{
893 struct btrfs_device *device, *next;
894 struct btrfs_device *latest_dev = NULL;
895
896 mutex_lock(&uuid_mutex);
897again:
898
899 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
900 if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
901 &device->dev_state)) {
902 if (!test_bit(BTRFS_DEV_STATE_REPLACE_TGT,
903 &device->dev_state) &&
904 (!latest_dev ||
905 device->generation > latest_dev->generation)) {
906 latest_dev = device;
907 }
908 continue;
909 }
910
911 if (device->devid == BTRFS_DEV_REPLACE_DEVID) {
922 if (step == 0 || test_bit(BTRFS_DEV_STATE_REPLACE_TGT,
923 &device->dev_state)) {
924 continue;
925 }
926 }
927 if (device->bdev) {
928 blkdev_put(device->bdev, device->mode);
929 device->bdev = NULL;
930 fs_devices->open_devices--;
931 }
932 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
933 list_del_init(&device->dev_alloc_list);
934 clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
935 if (!test_bit(BTRFS_DEV_STATE_REPLACE_TGT,
936 &device->dev_state))
937 fs_devices->rw_devices--;
938 }
939 list_del_init(&device->dev_list);
940 fs_devices->num_devices--;
941 free_device(device);
942 }
943
944 if (fs_devices->seed) {
945 fs_devices = fs_devices->seed;
946 goto again;
947 }
948
949 fs_devices->latest_bdev = latest_dev->bdev;
950
951 mutex_unlock(&uuid_mutex);
952}
953
954static void free_device_rcu(struct rcu_head *head)
955{
956 struct btrfs_device *device;
957
958 device = container_of(head, struct btrfs_device, rcu);
959 free_device(device);
960}
961
962static void btrfs_close_bdev(struct btrfs_device *device)
963{
964 if (!device->bdev)
965 return;
966
967 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
968 sync_blockdev(device->bdev);
969 invalidate_bdev(device->bdev);
970 }
971
972 blkdev_put(device->bdev, device->mode);
973}
974
975static void btrfs_prepare_close_one_device(struct btrfs_device *device)
976{
977 struct btrfs_fs_devices *fs_devices = device->fs_devices;
978 struct btrfs_device *new_device;
979 struct rcu_string *name;
980
981 if (device->bdev)
982 fs_devices->open_devices--;
983
984 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
985 device->devid != BTRFS_DEV_REPLACE_DEVID) {
986 list_del_init(&device->dev_alloc_list);
987 fs_devices->rw_devices--;
988 }
989
990 if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
991 fs_devices->missing_devices--;
992
993 new_device = btrfs_alloc_device(NULL, &device->devid,
994 device->uuid);
995 BUG_ON(IS_ERR(new_device));
996
997
998 if (device->name) {
999 name = rcu_string_strdup(device->name->str, GFP_NOFS);
1000 BUG_ON(!name);
1001 rcu_assign_pointer(new_device->name, name);
1002 }
1003
1004 list_replace_rcu(&device->dev_list, &new_device->dev_list);
1005 new_device->fs_devices = device->fs_devices;
1006}
1007
1008static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
1009{
1010 struct btrfs_device *device, *tmp;
1011 struct list_head pending_put;
1012
1013 INIT_LIST_HEAD(&pending_put);
1014
1015 if (--fs_devices->opened > 0)
1016 return 0;
1017
1018 mutex_lock(&fs_devices->device_list_mutex);
1019 list_for_each_entry_safe(device, tmp, &fs_devices->devices, dev_list) {
1020 btrfs_prepare_close_one_device(device);
1021 list_add(&device->dev_list, &pending_put);
1022 }
1023 mutex_unlock(&fs_devices->device_list_mutex);
1031 while (!list_empty(&pending_put)) {
1032 device = list_first_entry(&pending_put,
1033 struct btrfs_device, dev_list);
1034 list_del(&device->dev_list);
1035 btrfs_close_bdev(device);
1036 call_rcu(&device->rcu, free_device_rcu);
1037 }
1038
1039 WARN_ON(fs_devices->open_devices);
1040 WARN_ON(fs_devices->rw_devices);
1041 fs_devices->opened = 0;
1042 fs_devices->seeding = 0;
1043
1044 return 0;
1045}
1046
1047int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
1048{
1049 struct btrfs_fs_devices *seed_devices = NULL;
1050 int ret;
1051
1052 mutex_lock(&uuid_mutex);
1053 ret = __btrfs_close_devices(fs_devices);
1054 if (!fs_devices->opened) {
1055 seed_devices = fs_devices->seed;
1056 fs_devices->seed = NULL;
1057 }
1058 mutex_unlock(&uuid_mutex);
1059
1060 while (seed_devices) {
1061 fs_devices = seed_devices;
1062 seed_devices = fs_devices->seed;
1063 __btrfs_close_devices(fs_devices);
1064 free_fs_devices(fs_devices);
1065 }
1066 return ret;
1067}
1068
1069static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
1070 fmode_t flags, void *holder)
1071{
1072 struct list_head *head = &fs_devices->devices;
1073 struct btrfs_device *device;
1074 struct btrfs_device *latest_dev = NULL;
1075 int ret = 0;
1076
1077 flags |= FMODE_EXCL;
1078
1079 list_for_each_entry(device, head, dev_list) {
1080
1081 if (btrfs_open_one_device(fs_devices, device, flags, holder))
1082 continue;
1083
1084 if (!latest_dev ||
1085 device->generation > latest_dev->generation)
1086 latest_dev = device;
1087 }
1088 if (fs_devices->open_devices == 0) {
1089 ret = -EINVAL;
1090 goto out;
1091 }
1092 fs_devices->opened = 1;
1093 fs_devices->latest_bdev = latest_dev->bdev;
1094 fs_devices->total_rw_bytes = 0;
1095out:
1096 return ret;
1097}
1098
1099static int devid_cmp(void *priv, struct list_head *a, struct list_head *b)
1100{
1101 struct btrfs_device *dev1, *dev2;
1102
1103 dev1 = list_entry(a, struct btrfs_device, dev_list);
1104 dev2 = list_entry(b, struct btrfs_device, dev_list);
1105
1106 if (dev1->devid < dev2->devid)
1107 return -1;
1108 else if (dev1->devid > dev2->devid)
1109 return 1;
1110 return 0;
1111}
1112
1113int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
1114 fmode_t flags, void *holder)
1115{
1116 int ret;
1117
1118 mutex_lock(&uuid_mutex);
1119 if (fs_devices->opened) {
1120 fs_devices->opened++;
1121 ret = 0;
1122 } else {
1123 list_sort(NULL, &fs_devices->devices, devid_cmp);
1124 ret = __btrfs_open_devices(fs_devices, flags, holder);
1125 }
1126 mutex_unlock(&uuid_mutex);
1127 return ret;
1128}
1129
1130static void btrfs_release_disk_super(struct page *page)
1131{
1132 kunmap(page);
1133 put_page(page);
1134}
1135
1136static int btrfs_read_disk_super(struct block_device *bdev, u64 bytenr,
1137 struct page **page,
1138 struct btrfs_super_block **disk_super)
1139{
1140 void *p;
1141 pgoff_t index;
1142
1143
1144 if (bytenr + PAGE_SIZE >= i_size_read(bdev->bd_inode))
1145 return 1;
1146
1147
1148 if (sizeof(**disk_super) > PAGE_SIZE)
1149 return 1;
1150
1151
1152 index = bytenr >> PAGE_SHIFT;
1153 if ((bytenr + sizeof(**disk_super) - 1) >> PAGE_SHIFT != index)
1154 return 1;
1155
1156
1157 *page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
1158 index, GFP_KERNEL);
1159
1160 if (IS_ERR_OR_NULL(*page))
1161 return 1;
1162
1163 p = kmap(*page);
1164
1165
1166 *disk_super = p + (bytenr & ~PAGE_MASK);
1167
1168 if (btrfs_super_bytenr(*disk_super) != bytenr ||
1169 btrfs_super_magic(*disk_super) != BTRFS_MAGIC) {
1170 btrfs_release_disk_super(*page);
1171 return 1;
1172 }
1173
1174 if ((*disk_super)->label[0] &&
1175 (*disk_super)->label[BTRFS_LABEL_SIZE - 1])
1176 (*disk_super)->label[BTRFS_LABEL_SIZE - 1] = '\0';
1177
1178 return 0;
1179}
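
/*
 * Look for a btrfs signature on a device.  This may be called outside the
 * mount path, where we may not call set_blocksize(), so the superblock is
 * read via the page cache instead of buffer heads.  On success the device
 * is registered and *fs_devices_ret points at its fs_devices.
 */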
1186int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
1187 struct btrfs_fs_devices **fs_devices_ret)
1188{
1189 struct btrfs_super_block *disk_super;
1190 struct btrfs_device *device;
1191 struct block_device *bdev;
1192 struct page *page;
1193 int ret = 0;
1194 u64 bytenr;
1202 bytenr = btrfs_sb_offset(0);
1203 flags |= FMODE_EXCL;
1204 mutex_lock(&uuid_mutex);
1205
1206 bdev = blkdev_get_by_path(path, flags, holder);
1207 if (IS_ERR(bdev)) {
1208 ret = PTR_ERR(bdev);
1209 goto error;
1210 }
1211
1212 if (btrfs_read_disk_super(bdev, bytenr, &page, &disk_super)) {
1213 ret = -EINVAL;
1214 goto error_bdev_put;
1215 }
1216
1217 device = device_list_add(path, disk_super);
1218 if (IS_ERR(device))
1219 ret = PTR_ERR(device);
1220 else
1221 *fs_devices_ret = device->fs_devices;
1222
1223 btrfs_release_disk_super(page);
1224
1225error_bdev_put:
1226 blkdev_put(bdev, flags);
1227error:
1228 mutex_unlock(&uuid_mutex);
1229 return ret;
1230}
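
/*
 * Sum the bytes of @device that are covered by dev extents inside the
 * range [@start, @end] and return the total in *length.
 */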
1233int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
1234 u64 end, u64 *length)
1235{
1236 struct btrfs_key key;
1237 struct btrfs_root *root = device->fs_info->dev_root;
1238 struct btrfs_dev_extent *dev_extent;
1239 struct btrfs_path *path;
1240 u64 extent_end;
1241 int ret;
1242 int slot;
1243 struct extent_buffer *l;
1244
1245 *length = 0;
1246
1247 if (start >= device->total_bytes ||
1248 test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
1249 return 0;
1250
1251 path = btrfs_alloc_path();
1252 if (!path)
1253 return -ENOMEM;
1254 path->reada = READA_FORWARD;
1255
1256 key.objectid = device->devid;
1257 key.offset = start;
1258 key.type = BTRFS_DEV_EXTENT_KEY;
1259
1260 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1261 if (ret < 0)
1262 goto out;
1263 if (ret > 0) {
1264 ret = btrfs_previous_item(root, path, key.objectid, key.type);
1265 if (ret < 0)
1266 goto out;
1267 }
1268
1269 while (1) {
1270 l = path->nodes[0];
1271 slot = path->slots[0];
1272 if (slot >= btrfs_header_nritems(l)) {
1273 ret = btrfs_next_leaf(root, path);
1274 if (ret == 0)
1275 continue;
1276 if (ret < 0)
1277 goto out;
1278
1279 break;
1280 }
1281 btrfs_item_key_to_cpu(l, &key, slot);
1282
1283 if (key.objectid < device->devid)
1284 goto next;
1285
1286 if (key.objectid > device->devid)
1287 break;
1288
1289 if (key.type != BTRFS_DEV_EXTENT_KEY)
1290 goto next;
1291
1292 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
1293 extent_end = key.offset + btrfs_dev_extent_length(l,
1294 dev_extent);
1295 if (key.offset <= start && extent_end > end) {
1296 *length = end - start + 1;
1297 break;
1298 } else if (key.offset <= start && extent_end > start)
1299 *length += extent_end - start;
1300 else if (key.offset > start && extent_end <= end)
1301 *length += extent_end - key.offset;
1302 else if (key.offset > start && key.offset <= end) {
1303 *length += end - key.offset + 1;
1304 break;
1305 } else if (key.offset > end)
1306 break;
1307
1308next:
1309 path->slots[0]++;
1310 }
1311 ret = 0;
1312out:
1313 btrfs_free_path(path);
1314 return ret;
1315}
1316
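
/*
 * Check whether [*start, *start + len) on @device overlaps a chunk that is
 * pending in the current transaction or still pinned after removal.  If it
 * does, advance *start past the conflicting stripe and return 1.
 */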
1317static int contains_pending_extent(struct btrfs_transaction *transaction,
1318 struct btrfs_device *device,
1319 u64 *start, u64 len)
1320{
1321 struct btrfs_fs_info *fs_info = device->fs_info;
1322 struct extent_map *em;
1323 struct list_head *search_list = &fs_info->pinned_chunks;
1324 int ret = 0;
1325 u64 physical_start = *start;
1326
1327 if (transaction)
1328 search_list = &transaction->pending_chunks;
1329again:
1330 list_for_each_entry(em, search_list, list) {
1331 struct map_lookup *map;
1332 int i;
1333
1334 map = em->map_lookup;
1335 for (i = 0; i < map->num_stripes; i++) {
1336 u64 end;
1337
1338 if (map->stripes[i].dev != device)
1339 continue;
1340 if (map->stripes[i].physical >= physical_start + len ||
1341 map->stripes[i].physical + em->orig_block_len <=
1342 physical_start)
1343 continue;
1356 end = map->stripes[i].physical + em->orig_block_len;
1357 if (end > *start) {
1358 *start = end;
1359 ret = 1;
1360 }
1361 }
1362 }
1363 if (search_list != &fs_info->pinned_chunks) {
1364 search_list = &fs_info->pinned_chunks;
1365 goto again;
1366 }
1367
1368 return ret;
1369}
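
/*
 * find_free_dev_extent_start - search @device for a hole that can hold a
 * new dev extent of @num_bytes, starting at or after @search_start (but
 * never below 1MiB, to protect the superblock and boot loader area).  The
 * search runs against the commit root and skips ranges claimed by pending
 * or pinned chunks.  On success *start is the start of a suitable hole;
 * on -ENOSPC *start and *len describe the largest hole that was found.
 */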
1393int find_free_dev_extent_start(struct btrfs_transaction *transaction,
1394 struct btrfs_device *device, u64 num_bytes,
1395 u64 search_start, u64 *start, u64 *len)
1396{
1397 struct btrfs_fs_info *fs_info = device->fs_info;
1398 struct btrfs_root *root = fs_info->dev_root;
1399 struct btrfs_key key;
1400 struct btrfs_dev_extent *dev_extent;
1401 struct btrfs_path *path;
1402 u64 hole_size;
1403 u64 max_hole_start;
1404 u64 max_hole_size;
1405 u64 extent_end;
1406 u64 search_end = device->total_bytes;
1407 int ret;
1408 int slot;
1409 struct extent_buffer *l;
1416 search_start = max_t(u64, search_start, SZ_1M);
1417
1418 path = btrfs_alloc_path();
1419 if (!path)
1420 return -ENOMEM;
1421
1422 max_hole_start = search_start;
1423 max_hole_size = 0;
1424
1425again:
1426 if (search_start >= search_end ||
1427 test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
1428 ret = -ENOSPC;
1429 goto out;
1430 }
1431
1432 path->reada = READA_FORWARD;
1433 path->search_commit_root = 1;
1434 path->skip_locking = 1;
1435
1436 key.objectid = device->devid;
1437 key.offset = search_start;
1438 key.type = BTRFS_DEV_EXTENT_KEY;
1439
1440 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1441 if (ret < 0)
1442 goto out;
1443 if (ret > 0) {
1444 ret = btrfs_previous_item(root, path, key.objectid, key.type);
1445 if (ret < 0)
1446 goto out;
1447 }
1448
1449 while (1) {
1450 l = path->nodes[0];
1451 slot = path->slots[0];
1452 if (slot >= btrfs_header_nritems(l)) {
1453 ret = btrfs_next_leaf(root, path);
1454 if (ret == 0)
1455 continue;
1456 if (ret < 0)
1457 goto out;
1458
1459 break;
1460 }
1461 btrfs_item_key_to_cpu(l, &key, slot);
1462
1463 if (key.objectid < device->devid)
1464 goto next;
1465
1466 if (key.objectid > device->devid)
1467 break;
1468
1469 if (key.type != BTRFS_DEV_EXTENT_KEY)
1470 goto next;
1471
1472 if (key.offset > search_start) {
1473 hole_size = key.offset - search_start;
1474
1475
1476
1477
1478
1479 if (contains_pending_extent(transaction, device,
1480 &search_start,
1481 hole_size)) {
1482 if (key.offset >= search_start) {
1483 hole_size = key.offset - search_start;
1484 } else {
1485 WARN_ON_ONCE(1);
1486 hole_size = 0;
1487 }
1488 }
1489
1490 if (hole_size > max_hole_size) {
1491 max_hole_start = search_start;
1492 max_hole_size = hole_size;
1493 }
1504 if (hole_size >= num_bytes) {
1505 ret = 0;
1506 goto out;
1507 }
1508 }
1509
1510 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
1511 extent_end = key.offset + btrfs_dev_extent_length(l,
1512 dev_extent);
1513 if (extent_end > search_start)
1514 search_start = extent_end;
1515next:
1516 path->slots[0]++;
1517 cond_resched();
1518 }
1525 if (search_end > search_start) {
1526 hole_size = search_end - search_start;
1527
1528 if (contains_pending_extent(transaction, device, &search_start,
1529 hole_size)) {
1530 btrfs_release_path(path);
1531 goto again;
1532 }
1533
1534 if (hole_size > max_hole_size) {
1535 max_hole_start = search_start;
1536 max_hole_size = hole_size;
1537 }
1538 }
1539
1540
1541 if (max_hole_size < num_bytes)
1542 ret = -ENOSPC;
1543 else
1544 ret = 0;
1545
1546out:
1547 btrfs_free_path(path);
1548 *start = max_hole_start;
1549 if (len)
1550 *len = max_hole_size;
1551 return ret;
1552}
1553
1554int find_free_dev_extent(struct btrfs_trans_handle *trans,
1555 struct btrfs_device *device, u64 num_bytes,
1556 u64 *start, u64 *len)
1557{
1558
1559 return find_free_dev_extent_start(trans->transaction, device,
1560 num_bytes, 0, start, len);
1561}
1562
1563static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
1564 struct btrfs_device *device,
1565 u64 start, u64 *dev_extent_len)
1566{
1567 struct btrfs_fs_info *fs_info = device->fs_info;
1568 struct btrfs_root *root = fs_info->dev_root;
1569 int ret;
1570 struct btrfs_path *path;
1571 struct btrfs_key key;
1572 struct btrfs_key found_key;
1573 struct extent_buffer *leaf = NULL;
1574 struct btrfs_dev_extent *extent = NULL;
1575
1576 path = btrfs_alloc_path();
1577 if (!path)
1578 return -ENOMEM;
1579
1580 key.objectid = device->devid;
1581 key.offset = start;
1582 key.type = BTRFS_DEV_EXTENT_KEY;
1583again:
1584 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1585 if (ret > 0) {
1586 ret = btrfs_previous_item(root, path, key.objectid,
1587 BTRFS_DEV_EXTENT_KEY);
1588 if (ret)
1589 goto out;
1590 leaf = path->nodes[0];
1591 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1592 extent = btrfs_item_ptr(leaf, path->slots[0],
1593 struct btrfs_dev_extent);
1594 BUG_ON(found_key.offset > start || found_key.offset +
1595 btrfs_dev_extent_length(leaf, extent) < start);
1596 key = found_key;
1597 btrfs_release_path(path);
1598 goto again;
1599 } else if (ret == 0) {
1600 leaf = path->nodes[0];
1601 extent = btrfs_item_ptr(leaf, path->slots[0],
1602 struct btrfs_dev_extent);
1603 } else {
1604 btrfs_handle_fs_error(fs_info, ret, "Slot search failed");
1605 goto out;
1606 }
1607
1608 *dev_extent_len = btrfs_dev_extent_length(leaf, extent);
1609
1610 ret = btrfs_del_item(trans, root, path);
1611 if (ret) {
1612 btrfs_handle_fs_error(fs_info, ret,
1613 "Failed to remove dev extent item");
1614 } else {
1615 set_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags);
1616 }
1617out:
1618 btrfs_free_path(path);
1619 return ret;
1620}
1621
1622static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
1623 struct btrfs_device *device,
1624 u64 chunk_offset, u64 start, u64 num_bytes)
1625{
1626 int ret;
1627 struct btrfs_path *path;
1628 struct btrfs_fs_info *fs_info = device->fs_info;
1629 struct btrfs_root *root = fs_info->dev_root;
1630 struct btrfs_dev_extent *extent;
1631 struct extent_buffer *leaf;
1632 struct btrfs_key key;
1633
1634 WARN_ON(!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state));
1635 WARN_ON(test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state));
1636 path = btrfs_alloc_path();
1637 if (!path)
1638 return -ENOMEM;
1639
1640 key.objectid = device->devid;
1641 key.offset = start;
1642 key.type = BTRFS_DEV_EXTENT_KEY;
1643 ret = btrfs_insert_empty_item(trans, root, path, &key,
1644 sizeof(*extent));
1645 if (ret)
1646 goto out;
1647
1648 leaf = path->nodes[0];
1649 extent = btrfs_item_ptr(leaf, path->slots[0],
1650 struct btrfs_dev_extent);
1651 btrfs_set_dev_extent_chunk_tree(leaf, extent,
1652 BTRFS_CHUNK_TREE_OBJECTID);
1653 btrfs_set_dev_extent_chunk_objectid(leaf, extent,
1654 BTRFS_FIRST_CHUNK_TREE_OBJECTID);
1655 btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
1656
1657 btrfs_set_dev_extent_length(leaf, extent, num_bytes);
1658 btrfs_mark_buffer_dirty(leaf);
1659out:
1660 btrfs_free_path(path);
1661 return ret;
1662}
1663
1664static u64 find_next_chunk(struct btrfs_fs_info *fs_info)
1665{
1666 struct extent_map_tree *em_tree;
1667 struct extent_map *em;
1668 struct rb_node *n;
1669 u64 ret = 0;
1670
1671 em_tree = &fs_info->mapping_tree.map_tree;
1672 read_lock(&em_tree->lock);
1673 n = rb_last(&em_tree->map);
1674 if (n) {
1675 em = rb_entry(n, struct extent_map, rb_node);
1676 ret = em->start + em->len;
1677 }
1678 read_unlock(&em_tree->lock);
1679
1680 return ret;
1681}
1682
1683static noinline int find_next_devid(struct btrfs_fs_info *fs_info,
1684 u64 *devid_ret)
1685{
1686 int ret;
1687 struct btrfs_key key;
1688 struct btrfs_key found_key;
1689 struct btrfs_path *path;
1690
1691 path = btrfs_alloc_path();
1692 if (!path)
1693 return -ENOMEM;
1694
1695 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
1696 key.type = BTRFS_DEV_ITEM_KEY;
1697 key.offset = (u64)-1;
1698
1699 ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
1700 if (ret < 0)
1701 goto error;
1702
1703 BUG_ON(ret == 0);
1704
1705 ret = btrfs_previous_item(fs_info->chunk_root, path,
1706 BTRFS_DEV_ITEMS_OBJECTID,
1707 BTRFS_DEV_ITEM_KEY);
1708 if (ret) {
1709 *devid_ret = 1;
1710 } else {
1711 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1712 path->slots[0]);
1713 *devid_ret = found_key.offset + 1;
1714 }
1715 ret = 0;
1716error:
1717 btrfs_free_path(path);
1718 return ret;
1719}
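
/*
 * Write the device item for @device into the chunk tree; the btrfs_device
 * must be fully filled in before this is called.
 */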
1725static int btrfs_add_dev_item(struct btrfs_trans_handle *trans,
1726 struct btrfs_fs_info *fs_info,
1727 struct btrfs_device *device)
1728{
1729 struct btrfs_root *root = fs_info->chunk_root;
1730 int ret;
1731 struct btrfs_path *path;
1732 struct btrfs_dev_item *dev_item;
1733 struct extent_buffer *leaf;
1734 struct btrfs_key key;
1735 unsigned long ptr;
1736
1737 path = btrfs_alloc_path();
1738 if (!path)
1739 return -ENOMEM;
1740
1741 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
1742 key.type = BTRFS_DEV_ITEM_KEY;
1743 key.offset = device->devid;
1744
1745 ret = btrfs_insert_empty_item(trans, root, path, &key,
1746 sizeof(*dev_item));
1747 if (ret)
1748 goto out;
1749
1750 leaf = path->nodes[0];
1751 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
1752
1753 btrfs_set_device_id(leaf, dev_item, device->devid);
1754 btrfs_set_device_generation(leaf, dev_item, 0);
1755 btrfs_set_device_type(leaf, dev_item, device->type);
1756 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
1757 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
1758 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
1759 btrfs_set_device_total_bytes(leaf, dev_item,
1760 btrfs_device_get_disk_total_bytes(device));
1761 btrfs_set_device_bytes_used(leaf, dev_item,
1762 btrfs_device_get_bytes_used(device));
1763 btrfs_set_device_group(leaf, dev_item, 0);
1764 btrfs_set_device_seek_speed(leaf, dev_item, 0);
1765 btrfs_set_device_bandwidth(leaf, dev_item, 0);
1766 btrfs_set_device_start_offset(leaf, dev_item, 0);
1767
1768 ptr = btrfs_device_uuid(dev_item);
1769 write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
1770 ptr = btrfs_device_fsid(dev_item);
1771 write_extent_buffer(leaf, fs_info->fsid, ptr, BTRFS_FSID_SIZE);
1772 btrfs_mark_buffer_dirty(leaf);
1773
1774 ret = 0;
1775out:
1776 btrfs_free_path(path);
1777 return ret;
1778}
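
/*
 * Bump ctime/mtime of the device node at @path_name so that tools doing
 * mtime based probing (e.g. libblkid) notice the change.
 */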
1784static void update_dev_time(const char *path_name)
1785{
1786 struct file *filp;
1787
1788 filp = filp_open(path_name, O_RDWR, 0);
1789 if (IS_ERR(filp))
1790 return;
1791 file_update_time(filp);
1792 filp_close(filp, NULL);
1793}
1794
1795static int btrfs_rm_dev_item(struct btrfs_fs_info *fs_info,
1796 struct btrfs_device *device)
1797{
1798 struct btrfs_root *root = fs_info->chunk_root;
1799 int ret;
1800 struct btrfs_path *path;
1801 struct btrfs_key key;
1802 struct btrfs_trans_handle *trans;
1803
1804 path = btrfs_alloc_path();
1805 if (!path)
1806 return -ENOMEM;
1807
1808 trans = btrfs_start_transaction(root, 0);
1809 if (IS_ERR(trans)) {
1810 btrfs_free_path(path);
1811 return PTR_ERR(trans);
1812 }
1813 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
1814 key.type = BTRFS_DEV_ITEM_KEY;
1815 key.offset = device->devid;
1816
1817 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1818 if (ret) {
1819 if (ret > 0)
1820 ret = -ENOENT;
1821 btrfs_abort_transaction(trans, ret);
1822 btrfs_end_transaction(trans);
1823 goto out;
1824 }
1825
1826 ret = btrfs_del_item(trans, root, path);
1827 if (ret) {
1828 btrfs_abort_transaction(trans, ret);
1829 btrfs_end_transaction(trans);
1830 }
1831
1832out:
1833 btrfs_free_path(path);
1834 if (!ret)
1835 ret = btrfs_commit_transaction(trans);
1836 return ret;
1837}
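
/*
 * Check whether @num_devices (the number of devices that would remain)
 * still satisfies devs_min for every RAID profile that currently has
 * allocated chunks; returns the matching BTRFS_ERROR_DEV_*_MIN_NOT_MET
 * code if not, 0 otherwise.
 */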
1844static int btrfs_check_raid_min_devices(struct btrfs_fs_info *fs_info,
1845 u64 num_devices)
1846{
1847 u64 all_avail;
1848 unsigned seq;
1849 int i;
1850
1851 do {
1852 seq = read_seqbegin(&fs_info->profiles_lock);
1853
1854 all_avail = fs_info->avail_data_alloc_bits |
1855 fs_info->avail_system_alloc_bits |
1856 fs_info->avail_metadata_alloc_bits;
1857 } while (read_seqretry(&fs_info->profiles_lock, seq));
1858
1859 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
1860 if (!(all_avail & btrfs_raid_group[i]))
1861 continue;
1862
1863 if (num_devices < btrfs_raid_array[i].devs_min) {
1864 int ret = btrfs_raid_mindev_error[i];
1865
1866 if (ret)
1867 return ret;
1868 }
1869 }
1870
1871 return 0;
1872}
1873
1874static struct btrfs_device * btrfs_find_next_active_device(
1875 struct btrfs_fs_devices *fs_devs, struct btrfs_device *device)
1876{
1877 struct btrfs_device *next_device;
1878
1879 list_for_each_entry(next_device, &fs_devs->devices, dev_list) {
1880 if (next_device != device &&
1881 !test_bit(BTRFS_DEV_STATE_MISSING, &next_device->dev_state)
1882 && next_device->bdev)
1883 return next_device;
1884 }
1885
1886 return NULL;
1887}
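
/*
 * If @device is currently backing the superblock's s_bdev or the
 * filesystem's latest_bdev, point them at @this_dev (when given) or at the
 * next active device instead.  The caller guarantees that such a device
 * exists.
 */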
1895void btrfs_assign_next_active_device(struct btrfs_fs_info *fs_info,
1896 struct btrfs_device *device, struct btrfs_device *this_dev)
1897{
1898 struct btrfs_device *next_device;
1899
1900 if (this_dev)
1901 next_device = this_dev;
1902 else
1903 next_device = btrfs_find_next_active_device(fs_info->fs_devices,
1904 device);
1905 ASSERT(next_device);
1906
1907 if (fs_info->sb->s_bdev &&
1908 (fs_info->sb->s_bdev == device->bdev))
1909 fs_info->sb->s_bdev = next_device->bdev;
1910
1911 if (fs_info->fs_devices->latest_bdev == device->bdev)
1912 fs_info->fs_devices->latest_bdev = next_device->bdev;
1913}
1914
1915int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
1916 u64 devid)
1917{
1918 struct btrfs_device *device;
1919 struct btrfs_fs_devices *cur_devices;
1920 u64 num_devices;
1921 int ret = 0;
1922
1923 mutex_lock(&fs_info->volume_mutex);
1924 mutex_lock(&uuid_mutex);
1925
1926 num_devices = fs_info->fs_devices->num_devices;
1927 btrfs_dev_replace_read_lock(&fs_info->dev_replace);
1928 if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
1929 WARN_ON(num_devices < 1);
1930 num_devices--;
1931 }
1932 btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
1933
1934 ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1);
1935 if (ret)
1936 goto out;
1937
1938 ret = btrfs_find_device_by_devspec(fs_info, devid, device_path,
1939 &device);
1940 if (ret)
1941 goto out;
1942
1943 if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
1944 ret = BTRFS_ERROR_DEV_TGT_REPLACE;
1945 goto out;
1946 }
1947
1948 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
1949 fs_info->fs_devices->rw_devices == 1) {
1950 ret = BTRFS_ERROR_DEV_ONLY_WRITABLE;
1951 goto out;
1952 }
1953
1954 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
1955 mutex_lock(&fs_info->chunk_mutex);
1956 list_del_init(&device->dev_alloc_list);
1957 device->fs_devices->rw_devices--;
1958 mutex_unlock(&fs_info->chunk_mutex);
1959 }
1960
1961 mutex_unlock(&uuid_mutex);
1962 ret = btrfs_shrink_device(device, 0);
1963 mutex_lock(&uuid_mutex);
1964 if (ret)
1965 goto error_undo;
1972 ret = btrfs_rm_dev_item(fs_info, device);
1973 if (ret)
1974 goto error_undo;
1975
1976 clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
1977 btrfs_scrub_cancel_dev(fs_info, device);
1989 cur_devices = device->fs_devices;
1990 mutex_lock(&fs_info->fs_devices->device_list_mutex);
1991 list_del_rcu(&device->dev_list);
1992
1993 device->fs_devices->num_devices--;
1994 device->fs_devices->total_devices--;
1995
1996 if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
1997 device->fs_devices->missing_devices--;
1998
1999 btrfs_assign_next_active_device(fs_info, device, NULL);
2000
2001 if (device->bdev) {
2002 device->fs_devices->open_devices--;
2003
2004 btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
2005 }
2006
2007 num_devices = btrfs_super_num_devices(fs_info->super_copy) - 1;
2008 btrfs_set_super_num_devices(fs_info->super_copy, num_devices);
2009 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2016 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
2017 btrfs_scratch_superblocks(device->bdev, device->name->str);
2018
2019 btrfs_close_bdev(device);
2020 call_rcu(&device->rcu, free_device_rcu);
2021
2022 if (cur_devices->open_devices == 0) {
2023 struct btrfs_fs_devices *fs_devices;
2024 fs_devices = fs_info->fs_devices;
2025 while (fs_devices) {
2026 if (fs_devices->seed == cur_devices) {
2027 fs_devices->seed = cur_devices->seed;
2028 break;
2029 }
2030 fs_devices = fs_devices->seed;
2031 }
2032 cur_devices->seed = NULL;
2033 __btrfs_close_devices(cur_devices);
2034 free_fs_devices(cur_devices);
2035 }
2036
2037out:
2038 mutex_unlock(&uuid_mutex);
2039 mutex_unlock(&fs_info->volume_mutex);
2040 return ret;
2041
2042error_undo:
2043 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
2044 mutex_lock(&fs_info->chunk_mutex);
2045 list_add(&device->dev_alloc_list,
2046 &fs_info->fs_devices->alloc_list);
2047 device->fs_devices->rw_devices++;
2048 mutex_unlock(&fs_info->chunk_mutex);
2049 }
2050 goto out;
2051}
2052
2053void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info,
2054 struct btrfs_device *srcdev)
2055{
2056 struct btrfs_fs_devices *fs_devices;
2057
2058 lockdep_assert_held(&fs_info->fs_devices->device_list_mutex);
2066 fs_devices = srcdev->fs_devices;
2067
2068 list_del_rcu(&srcdev->dev_list);
2069 list_del(&srcdev->dev_alloc_list);
2070 fs_devices->num_devices--;
2071 if (test_bit(BTRFS_DEV_STATE_MISSING, &srcdev->dev_state))
2072 fs_devices->missing_devices--;
2073
2074 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &srcdev->dev_state))
2075 fs_devices->rw_devices--;
2076
2077 if (srcdev->bdev)
2078 fs_devices->open_devices--;
2079}
2080
2081void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
2082 struct btrfs_device *srcdev)
2083{
2084 struct btrfs_fs_devices *fs_devices = srcdev->fs_devices;
2085
2086 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &srcdev->dev_state)) {
2087
2088 btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str);
2089 }
2090
2091 btrfs_close_bdev(srcdev);
2092 call_rcu(&srcdev->rcu, free_device_rcu);
2093
2094
2095 if (!fs_devices->num_devices) {
2096 struct btrfs_fs_devices *tmp_fs_devices;
2104 ASSERT(fs_devices->seeding);
2105
2106 tmp_fs_devices = fs_info->fs_devices;
2107 while (tmp_fs_devices) {
2108 if (tmp_fs_devices->seed == fs_devices) {
2109 tmp_fs_devices->seed = fs_devices->seed;
2110 break;
2111 }
2112 tmp_fs_devices = tmp_fs_devices->seed;
2113 }
2114 fs_devices->seed = NULL;
2115 __btrfs_close_devices(fs_devices);
2116 free_fs_devices(fs_devices);
2117 }
2118}
2119
2120void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
2121 struct btrfs_device *tgtdev)
2122{
2123 mutex_lock(&uuid_mutex);
2124 WARN_ON(!tgtdev);
2125 mutex_lock(&fs_info->fs_devices->device_list_mutex);
2126
2127 btrfs_sysfs_rm_device_link(fs_info->fs_devices, tgtdev);
2128
2129 if (tgtdev->bdev)
2130 fs_info->fs_devices->open_devices--;
2131
2132 fs_info->fs_devices->num_devices--;
2133
2134 btrfs_assign_next_active_device(fs_info, tgtdev, NULL);
2135
2136 list_del_rcu(&tgtdev->dev_list);
2137
2138 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2139 mutex_unlock(&uuid_mutex);
2148 btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);
2149
2150 btrfs_close_bdev(tgtdev);
2151 call_rcu(&tgtdev->rcu, free_device_rcu);
2152}
2153
2154static int btrfs_find_device_by_path(struct btrfs_fs_info *fs_info,
2155 const char *device_path,
2156 struct btrfs_device **device)
2157{
2158 int ret = 0;
2159 struct btrfs_super_block *disk_super;
2160 u64 devid;
2161 u8 *dev_uuid;
2162 struct block_device *bdev;
2163 struct buffer_head *bh;
2164
2165 *device = NULL;
2166 ret = btrfs_get_bdev_and_sb(device_path, FMODE_READ,
2167 fs_info->bdev_holder, 0, &bdev, &bh);
2168 if (ret)
2169 return ret;
2170 disk_super = (struct btrfs_super_block *)bh->b_data;
2171 devid = btrfs_stack_device_id(&disk_super->dev_item);
2172 dev_uuid = disk_super->dev_item.uuid;
2173 *device = btrfs_find_device(fs_info, devid, dev_uuid, disk_super->fsid);
2174 brelse(bh);
2175 if (!*device)
2176 ret = -ENOENT;
2177 blkdev_put(bdev, FMODE_READ);
2178 return ret;
2179}
2180
2181int btrfs_find_device_missing_or_by_path(struct btrfs_fs_info *fs_info,
2182 const char *device_path,
2183 struct btrfs_device **device)
2184{
2185 *device = NULL;
2186 if (strcmp(device_path, "missing") == 0) {
2187 struct list_head *devices;
2188 struct btrfs_device *tmp;
2189
2190 devices = &fs_info->fs_devices->devices;
2191
2192
2193
2194
2195 list_for_each_entry(tmp, devices, dev_list) {
2196 if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
2197 &tmp->dev_state) && !tmp->bdev) {
2198 *device = tmp;
2199 break;
2200 }
2201 }
2202
2203 if (!*device)
2204 return BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
2205
2206 return 0;
2207 } else {
2208 return btrfs_find_device_by_path(fs_info, device_path, device);
2209 }
2210}
2211
2212
2213
2214
2215int btrfs_find_device_by_devspec(struct btrfs_fs_info *fs_info, u64 devid,
2216 const char *devpath,
2217 struct btrfs_device **device)
2218{
2219 int ret;
2220
2221 if (devid) {
2222 ret = 0;
2223 *device = btrfs_find_device(fs_info, devid, NULL, NULL);
2224 if (!*device)
2225 ret = -ENOENT;
2226 } else {
2227 if (!devpath || !devpath[0])
2228 return -EINVAL;
2229
2230 ret = btrfs_find_device_missing_or_by_path(fs_info, devpath,
2231 device);
2232 }
2233 return ret;
2234}
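
/*
 * Split off the seed devices before sprouting: the existing devices are
 * moved to a new fs_devices hung off fs_devices->seed, a clone keeps the
 * seed fsid registered on fs_uuids, and the (now writable) fs_devices gets
 * a freshly generated fsid and empty lists.  The SEEDING flag is cleared
 * in the in-memory super block.
 */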
2239static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
2240{
2241 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
2242 struct btrfs_fs_devices *old_devices;
2243 struct btrfs_fs_devices *seed_devices;
2244 struct btrfs_super_block *disk_super = fs_info->super_copy;
2245 struct btrfs_device *device;
2246 u64 super_flags;
2247
2248 lockdep_assert_held(&uuid_mutex);
2249 if (!fs_devices->seeding)
2250 return -EINVAL;
2251
2252 seed_devices = alloc_fs_devices(NULL);
2253 if (IS_ERR(seed_devices))
2254 return PTR_ERR(seed_devices);
2255
2256 old_devices = clone_fs_devices(fs_devices);
2257 if (IS_ERR(old_devices)) {
2258 kfree(seed_devices);
2259 return PTR_ERR(old_devices);
2260 }
2261
2262 list_add(&old_devices->list, &fs_uuids);
2263
2264 memcpy(seed_devices, fs_devices, sizeof(*seed_devices));
2265 seed_devices->opened = 1;
2266 INIT_LIST_HEAD(&seed_devices->devices);
2267 INIT_LIST_HEAD(&seed_devices->alloc_list);
2268 mutex_init(&seed_devices->device_list_mutex);
2269
2270 mutex_lock(&fs_info->fs_devices->device_list_mutex);
2271 list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
2272 synchronize_rcu);
2273 list_for_each_entry(device, &seed_devices->devices, dev_list)
2274 device->fs_devices = seed_devices;
2275
2276 mutex_lock(&fs_info->chunk_mutex);
2277 list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list);
2278 mutex_unlock(&fs_info->chunk_mutex);
2279
2280 fs_devices->seeding = 0;
2281 fs_devices->num_devices = 0;
2282 fs_devices->open_devices = 0;
2283 fs_devices->missing_devices = 0;
2284 fs_devices->rotating = 0;
2285 fs_devices->seed = seed_devices;
2286
2287 generate_random_uuid(fs_devices->fsid);
2288 memcpy(fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
2289 memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
2290 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2291
2292 super_flags = btrfs_super_flags(disk_super) &
2293 ~BTRFS_SUPER_FLAG_SEEDING;
2294 btrfs_set_super_flags(disk_super, super_flags);
2295
2296 return 0;
2297}
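
/*
 * Store the expected generation for the seed devices in their device
 * items.
 */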
2302static int btrfs_finish_sprout(struct btrfs_trans_handle *trans,
2303 struct btrfs_fs_info *fs_info)
2304{
2305 struct btrfs_root *root = fs_info->chunk_root;
2306 struct btrfs_path *path;
2307 struct extent_buffer *leaf;
2308 struct btrfs_dev_item *dev_item;
2309 struct btrfs_device *device;
2310 struct btrfs_key key;
2311 u8 fs_uuid[BTRFS_FSID_SIZE];
2312 u8 dev_uuid[BTRFS_UUID_SIZE];
2313 u64 devid;
2314 int ret;
2315
2316 path = btrfs_alloc_path();
2317 if (!path)
2318 return -ENOMEM;
2319
2320 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2321 key.offset = 0;
2322 key.type = BTRFS_DEV_ITEM_KEY;
2323
2324 while (1) {
2325 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2326 if (ret < 0)
2327 goto error;
2328
2329 leaf = path->nodes[0];
2330next_slot:
2331 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
2332 ret = btrfs_next_leaf(root, path);
2333 if (ret > 0)
2334 break;
2335 if (ret < 0)
2336 goto error;
2337 leaf = path->nodes[0];
2338 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2339 btrfs_release_path(path);
2340 continue;
2341 }
2342
2343 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2344 if (key.objectid != BTRFS_DEV_ITEMS_OBJECTID ||
2345 key.type != BTRFS_DEV_ITEM_KEY)
2346 break;
2347
2348 dev_item = btrfs_item_ptr(leaf, path->slots[0],
2349 struct btrfs_dev_item);
2350 devid = btrfs_device_id(leaf, dev_item);
2351 read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
2352 BTRFS_UUID_SIZE);
2353 read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
2354 BTRFS_FSID_SIZE);
2355 device = btrfs_find_device(fs_info, devid, dev_uuid, fs_uuid);
2356 BUG_ON(!device);
2357
2358 if (device->fs_devices->seeding) {
2359 btrfs_set_device_generation(leaf, dev_item,
2360 device->generation);
2361 btrfs_mark_buffer_dirty(leaf);
2362 }
2363
2364 path->slots[0]++;
2365 goto next_slot;
2366 }
2367 ret = 0;
2368error:
2369 btrfs_free_path(path);
2370 return ret;
2371}
2372
2373int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path)
2374{
2375 struct btrfs_root *root = fs_info->dev_root;
2376 struct request_queue *q;
2377 struct btrfs_trans_handle *trans;
2378 struct btrfs_device *device;
2379 struct block_device *bdev;
2380 struct list_head *devices;
2381 struct super_block *sb = fs_info->sb;
2382 struct rcu_string *name;
2383 u64 tmp;
2384 int seeding_dev = 0;
2385 int ret = 0;
2386 bool unlocked = false;
2387
2388 if (sb_rdonly(sb) && !fs_info->fs_devices->seeding)
2389 return -EROFS;
2390
2391 bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
2392 fs_info->bdev_holder);
2393 if (IS_ERR(bdev))
2394 return PTR_ERR(bdev);
2395
2396 if (fs_info->fs_devices->seeding) {
2397 seeding_dev = 1;
2398 down_write(&sb->s_umount);
2399 mutex_lock(&uuid_mutex);
2400 }
2401
2402 filemap_write_and_wait(bdev->bd_inode->i_mapping);
2403
2404 devices = &fs_info->fs_devices->devices;
2405
2406 mutex_lock(&fs_info->fs_devices->device_list_mutex);
2407 list_for_each_entry(device, devices, dev_list) {
2408 if (device->bdev == bdev) {
2409 ret = -EEXIST;
2410 mutex_unlock(
2411 &fs_info->fs_devices->device_list_mutex);
2412 goto error;
2413 }
2414 }
2415 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2416
2417 device = btrfs_alloc_device(fs_info, NULL, NULL);
2418 if (IS_ERR(device)) {
2419
2420 ret = PTR_ERR(device);
2421 goto error;
2422 }
2423
2424 name = rcu_string_strdup(device_path, GFP_KERNEL);
2425 if (!name) {
2426 ret = -ENOMEM;
2427 goto error_free_device;
2428 }
2429 rcu_assign_pointer(device->name, name);
2430
2431 trans = btrfs_start_transaction(root, 0);
2432 if (IS_ERR(trans)) {
2433 ret = PTR_ERR(trans);
2434 goto error_free_device;
2435 }
2436
2437 q = bdev_get_queue(bdev);
2438 set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
2439 device->generation = trans->transid;
2440 device->io_width = fs_info->sectorsize;
2441 device->io_align = fs_info->sectorsize;
2442 device->sector_size = fs_info->sectorsize;
2443 device->total_bytes = round_down(i_size_read(bdev->bd_inode),
2444 fs_info->sectorsize);
2445 device->disk_total_bytes = device->total_bytes;
2446 device->commit_total_bytes = device->total_bytes;
2447 device->fs_info = fs_info;
2448 device->bdev = bdev;
2449 set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
2450 clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
2451 device->mode = FMODE_EXCL;
2452 device->dev_stats_valid = 1;
2453 set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
2454
2455 if (seeding_dev) {
2456 sb->s_flags &= ~SB_RDONLY;
2457 ret = btrfs_prepare_sprout(fs_info);
2458 if (ret) {
2459 btrfs_abort_transaction(trans, ret);
2460 goto error_trans;
2461 }
2462 }
2463
2464 device->fs_devices = fs_info->fs_devices;
2465
2466 mutex_lock(&fs_info->fs_devices->device_list_mutex);
2467 mutex_lock(&fs_info->chunk_mutex);
2468 list_add_rcu(&device->dev_list, &fs_info->fs_devices->devices);
2469 list_add(&device->dev_alloc_list,
2470 &fs_info->fs_devices->alloc_list);
2471 fs_info->fs_devices->num_devices++;
2472 fs_info->fs_devices->open_devices++;
2473 fs_info->fs_devices->rw_devices++;
2474 fs_info->fs_devices->total_devices++;
2475 fs_info->fs_devices->total_rw_bytes += device->total_bytes;
2476
2477 atomic64_add(device->total_bytes, &fs_info->free_chunk_space);
2478
2479 if (!blk_queue_nonrot(q))
2480 fs_info->fs_devices->rotating = 1;
2481
2482 tmp = btrfs_super_total_bytes(fs_info->super_copy);
2483 btrfs_set_super_total_bytes(fs_info->super_copy,
2484 round_down(tmp + device->total_bytes, fs_info->sectorsize));
2485
2486 tmp = btrfs_super_num_devices(fs_info->super_copy);
2487 btrfs_set_super_num_devices(fs_info->super_copy, tmp + 1);

 /* add the new device to sysfs */
2490 btrfs_sysfs_add_device_link(fs_info->fs_devices, device);
2491
 /*
  * we've got more storage: clear whatever "full" flags the space
  * infos had set so the allocator takes the new capacity into account
  */
2496 btrfs_clear_space_info_full(fs_info);
2497
2498 mutex_unlock(&fs_info->chunk_mutex);
2499 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2500
2501 if (seeding_dev) {
2502 mutex_lock(&fs_info->chunk_mutex);
2503 ret = init_first_rw_device(trans, fs_info);
2504 mutex_unlock(&fs_info->chunk_mutex);
2505 if (ret) {
2506 btrfs_abort_transaction(trans, ret);
2507 goto error_sysfs;
2508 }
2509 }
2510
2511 ret = btrfs_add_dev_item(trans, fs_info, device);
2512 if (ret) {
2513 btrfs_abort_transaction(trans, ret);
2514 goto error_sysfs;
2515 }
2516
2517 if (seeding_dev) {
2518 char fsid_buf[BTRFS_UUID_UNPARSED_SIZE];
2519
2520 ret = btrfs_finish_sprout(trans, fs_info);
2521 if (ret) {
2522 btrfs_abort_transaction(trans, ret);
2523 goto error_sysfs;
2524 }
2525
 /*
  * Sprouting changes the fsid of the mounted filesystem, so rename
  * the fsid directory in sysfs to match the new fsid.
  */
2529 snprintf(fsid_buf, BTRFS_UUID_UNPARSED_SIZE, "%pU",
2530 fs_info->fsid);
2531 if (kobject_rename(&fs_info->fs_devices->fsid_kobj, fsid_buf))
2532 btrfs_warn(fs_info,
2533 "sysfs: failed to create fsid for sprout");
2534 }
2535
2536 ret = btrfs_commit_transaction(trans);
2537
2538 if (seeding_dev) {
2539 mutex_unlock(&uuid_mutex);
2540 up_write(&sb->s_umount);
2541 unlocked = true;
2542
2543 if (ret)
2544 return ret;
2545
2546 ret = btrfs_relocate_sys_chunks(fs_info);
2547 if (ret < 0)
2548 btrfs_handle_fs_error(fs_info, ret,
2549 "Failed to relocate sys chunks after device initialization. This can be fixed using the \"btrfs balance\" command.");
2550 trans = btrfs_attach_transaction(root);
2551 if (IS_ERR(trans)) {
2552 if (PTR_ERR(trans) == -ENOENT)
2553 return 0;
2554 ret = PTR_ERR(trans);
2555 trans = NULL;
2556 goto error_sysfs;
2557 }
2558 ret = btrfs_commit_transaction(trans);
2559 }
2560
 /* update ctime/mtime on the device path for libblkid */
2562 update_dev_time(device_path);
2563 return ret;
2564
2565error_sysfs:
2566 btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
2567error_trans:
2568 if (seeding_dev)
2569 sb->s_flags |= SB_RDONLY;
2570 if (trans)
2571 btrfs_end_transaction(trans);
2572error_free_device:
2573 free_device(device);
2574error:
2575 blkdev_put(bdev, FMODE_EXCL);
2576 if (seeding_dev && !unlocked) {
2577 mutex_unlock(&uuid_mutex);
2578 up_write(&sb->s_umount);
2579 }
2580 return ret;
2581}
2582
2583int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
2584 const char *device_path,
2585 struct btrfs_device *srcdev,
2586 struct btrfs_device **device_out)
2587{
2588 struct btrfs_device *device;
2589 struct block_device *bdev;
2590 struct list_head *devices;
2591 struct rcu_string *name;
2592 u64 devid = BTRFS_DEV_REPLACE_DEVID;
2593 int ret = 0;
2594
2595 *device_out = NULL;
2596 if (fs_info->fs_devices->seeding) {
2597 btrfs_err(fs_info, "the filesystem is a seed filesystem!");
2598 return -EINVAL;
2599 }
2600
2601 bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
2602 fs_info->bdev_holder);
2603 if (IS_ERR(bdev)) {
2604 btrfs_err(fs_info, "target device %s is invalid!", device_path);
2605 return PTR_ERR(bdev);
2606 }
2607
2608 filemap_write_and_wait(bdev->bd_inode->i_mapping);
2609
2610 devices = &fs_info->fs_devices->devices;
2611 list_for_each_entry(device, devices, dev_list) {
2612 if (device->bdev == bdev) {
2613 btrfs_err(fs_info,
2614 "target device is in the filesystem!");
2615 ret = -EEXIST;
2616 goto error;
2617 }
2618 }

 /* the replace target must be at least as large as the source device */
2621 if (i_size_read(bdev->bd_inode) <
2622 btrfs_device_get_total_bytes(srcdev)) {
2623 btrfs_err(fs_info,
2624 "target device is smaller than source device!");
2625 ret = -EINVAL;
2626 goto error;
2627 }
2628
2629
2630 device = btrfs_alloc_device(NULL, &devid, NULL);
2631 if (IS_ERR(device)) {
2632 ret = PTR_ERR(device);
2633 goto error;
2634 }
2635
2636 name = rcu_string_strdup(device_path, GFP_KERNEL);
2637 if (!name) {
2638 free_device(device);
2639 ret = -ENOMEM;
2640 goto error;
2641 }
2642 rcu_assign_pointer(device->name, name);
2643
2644 mutex_lock(&fs_info->fs_devices->device_list_mutex);
2645 set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
2646 device->generation = 0;
2647 device->io_width = fs_info->sectorsize;
2648 device->io_align = fs_info->sectorsize;
2649 device->sector_size = fs_info->sectorsize;
2650 device->total_bytes = btrfs_device_get_total_bytes(srcdev);
2651 device->disk_total_bytes = btrfs_device_get_disk_total_bytes(srcdev);
2652 device->bytes_used = btrfs_device_get_bytes_used(srcdev);
2653 device->commit_total_bytes = srcdev->commit_total_bytes;
2654 device->commit_bytes_used = device->bytes_used;
2655 device->fs_info = fs_info;
2656 device->bdev = bdev;
2657 set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
2658 set_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
2659 device->mode = FMODE_EXCL;
2660 device->dev_stats_valid = 1;
2661 set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
2662 device->fs_devices = fs_info->fs_devices;
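 /*
  * The replace target is not available for chunk allocation, so it is
  * added to the device list below but not to the alloc_list, and
  * rw_devices is left unchanged.
  */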
2663 list_add(&device->dev_list, &fs_info->fs_devices->devices);
2664 fs_info->fs_devices->num_devices++;
2665 fs_info->fs_devices->open_devices++;
2666 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2667
2668 *device_out = device;
2669 return ret;
2670
2671error:
2672 blkdev_put(bdev, FMODE_EXCL);
2673 return ret;
2674}
2675
2676static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
2677 struct btrfs_device *device)
2678{
2679 int ret;
2680 struct btrfs_path *path;
2681 struct btrfs_root *root = device->fs_info->chunk_root;
2682 struct btrfs_dev_item *dev_item;
2683 struct extent_buffer *leaf;
2684 struct btrfs_key key;
2685
2686 path = btrfs_alloc_path();
2687 if (!path)
2688 return -ENOMEM;
2689
2690 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2691 key.type = BTRFS_DEV_ITEM_KEY;
2692 key.offset = device->devid;
2693
2694 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2695 if (ret < 0)
2696 goto out;
2697
2698 if (ret > 0) {
2699 ret = -ENOENT;
2700 goto out;
2701 }
2702
2703 leaf = path->nodes[0];
2704 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
2705
2706 btrfs_set_device_id(leaf, dev_item, device->devid);
2707 btrfs_set_device_type(leaf, dev_item, device->type);
2708 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
2709 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
2710 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
2711 btrfs_set_device_total_bytes(leaf, dev_item,
2712 btrfs_device_get_disk_total_bytes(device));
2713 btrfs_set_device_bytes_used(leaf, dev_item,
2714 btrfs_device_get_bytes_used(device));
2715 btrfs_mark_buffer_dirty(leaf);
2716
2717out:
2718 btrfs_free_path(path);
2719 return ret;
2720}
2721
2722int btrfs_grow_device(struct btrfs_trans_handle *trans,
2723 struct btrfs_device *device, u64 new_size)
2724{
2725 struct btrfs_fs_info *fs_info = device->fs_info;
2726 struct btrfs_super_block *super_copy = fs_info->super_copy;
2727 struct btrfs_fs_devices *fs_devices;
2728 u64 old_total;
2729 u64 diff;
2730
2731 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
2732 return -EACCES;
2733
2734 new_size = round_down(new_size, fs_info->sectorsize);
2735
2736 mutex_lock(&fs_info->chunk_mutex);
2737 old_total = btrfs_super_total_bytes(super_copy);
2738 diff = round_down(new_size - device->total_bytes, fs_info->sectorsize);
2739
2740 if (new_size <= device->total_bytes ||
2741 test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
2742 mutex_unlock(&fs_info->chunk_mutex);
2743 return -EINVAL;
2744 }
2745
2746 fs_devices = fs_info->fs_devices;
2747
2748 btrfs_set_super_total_bytes(super_copy,
2749 round_down(old_total + diff, fs_info->sectorsize));
2750 device->fs_devices->total_rw_bytes += diff;
2751
2752 btrfs_device_set_total_bytes(device, new_size);
2753 btrfs_device_set_disk_total_bytes(device, new_size);
2754 btrfs_clear_space_info_full(device->fs_info);
2755 if (list_empty(&device->resized_list))
2756 list_add_tail(&device->resized_list,
2757 &fs_devices->resized_devices);
2758 mutex_unlock(&fs_info->chunk_mutex);
2759
2760 return btrfs_update_device(trans, device);
2761}
2762
2763static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
2764 struct btrfs_fs_info *fs_info, u64 chunk_offset)
2765{
2766 struct btrfs_root *root = fs_info->chunk_root;
2767 int ret;
2768 struct btrfs_path *path;
2769 struct btrfs_key key;
2770
2771 path = btrfs_alloc_path();
2772 if (!path)
2773 return -ENOMEM;
2774
2775 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
2776 key.offset = chunk_offset;
2777 key.type = BTRFS_CHUNK_ITEM_KEY;
2778
2779 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2780 if (ret < 0)
2781 goto out;
2782 else if (ret > 0) {
2783 btrfs_handle_fs_error(fs_info, -ENOENT,
2784 "Failed lookup while freeing chunk.");
2785 ret = -ENOENT;
2786 goto out;
2787 }
2788
2789 ret = btrfs_del_item(trans, root, path);
2790 if (ret < 0)
2791 btrfs_handle_fs_error(fs_info, ret,
2792 "Failed to delete chunk item.");
2793out:
2794 btrfs_free_path(path);
2795 return ret;
2796}
2797
2798static int btrfs_del_sys_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
2799{
2800 struct btrfs_super_block *super_copy = fs_info->super_copy;
2801 struct btrfs_disk_key *disk_key;
2802 struct btrfs_chunk *chunk;
2803 u8 *ptr;
2804 int ret = 0;
2805 u32 num_stripes;
2806 u32 array_size;
2807 u32 len = 0;
2808 u32 cur;
2809 struct btrfs_key key;
2810
2811 mutex_lock(&fs_info->chunk_mutex);
2812 array_size = btrfs_super_sys_array_size(super_copy);
2813
2814 ptr = super_copy->sys_chunk_array;
2815 cur = 0;
2816
2817 while (cur < array_size) {
2818 disk_key = (struct btrfs_disk_key *)ptr;
2819 btrfs_disk_key_to_cpu(&key, disk_key);
2820
2821 len = sizeof(*disk_key);
2822
2823 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
2824 chunk = (struct btrfs_chunk *)(ptr + len);
2825 num_stripes = btrfs_stack_chunk_num_stripes(chunk);
2826 len += btrfs_chunk_item_size(num_stripes);
2827 } else {
2828 ret = -EIO;
2829 break;
2830 }
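 /*
  * If this is the chunk being removed, slide the rest of the array
  * down over it and shrink the recorded array size; otherwise step
  * over the entry.
  */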
2831 if (key.objectid == BTRFS_FIRST_CHUNK_TREE_OBJECTID &&
2832 key.offset == chunk_offset) {
2833 memmove(ptr, ptr + len, array_size - (cur + len));
2834 array_size -= len;
2835 btrfs_set_super_sys_array_size(super_copy, array_size);
2836 } else {
2837 ptr += len;
2838 cur += len;
2839 }
2840 }
2841 mutex_unlock(&fs_info->chunk_mutex);
2842 return ret;
2843}
2844
2845static struct extent_map *get_chunk_map(struct btrfs_fs_info *fs_info,
2846 u64 logical, u64 length)
2847{
2848 struct extent_map_tree *em_tree;
2849 struct extent_map *em;
2850
2851 em_tree = &fs_info->mapping_tree.map_tree;
2852 read_lock(&em_tree->lock);
2853 em = lookup_extent_mapping(em_tree, logical, length);
2854 read_unlock(&em_tree->lock);
2855
2856 if (!em) {
2857 btrfs_crit(fs_info, "unable to find logical %llu length %llu",
2858 logical, length);
2859 return ERR_PTR(-EINVAL);
2860 }
2861
2862 if (em->start > logical || em->start + em->len < logical) {
2863 btrfs_crit(fs_info,
2864 "found a bad mapping, wanted %llu-%llu, found %llu-%llu",
2865 logical, length, em->start, em->start + em->len);
2866 free_extent_map(em);
2867 return ERR_PTR(-EINVAL);
2868 }
2869
2870
2871 return em;
2872}
2873
2874int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
2875 struct btrfs_fs_info *fs_info, u64 chunk_offset)
2876{
2877 struct extent_map *em;
2878 struct map_lookup *map;
2879 u64 dev_extent_len = 0;
2880 int i, ret = 0;
2881 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
2882
2883 em = get_chunk_map(fs_info, chunk_offset, 1);
2884 if (IS_ERR(em)) {
 /*
  * This is a logic error, but we don't want to just rely on the
  * user having built with ASSERT enabled, so if ASSERT doesn't
  * fire we'll bail back out and unmount before anything bad
  * happens.
  */
2890 ASSERT(0);
2891 return PTR_ERR(em);
2892 }
2893 map = em->map_lookup;
2894 mutex_lock(&fs_info->chunk_mutex);
2895 check_system_chunk(trans, fs_info, map->type);
2896 mutex_unlock(&fs_info->chunk_mutex);
2897
 /*
  * Take the device list mutex to prevent races with the final phase
  * of a device replace operation that replaces the device object
  * associated with the map's stripes
  * (dev-replace.c:btrfs_dev_replace_finishing()).
  */
2903 mutex_lock(&fs_devices->device_list_mutex);
2904 for (i = 0; i < map->num_stripes; i++) {
2905 struct btrfs_device *device = map->stripes[i].dev;
2906 ret = btrfs_free_dev_extent(trans, device,
2907 map->stripes[i].physical,
2908 &dev_extent_len);
2909 if (ret) {
2910 mutex_unlock(&fs_devices->device_list_mutex);
2911 btrfs_abort_transaction(trans, ret);
2912 goto out;
2913 }
2914
2915 if (device->bytes_used > 0) {
2916 mutex_lock(&fs_info->chunk_mutex);
2917 btrfs_device_set_bytes_used(device,
2918 device->bytes_used - dev_extent_len);
2919 atomic64_add(dev_extent_len, &fs_info->free_chunk_space);
2920 btrfs_clear_space_info_full(fs_info);
2921 mutex_unlock(&fs_info->chunk_mutex);
2922 }
2923
2924 if (map->stripes[i].dev) {
2925 ret = btrfs_update_device(trans, map->stripes[i].dev);
2926 if (ret) {
2927 mutex_unlock(&fs_devices->device_list_mutex);
2928 btrfs_abort_transaction(trans, ret);
2929 goto out;
2930 }
2931 }
2932 }
2933 mutex_unlock(&fs_devices->device_list_mutex);
2934
2935 ret = btrfs_free_chunk(trans, fs_info, chunk_offset);
2936 if (ret) {
2937 btrfs_abort_transaction(trans, ret);
2938 goto out;
2939 }
2940
2941 trace_btrfs_chunk_free(fs_info, map, chunk_offset, em->len);
2942
2943 if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
2944 ret = btrfs_del_sys_chunk(fs_info, chunk_offset);
2945 if (ret) {
2946 btrfs_abort_transaction(trans, ret);
2947 goto out;
2948 }
2949 }
2950
2951 ret = btrfs_remove_block_group(trans, fs_info, chunk_offset, em);
2952 if (ret) {
2953 btrfs_abort_transaction(trans, ret);
2954 goto out;
2955 }
2956
2957out:
2958
2959 free_extent_map(em);
2960 return ret;
2961}
2962
2963static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
2964{
2965 struct btrfs_root *root = fs_info->chunk_root;
2966 struct btrfs_trans_handle *trans;
2967 int ret;
2968
 /*
  * Prevent races with automatic removal of unused block groups.
  * After we relocate and before we remove the chunk with offset
  * chunk_offset, automatic removal of the block group can kick in,
  * resulting in a failure when calling btrfs_remove_chunk() below.
  *
  * Make sure to acquire this mutex before doing a tree search (dev
  * or chunk trees) to find chunks. Otherwise the cleaner kthread might
  * call btrfs_remove_chunk() (via btrfs_delete_unused_bgs()) after
  * we release the path used to search the chunk/dev tree and before
  * the current task acquires this mutex and calls us.
  */
2981 lockdep_assert_held(&fs_info->delete_unused_bgs_mutex);
2982
2983 ret = btrfs_can_relocate(fs_info, chunk_offset);
2984 if (ret)
2985 return -ENOSPC;
2986
 /* step one, relocate all the extents inside this chunk */
2988 btrfs_scrub_pause(fs_info);
2989 ret = btrfs_relocate_block_group(fs_info, chunk_offset);
2990 btrfs_scrub_continue(fs_info);
2991 if (ret)
2992 return ret;
2993
 /*
  * Relocation (and the forced data chunk allocation below) can create
  * block groups of a RAID type that did not exist before, so make sure
  * the matching per-RAID-type sysfs kobjects are present before the
  * chunk is removed.
  */
3002 btrfs_add_raid_kobjects(fs_info);
3003
3004 trans = btrfs_start_trans_remove_block_group(root->fs_info,
3005 chunk_offset);
3006 if (IS_ERR(trans)) {
3007 ret = PTR_ERR(trans);
3008 btrfs_handle_fs_error(root->fs_info, ret, NULL);
3009 return ret;
3010 }
3011
 /*
  * step two, delete the device extents and the
  * chunk tree entries
  */
3016 ret = btrfs_remove_chunk(trans, fs_info, chunk_offset);
3017 btrfs_end_transaction(trans);
3018 return ret;
3019}
3020
3021static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info)
3022{
3023 struct btrfs_root *chunk_root = fs_info->chunk_root;
3024 struct btrfs_path *path;
3025 struct extent_buffer *leaf;
3026 struct btrfs_chunk *chunk;
3027 struct btrfs_key key;
3028 struct btrfs_key found_key;
3029 u64 chunk_type;
3030 bool retried = false;
3031 int failed = 0;
3032 int ret;
3033
3034 path = btrfs_alloc_path();
3035 if (!path)
3036 return -ENOMEM;
3037
3038again:
3039 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
3040 key.offset = (u64)-1;
3041 key.type = BTRFS_CHUNK_ITEM_KEY;
3042
3043 while (1) {
3044 mutex_lock(&fs_info->delete_unused_bgs_mutex);
3045 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
3046 if (ret < 0) {
3047 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3048 goto error;
3049 }
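 /*
  * The search key offset is (u64)-1, so an exact match should be
  * impossible; ret == 0 here would mean a corrupted chunk tree.
  */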
3050 BUG_ON(ret == 0);
3051
3052 ret = btrfs_previous_item(chunk_root, path, key.objectid,
3053 key.type);
3054 if (ret)
3055 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3056 if (ret < 0)
3057 goto error;
3058 if (ret > 0)
3059 break;
3060
3061 leaf = path->nodes[0];
3062 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3063
3064 chunk = btrfs_item_ptr(leaf, path->slots[0],
3065 struct btrfs_chunk);
3066 chunk_type = btrfs_chunk_type(leaf, chunk);
3067 btrfs_release_path(path);
3068
3069 if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) {
3070 ret = btrfs_relocate_chunk(fs_info, found_key.offset);
3071 if (ret == -ENOSPC)
3072 failed++;
3073 else
3074 BUG_ON(ret);
3075 }
3076 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3077
3078 if (found_key.offset == 0)
3079 break;
3080 key.offset = found_key.offset - 1;
3081 }
3082 ret = 0;
3083 if (failed && !retried) {
3084 failed = 0;
3085 retried = true;
3086 goto again;
3087 } else if (WARN_ON(failed && retried)) {
3088 ret = -ENOSPC;
3089 }
3090error:
3091 btrfs_free_path(path);
3092 return ret;
3093}
3094
/*
 * Return 1 if a new data chunk was allocated, 0 if no allocation was needed
 * and < 0 on error.
 */
3100static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info,
3101 u64 chunk_offset)
3102{
3103 struct btrfs_block_group_cache *cache;
3104 u64 bytes_used;
3105 u64 chunk_type;
3106
3107 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
3108 ASSERT(cache);
3109 chunk_type = cache->flags;
3110 btrfs_put_block_group(cache);
3111
3112 if (chunk_type & BTRFS_BLOCK_GROUP_DATA) {
3113 spin_lock(&fs_info->data_sinfo->lock);
3114 bytes_used = fs_info->data_sinfo->bytes_used;
3115 spin_unlock(&fs_info->data_sinfo->lock);
3116
3117 if (!bytes_used) {
3118 struct btrfs_trans_handle *trans;
3119 int ret;
3120
3121 trans = btrfs_join_transaction(fs_info->tree_root);
3122 if (IS_ERR(trans))
3123 return PTR_ERR(trans);
3124
3125 ret = btrfs_force_chunk_alloc(trans, fs_info,
3126 BTRFS_BLOCK_GROUP_DATA);
3127 btrfs_end_transaction(trans);
3128 if (ret < 0)
3129 return ret;
3130
3131 btrfs_add_raid_kobjects(fs_info);
3132
3133 return 1;
3134 }
3135 }
3136 return 0;
3137}
3138
3139static int insert_balance_item(struct btrfs_fs_info *fs_info,
3140 struct btrfs_balance_control *bctl)
3141{
3142 struct btrfs_root *root = fs_info->tree_root;
3143 struct btrfs_trans_handle *trans;
3144 struct btrfs_balance_item *item;
3145 struct btrfs_disk_balance_args disk_bargs;
3146 struct btrfs_path *path;
3147 struct extent_buffer *leaf;
3148 struct btrfs_key key;
3149 int ret, err;
3150
3151 path = btrfs_alloc_path();
3152 if (!path)
3153 return -ENOMEM;
3154
3155 trans = btrfs_start_transaction(root, 0);
3156 if (IS_ERR(trans)) {
3157 btrfs_free_path(path);
3158 return PTR_ERR(trans);
3159 }
3160
3161 key.objectid = BTRFS_BALANCE_OBJECTID;
3162 key.type = BTRFS_TEMPORARY_ITEM_KEY;
3163 key.offset = 0;
3164
3165 ret = btrfs_insert_empty_item(trans, root, path, &key,
3166 sizeof(*item));
3167 if (ret)
3168 goto out;
3169
3170 leaf = path->nodes[0];
3171 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
3172
3173 memzero_extent_buffer(leaf, (unsigned long)item, sizeof(*item));
3174
3175 btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->data);
3176 btrfs_set_balance_data(leaf, item, &disk_bargs);
3177 btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->meta);
3178 btrfs_set_balance_meta(leaf, item, &disk_bargs);
3179 btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->sys);
3180 btrfs_set_balance_sys(leaf, item, &disk_bargs);
3181
3182 btrfs_set_balance_flags(leaf, item, bctl->flags);
3183
3184 btrfs_mark_buffer_dirty(leaf);
3185out:
3186 btrfs_free_path(path);
3187 err = btrfs_commit_transaction(trans);
3188 if (err && !ret)
3189 ret = err;
3190 return ret;
3191}
3192
3193static int del_balance_item(struct btrfs_fs_info *fs_info)
3194{
3195 struct btrfs_root *root = fs_info->tree_root;
3196 struct btrfs_trans_handle *trans;
3197 struct btrfs_path *path;
3198 struct btrfs_key key;
3199 int ret, err;
3200
3201 path = btrfs_alloc_path();
3202 if (!path)
3203 return -ENOMEM;
3204
3205 trans = btrfs_start_transaction(root, 0);
3206 if (IS_ERR(trans)) {
3207 btrfs_free_path(path);
3208 return PTR_ERR(trans);
3209 }
3210
3211 key.objectid = BTRFS_BALANCE_OBJECTID;
3212 key.type = BTRFS_TEMPORARY_ITEM_KEY;
3213 key.offset = 0;
3214
3215 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3216 if (ret < 0)
3217 goto out;
3218 if (ret > 0) {
3219 ret = -ENOENT;
3220 goto out;
3221 }
3222
3223 ret = btrfs_del_item(trans, root, path);
3224out:
3225 btrfs_free_path(path);
3226 err = btrfs_commit_transaction(trans);
3227 if (err && !ret)
3228 ret = err;
3229 return ret;
3230}
3231
/*
 * This is a heuristic used to reduce the number of chunks balanced on
 * resume after balance was interrupted.
 */
3236static void update_balance_args(struct btrfs_balance_control *bctl)
3237{
 /*
  * Turn on soft mode for chunk types that were being converted.
  */
3241 if (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)
3242 bctl->data.flags |= BTRFS_BALANCE_ARGS_SOFT;
3243 if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)
3244 bctl->sys.flags |= BTRFS_BALANCE_ARGS_SOFT;
3245 if (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)
3246 bctl->meta.flags |= BTRFS_BALANCE_ARGS_SOFT;
3247
 /*
  * Turn on usage filter if it is not already used.  The idea is
  * that chunks that we have already balanced should be
  * reasonably full.  Don't do it for chunks that are being
  * converted - that will keep us from relocating unconverted
  * (albeit full) chunks.
  */
3255 if (!(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE) &&
3256 !(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
3257 !(bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
3258 bctl->data.flags |= BTRFS_BALANCE_ARGS_USAGE;
3259 bctl->data.usage = 90;
3260 }
3261 if (!(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE) &&
3262 !(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
3263 !(bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
3264 bctl->sys.flags |= BTRFS_BALANCE_ARGS_USAGE;
3265 bctl->sys.usage = 90;
3266 }
3267 if (!(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE) &&
3268 !(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
3269 !(bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
3270 bctl->meta.flags |= BTRFS_BALANCE_ARGS_USAGE;
3271 bctl->meta.usage = 90;
3272 }
3273}
3274
/*
 * Should be called with both balance and volume mutexes held to
 * serialize other volume operations (add_dev/rm_dev/resize) with
 * restriper.  Same goes for unset_balance_control.
 */
3280static void set_balance_control(struct btrfs_balance_control *bctl)
3281{
3282 struct btrfs_fs_info *fs_info = bctl->fs_info;
3283
3284 BUG_ON(fs_info->balance_ctl);
3285
3286 spin_lock(&fs_info->balance_lock);
3287 fs_info->balance_ctl = bctl;
3288 spin_unlock(&fs_info->balance_lock);
3289}
3290
3291static void unset_balance_control(struct btrfs_fs_info *fs_info)
3292{
3293 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3294
3295 BUG_ON(!fs_info->balance_ctl);
3296
3297 spin_lock(&fs_info->balance_lock);
3298 fs_info->balance_ctl = NULL;
3299 spin_unlock(&fs_info->balance_lock);
3300
3301 kfree(bctl);
3302}
3303
/*
 * Balance filters.  Return 1 if chunk should be filtered out
 * (should not be balanced).
 */
3308static int chunk_profiles_filter(u64 chunk_type,
3309 struct btrfs_balance_args *bargs)
3310{
3311 chunk_type = chunk_to_extended(chunk_type) &
3312 BTRFS_EXTENDED_PROFILE_MASK;
3313
3314 if (bargs->profiles & chunk_type)
3315 return 0;
3316
3317 return 1;
3318}
3319
3320static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
3321 struct btrfs_balance_args *bargs)
3322{
3323 struct btrfs_block_group_cache *cache;
3324 u64 chunk_used;
3325 u64 user_thresh_min;
3326 u64 user_thresh_max;
3327 int ret = 1;
3328
3329 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
3330 chunk_used = btrfs_block_group_used(&cache->item);
3331
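 /*
  * usage_min and usage_max are percentages of the block group size;
  * the chunk passes the filter (return 0) when its used bytes fall
  * inside [user_thresh_min, user_thresh_max).
  */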
3332 if (bargs->usage_min == 0)
3333 user_thresh_min = 0;
3334 else
3335 user_thresh_min = div_factor_fine(cache->key.offset,
3336 bargs->usage_min);
3337
3338 if (bargs->usage_max == 0)
3339 user_thresh_max = 1;
3340 else if (bargs->usage_max > 100)
3341 user_thresh_max = cache->key.offset;
3342 else
3343 user_thresh_max = div_factor_fine(cache->key.offset,
3344 bargs->usage_max);
3345
3346 if (user_thresh_min <= chunk_used && chunk_used < user_thresh_max)
3347 ret = 0;
3348
3349 btrfs_put_block_group(cache);
3350 return ret;
3351}
3352
3353static int chunk_usage_filter(struct btrfs_fs_info *fs_info,
3354 u64 chunk_offset, struct btrfs_balance_args *bargs)
3355{
3356 struct btrfs_block_group_cache *cache;
3357 u64 chunk_used, user_thresh;
3358 int ret = 1;
3359
3360 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
3361 chunk_used = btrfs_block_group_used(&cache->item);
3362
3363 if (bargs->usage_min == 0)
3364 user_thresh = 1;
3365 else if (bargs->usage > 100)
3366 user_thresh = cache->key.offset;
3367 else
3368 user_thresh = div_factor_fine(cache->key.offset,
3369 bargs->usage);
3370
3371 if (chunk_used < user_thresh)
3372 ret = 0;
3373
3374 btrfs_put_block_group(cache);
3375 return ret;
3376}
3377
3378static int chunk_devid_filter(struct extent_buffer *leaf,
3379 struct btrfs_chunk *chunk,
3380 struct btrfs_balance_args *bargs)
3381{
3382 struct btrfs_stripe *stripe;
3383 int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
3384 int i;
3385
3386 for (i = 0; i < num_stripes; i++) {
3387 stripe = btrfs_stripe_nr(chunk, i);
3388 if (btrfs_stripe_devid(leaf, stripe) == bargs->devid)
3389 return 0;
3390 }
3391
3392 return 1;
3393}
3394
3395
3396static int chunk_drange_filter(struct extent_buffer *leaf,
3397 struct btrfs_chunk *chunk,
3398 struct btrfs_balance_args *bargs)
3399{
3400 struct btrfs_stripe *stripe;
3401 int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
3402 u64 stripe_offset;
3403 u64 stripe_length;
3404 int factor;
3405 int i;
3406
3407 if (!(bargs->flags & BTRFS_BALANCE_ARGS_DEVID))
3408 return 0;

 /* chunk length divided by factor gives the length of each device extent */
3410 if (btrfs_chunk_type(leaf, chunk) & (BTRFS_BLOCK_GROUP_DUP |
3411 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10)) {
3412 factor = num_stripes / 2;
3413 } else if (btrfs_chunk_type(leaf, chunk) & BTRFS_BLOCK_GROUP_RAID5) {
3414 factor = num_stripes - 1;
3415 } else if (btrfs_chunk_type(leaf, chunk) & BTRFS_BLOCK_GROUP_RAID6) {
3416 factor = num_stripes - 2;
3417 } else {
3418 factor = num_stripes;
3419 }
3420
3421 for (i = 0; i < num_stripes; i++) {
3422 stripe = btrfs_stripe_nr(chunk, i);
3423 if (btrfs_stripe_devid(leaf, stripe) != bargs->devid)
3424 continue;
3425
3426 stripe_offset = btrfs_stripe_offset(leaf, stripe);
3427 stripe_length = btrfs_chunk_length(leaf, chunk);
3428 stripe_length = div_u64(stripe_length, factor);
3429
3430 if (stripe_offset < bargs->pend &&
3431 stripe_offset + stripe_length > bargs->pstart)
3432 return 0;
3433 }
3434
3435 return 1;
3436}
3437
3438
3439static int chunk_vrange_filter(struct extent_buffer *leaf,
3440 struct btrfs_chunk *chunk,
3441 u64 chunk_offset,
3442 struct btrfs_balance_args *bargs)
3443{
3444 if (chunk_offset < bargs->vend &&
3445 chunk_offset + btrfs_chunk_length(leaf, chunk) > bargs->vstart)
 /* at least part of the chunk is inside this vrange */
3447 return 0;
3448
3449 return 1;
3450}
3451
3452static int chunk_stripes_range_filter(struct extent_buffer *leaf,
3453 struct btrfs_chunk *chunk,
3454 struct btrfs_balance_args *bargs)
3455{
3456 int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
3457
3458 if (bargs->stripes_min <= num_stripes
3459 && num_stripes <= bargs->stripes_max)
3460 return 0;
3461
3462 return 1;
3463}
3464
3465static int chunk_soft_convert_filter(u64 chunk_type,
3466 struct btrfs_balance_args *bargs)
3467{
3468 if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT))
3469 return 0;
3470
3471 chunk_type = chunk_to_extended(chunk_type) &
3472 BTRFS_EXTENDED_PROFILE_MASK;
3473
3474 if (bargs->target == chunk_type)
3475 return 1;
3476
3477 return 0;
3478}
3479
3480static int should_balance_chunk(struct btrfs_fs_info *fs_info,
3481 struct extent_buffer *leaf,
3482 struct btrfs_chunk *chunk, u64 chunk_offset)
3483{
3484 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3485 struct btrfs_balance_args *bargs = NULL;
3486 u64 chunk_type = btrfs_chunk_type(leaf, chunk);
3487
3488
3489 if (!((chunk_type & BTRFS_BLOCK_GROUP_TYPE_MASK) &
3490 (bctl->flags & BTRFS_BALANCE_TYPE_MASK))) {
3491 return 0;
3492 }
3493
3494 if (chunk_type & BTRFS_BLOCK_GROUP_DATA)
3495 bargs = &bctl->data;
3496 else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
3497 bargs = &bctl->sys;
3498 else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA)
3499 bargs = &bctl->meta;
3500
3501
3502 if ((bargs->flags & BTRFS_BALANCE_ARGS_PROFILES) &&
3503 chunk_profiles_filter(chunk_type, bargs)) {
3504 return 0;
3505 }
3506
3507
3508 if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE) &&
3509 chunk_usage_filter(fs_info, chunk_offset, bargs)) {
3510 return 0;
3511 } else if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
3512 chunk_usage_range_filter(fs_info, chunk_offset, bargs)) {
3513 return 0;
3514 }
3515
3516
3517 if ((bargs->flags & BTRFS_BALANCE_ARGS_DEVID) &&
3518 chunk_devid_filter(leaf, chunk, bargs)) {
3519 return 0;
3520 }
3521
3522
3523 if ((bargs->flags & BTRFS_BALANCE_ARGS_DRANGE) &&
3524 chunk_drange_filter(leaf, chunk, bargs)) {
3525 return 0;
3526 }
3527
3528
3529 if ((bargs->flags & BTRFS_BALANCE_ARGS_VRANGE) &&
3530 chunk_vrange_filter(leaf, chunk, chunk_offset, bargs)) {
3531 return 0;
3532 }
3533
3534
3535 if ((bargs->flags & BTRFS_BALANCE_ARGS_STRIPES_RANGE) &&
3536 chunk_stripes_range_filter(leaf, chunk, bargs)) {
3537 return 0;
3538 }
3539
3540
3541 if ((bargs->flags & BTRFS_BALANCE_ARGS_SOFT) &&
3542 chunk_soft_convert_filter(chunk_type, bargs)) {
3543 return 0;
3544 }
3545
 /*
  * limited by count, must be the last filter
  */
3549 if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT)) {
3550 if (bargs->limit == 0)
3551 return 0;
3552 else
3553 bargs->limit--;
3554 } else if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT_RANGE)) {
 /*
  * Same logic as the 'limit' filter; the minimum cannot be
  * determined here because we do not have the global information
  * about the count of all chunks that satisfy the filters.
  */
3560 if (bargs->limit_max == 0)
3561 return 0;
3562 else
3563 bargs->limit_max--;
3564 }
3565
3566 return 1;
3567}
3568
3569static int __btrfs_balance(struct btrfs_fs_info *fs_info)
3570{
3571 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3572 struct btrfs_root *chunk_root = fs_info->chunk_root;
3573 struct btrfs_root *dev_root = fs_info->dev_root;
3574 struct list_head *devices;
3575 struct btrfs_device *device;
3576 u64 old_size;
3577 u64 size_to_free;
3578 u64 chunk_type;
3579 struct btrfs_chunk *chunk;
3580 struct btrfs_path *path = NULL;
3581 struct btrfs_key key;
3582 struct btrfs_key found_key;
3583 struct btrfs_trans_handle *trans;
3584 struct extent_buffer *leaf;
3585 int slot;
3586 int ret;
3587 int enospc_errors = 0;
3588 bool counting = true;
3589
3590 u64 limit_data = bctl->data.limit;
3591 u64 limit_meta = bctl->meta.limit;
3592 u64 limit_sys = bctl->sys.limit;
3593 u32 count_data = 0;
3594 u32 count_meta = 0;
3595 u32 count_sys = 0;
3596 int chunk_reserved = 0;
3597
 /* step one, make some room on all the devices */
3599 devices = &fs_info->fs_devices->devices;
3600 list_for_each_entry(device, devices, dev_list) {
3601 old_size = btrfs_device_get_total_bytes(device);
3602 size_to_free = div_factor(old_size, 1);
3603 size_to_free = min_t(u64, size_to_free, SZ_1M);
3604 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) ||
3605 btrfs_device_get_total_bytes(device) -
3606 btrfs_device_get_bytes_used(device) > size_to_free ||
3607 test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
3608 continue;
3609
3610 ret = btrfs_shrink_device(device, old_size - size_to_free);
3611 if (ret == -ENOSPC)
3612 break;
3613 if (ret) {
 /* btrfs_shrink_device never returns ret > 0 */
3615 WARN_ON(ret > 0);
3616 goto error;
3617 }
3618
3619 trans = btrfs_start_transaction(dev_root, 0);
3620 if (IS_ERR(trans)) {
3621 ret = PTR_ERR(trans);
3622 btrfs_info_in_rcu(fs_info,
3623 "resize: unable to start transaction after shrinking device %s (error %d), old size %llu, new size %llu",
3624 rcu_str_deref(device->name), ret,
3625 old_size, old_size - size_to_free);
3626 goto error;
3627 }
3628
3629 ret = btrfs_grow_device(trans, device, old_size);
3630 if (ret) {
3631 btrfs_end_transaction(trans);
 /* btrfs_grow_device never returns ret > 0 */
3633 WARN_ON(ret > 0);
3634 btrfs_info_in_rcu(fs_info,
3635 "resize: unable to grow device after shrinking device %s (error %d), old size %llu, new size %llu",
3636 rcu_str_deref(device->name), ret,
3637 old_size, old_size - size_to_free);
3638 goto error;
3639 }
3640
3641 btrfs_end_transaction(trans);
3642 }
3643
 /* step two, relocate all the chunks */
3645 path = btrfs_alloc_path();
3646 if (!path) {
3647 ret = -ENOMEM;
3648 goto error;
3649 }
3650
3651
3652 spin_lock(&fs_info->balance_lock);
3653 memset(&bctl->stat, 0, sizeof(bctl->stat));
3654 spin_unlock(&fs_info->balance_lock);
3655again:
3656 if (!counting) {
 /*
  * The limit filter decremented the per-type limits during the
  * counting pass, so restore the user supplied values before the
  * relocation pass.
  */
3661 bctl->data.limit = limit_data;
3662 bctl->meta.limit = limit_meta;
3663 bctl->sys.limit = limit_sys;
3664 }
3665 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
3666 key.offset = (u64)-1;
3667 key.type = BTRFS_CHUNK_ITEM_KEY;
3668
3669 while (1) {
3670 if ((!counting && atomic_read(&fs_info->balance_pause_req)) ||
3671 atomic_read(&fs_info->balance_cancel_req)) {
3672 ret = -ECANCELED;
3673 goto error;
3674 }
3675
3676 mutex_lock(&fs_info->delete_unused_bgs_mutex);
3677 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
3678 if (ret < 0) {
3679 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3680 goto error;
3681 }
3682
 /*
  * this shouldn't happen, it means the last relocate
  * failed
  */
3687 if (ret == 0)
3688 BUG();
3689
3690 ret = btrfs_previous_item(chunk_root, path, 0,
3691 BTRFS_CHUNK_ITEM_KEY);
3692 if (ret) {
3693 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3694 ret = 0;
3695 break;
3696 }
3697
3698 leaf = path->nodes[0];
3699 slot = path->slots[0];
3700 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3701
3702 if (found_key.objectid != key.objectid) {
3703 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3704 break;
3705 }
3706
3707 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
3708 chunk_type = btrfs_chunk_type(leaf, chunk);
3709
3710 if (!counting) {
3711 spin_lock(&fs_info->balance_lock);
3712 bctl->stat.considered++;
3713 spin_unlock(&fs_info->balance_lock);
3714 }
3715
3716 ret = should_balance_chunk(fs_info, leaf, chunk,
3717 found_key.offset);
3718
3719 btrfs_release_path(path);
3720 if (!ret) {
3721 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3722 goto loop;
3723 }
3724
3725 if (counting) {
3726 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3727 spin_lock(&fs_info->balance_lock);
3728 bctl->stat.expected++;
3729 spin_unlock(&fs_info->balance_lock);
3730
3731 if (chunk_type & BTRFS_BLOCK_GROUP_DATA)
3732 count_data++;
3733 else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
3734 count_sys++;
3735 else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA)
3736 count_meta++;
3737
3738 goto loop;
3739 }
3740
 /*
  * Apply limit_min filter, no need to check if the LIMITS
  * filter is used, limit_min is 0 by default
  */
3745 if (((chunk_type & BTRFS_BLOCK_GROUP_DATA) &&
3746 count_data < bctl->data.limit_min)
3747 || ((chunk_type & BTRFS_BLOCK_GROUP_METADATA) &&
3748 count_meta < bctl->meta.limit_min)
3749 || ((chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) &&
3750 count_sys < bctl->sys.limit_min)) {
3751 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3752 goto loop;
3753 }
3754
3755 if (!chunk_reserved) {
 /*
  * We may be relocating the only data chunk we have,
  * which could potentially end up with losing data's
  * raid profile, so lets allocate an empty one in
  * advance.
  */
3762 ret = btrfs_may_alloc_data_chunk(fs_info,
3763 found_key.offset);
3764 if (ret < 0) {
3765 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3766 goto error;
3767 } else if (ret == 1) {
3768 chunk_reserved = 1;
3769 }
3770 }
3771
3772 ret = btrfs_relocate_chunk(fs_info, found_key.offset);
3773 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3774 if (ret && ret != -ENOSPC)
3775 goto error;
3776 if (ret == -ENOSPC) {
3777 enospc_errors++;
3778 } else {
3779 spin_lock(&fs_info->balance_lock);
3780 bctl->stat.completed++;
3781 spin_unlock(&fs_info->balance_lock);
3782 }
3783loop:
3784 if (found_key.offset == 0)
3785 break;
3786 key.offset = found_key.offset - 1;
3787 }
3788
3789 if (counting) {
3790 btrfs_release_path(path);
3791 counting = false;
3792 goto again;
3793 }
3794error:
3795 btrfs_free_path(path);
3796 if (enospc_errors) {
3797 btrfs_info(fs_info, "%d enospc errors during balance",
3798 enospc_errors);
3799 if (!ret)
3800 ret = -ENOSPC;
3801 }
3802
3803 return ret;
3804}
3805
/*
 * alloc_profile_is_valid - see if a given profile is valid and reduced
 * @flags: profile to validate
 * @extended: if true @flags is treated as an extended profile
 */
3811static int alloc_profile_is_valid(u64 flags, int extended)
3812{
3813 u64 mask = (extended ? BTRFS_EXTENDED_PROFILE_MASK :
3814 BTRFS_BLOCK_GROUP_PROFILE_MASK);
3815
3816 flags &= ~BTRFS_BLOCK_GROUP_TYPE_MASK;
3817
3818
3819 if (flags & ~mask)
3820 return 0;
3821
3822
3823 if (flags == 0)
3824 return !extended;
3825
3826
3827 return (flags & (flags - 1)) == 0;
3828}
3829
3830static inline int balance_need_close(struct btrfs_fs_info *fs_info)
3831{
3832
3833 return atomic_read(&fs_info->balance_cancel_req) ||
3834 (atomic_read(&fs_info->balance_pause_req) == 0 &&
3835 atomic_read(&fs_info->balance_cancel_req) == 0);
3836}
3837
3838static void __cancel_balance(struct btrfs_fs_info *fs_info)
3839{
3840 int ret;
3841
3842 unset_balance_control(fs_info);
3843 ret = del_balance_item(fs_info);
3844 if (ret)
3845 btrfs_handle_fs_error(fs_info, ret, NULL);
3846
3847 clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
3848}
3849
/* Non-zero return value signifies invalidity */
3851static inline int validate_convert_profile(struct btrfs_balance_args *bctl_arg,
3852 u64 allowed)
3853{
3854 return ((bctl_arg->flags & BTRFS_BALANCE_ARGS_CONVERT) &&
3855 (!alloc_profile_is_valid(bctl_arg->target, 1) ||
3856 (bctl_arg->target & ~allowed)));
3857}
3858
/*
 * Should be called with both balance and volume mutexes held
 */
3862int btrfs_balance(struct btrfs_balance_control *bctl,
3863 struct btrfs_ioctl_balance_args *bargs)
3864{
3865 struct btrfs_fs_info *fs_info = bctl->fs_info;
3866 u64 meta_target, data_target;
3867 u64 allowed;
3868 int mixed = 0;
3869 int ret;
3870 u64 num_devices;
3871 unsigned seq;
3872
3873 if (btrfs_fs_closing(fs_info) ||
3874 atomic_read(&fs_info->balance_pause_req) ||
3875 atomic_read(&fs_info->balance_cancel_req)) {
3876 ret = -EINVAL;
3877 goto out;
3878 }
3879
3880 allowed = btrfs_super_incompat_flags(fs_info->super_copy);
3881 if (allowed & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
3882 mixed = 1;
3883
 /*
  * In case of mixed groups both data and meta should be picked,
  * and identical options should be given for both of them.
  */
3888 allowed = BTRFS_BALANCE_DATA | BTRFS_BALANCE_METADATA;
3889 if (mixed && (bctl->flags & allowed)) {
3890 if (!(bctl->flags & BTRFS_BALANCE_DATA) ||
3891 !(bctl->flags & BTRFS_BALANCE_METADATA) ||
3892 memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) {
3893 btrfs_err(fs_info,
3894 "with mixed groups data and metadata balance options must be the same");
3895 ret = -EINVAL;
3896 goto out;
3897 }
3898 }
3899
3900 num_devices = fs_info->fs_devices->num_devices;
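 /*
  * A running device replace temporarily adds the target device to the
  * device list, so do not count it when deciding which RAID profiles
  * the balance target is allowed to use.
  */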
3901 btrfs_dev_replace_read_lock(&fs_info->dev_replace);
3902 if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
3903 BUG_ON(num_devices < 1);
3904 num_devices--;
3905 }
3906 btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
3907 allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE | BTRFS_BLOCK_GROUP_DUP;
3908 if (num_devices > 1)
3909 allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1);
3910 if (num_devices > 2)
3911 allowed |= BTRFS_BLOCK_GROUP_RAID5;
3912 if (num_devices > 3)
3913 allowed |= (BTRFS_BLOCK_GROUP_RAID10 |
3914 BTRFS_BLOCK_GROUP_RAID6);
3915 if (validate_convert_profile(&bctl->data, allowed)) {
3916 btrfs_err(fs_info,
3917 "unable to start balance with target data profile %llu",
3918 bctl->data.target);
3919 ret = -EINVAL;
3920 goto out;
3921 }
3922 if (validate_convert_profile(&bctl->meta, allowed)) {
3923 btrfs_err(fs_info,
3924 "unable to start balance with target metadata profile %llu",
3925 bctl->meta.target);
3926 ret = -EINVAL;
3927 goto out;
3928 }
3929 if (validate_convert_profile(&bctl->sys, allowed)) {
3930 btrfs_err(fs_info,
3931 "unable to start balance with target system profile %llu",
3932 bctl->sys.target);
3933 ret = -EINVAL;
3934 goto out;
3935 }

 /* allow to reduce meta or sys integrity only if force set */
3938 allowed = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
3939 BTRFS_BLOCK_GROUP_RAID10 |
3940 BTRFS_BLOCK_GROUP_RAID5 |
3941 BTRFS_BLOCK_GROUP_RAID6;
3942 do {
3943 seq = read_seqbegin(&fs_info->profiles_lock);
3944
3945 if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
3946 (fs_info->avail_system_alloc_bits & allowed) &&
3947 !(bctl->sys.target & allowed)) ||
3948 ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
3949 (fs_info->avail_metadata_alloc_bits & allowed) &&
3950 !(bctl->meta.target & allowed))) {
3951 if (bctl->flags & BTRFS_BALANCE_FORCE) {
3952 btrfs_info(fs_info,
3953 "force reducing metadata integrity");
3954 } else {
3955 btrfs_err(fs_info,
3956 "balance will reduce metadata integrity, use force if you want this");
3957 ret = -EINVAL;
3958 goto out;
3959 }
3960 }
3961 } while (read_seqretry(&fs_info->profiles_lock, seq));
3962
3963
3964 meta_target = (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
3965 bctl->meta.target : fs_info->avail_metadata_alloc_bits;
3966 data_target = (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
3967 bctl->data.target : fs_info->avail_data_alloc_bits;
3968 if (btrfs_get_num_tolerated_disk_barrier_failures(meta_target) <
3969 btrfs_get_num_tolerated_disk_barrier_failures(data_target)) {
3970 btrfs_warn(fs_info,
3971 "metadata profile 0x%llx has lower redundancy than data profile 0x%llx",
3972 meta_target, data_target);
3973 }
3974
3975 ret = insert_balance_item(fs_info, bctl);
3976 if (ret && ret != -EEXIST)
3977 goto out;
3978
3979 if (!(bctl->flags & BTRFS_BALANCE_RESUME)) {
3980 BUG_ON(ret == -EEXIST);
3981 set_balance_control(bctl);
3982 } else {
3983 BUG_ON(ret != -EEXIST);
3984 spin_lock(&fs_info->balance_lock);
3985 update_balance_args(bctl);
3986 spin_unlock(&fs_info->balance_lock);
3987 }
3988
3989 atomic_inc(&fs_info->balance_running);
3990 mutex_unlock(&fs_info->balance_mutex);
3991
3992 ret = __btrfs_balance(fs_info);
3993
3994 mutex_lock(&fs_info->balance_mutex);
3995 atomic_dec(&fs_info->balance_running);
3996
3997 if (bargs) {
3998 memset(bargs, 0, sizeof(*bargs));
3999 update_ioctl_balance_args(fs_info, 0, bargs);
4000 }
4001
4002 if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
4003 balance_need_close(fs_info)) {
4004 __cancel_balance(fs_info);
4005 }
4006
4007 wake_up(&fs_info->balance_wait_q);
4008
4009 return ret;
4010out:
4011 if (bctl->flags & BTRFS_BALANCE_RESUME)
4012 __cancel_balance(fs_info);
4013 else {
4014 kfree(bctl);
4015 clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
4016 }
4017 return ret;
4018}
4019
4020static int balance_kthread(void *data)
4021{
4022 struct btrfs_fs_info *fs_info = data;
4023 int ret = 0;
4024
4025 mutex_lock(&fs_info->volume_mutex);
4026 mutex_lock(&fs_info->balance_mutex);
4027
4028 if (fs_info->balance_ctl) {
4029 btrfs_info(fs_info, "continuing balance");
4030 ret = btrfs_balance(fs_info->balance_ctl, NULL);
4031 }
4032
4033 mutex_unlock(&fs_info->balance_mutex);
4034 mutex_unlock(&fs_info->volume_mutex);
4035
4036 return ret;
4037}
4038
4039int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
4040{
4041 struct task_struct *tsk;
4042
4043 spin_lock(&fs_info->balance_lock);
4044 if (!fs_info->balance_ctl) {
4045 spin_unlock(&fs_info->balance_lock);
4046 return 0;
4047 }
4048 spin_unlock(&fs_info->balance_lock);
4049
4050 if (btrfs_test_opt(fs_info, SKIP_BALANCE)) {
4051 btrfs_info(fs_info, "force skipping balance");
4052 return 0;
4053 }
4054
 /*
  * A ro->rw remount sequence should continue with the paused balance
  * regardless of who pauses it, system or the user as of now, so set
  * the resume flag.
  */
4060 spin_lock(&fs_info->balance_lock);
4061 fs_info->balance_ctl->flags |= BTRFS_BALANCE_RESUME;
4062 spin_unlock(&fs_info->balance_lock);
4063
4064 tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
4065 return PTR_ERR_OR_ZERO(tsk);
4066}
4067
4068int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
4069{
4070 struct btrfs_balance_control *bctl;
4071 struct btrfs_balance_item *item;
4072 struct btrfs_disk_balance_args disk_bargs;
4073 struct btrfs_path *path;
4074 struct extent_buffer *leaf;
4075 struct btrfs_key key;
4076 int ret;
4077
4078 path = btrfs_alloc_path();
4079 if (!path)
4080 return -ENOMEM;
4081
4082 key.objectid = BTRFS_BALANCE_OBJECTID;
4083 key.type = BTRFS_TEMPORARY_ITEM_KEY;
4084 key.offset = 0;
4085
4086 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
4087 if (ret < 0)
4088 goto out;
4089 if (ret > 0) {
4090 ret = 0;
4091 goto out;
4092 }
4093
4094 bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
4095 if (!bctl) {
4096 ret = -ENOMEM;
4097 goto out;
4098 }
4099
4100 leaf = path->nodes[0];
4101 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
4102
4103 bctl->fs_info = fs_info;
4104 bctl->flags = btrfs_balance_flags(leaf, item);
4105 bctl->flags |= BTRFS_BALANCE_RESUME;
4106
4107 btrfs_balance_data(leaf, item, &disk_bargs);
4108 btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs);
4109 btrfs_balance_meta(leaf, item, &disk_bargs);
4110 btrfs_disk_balance_args_to_cpu(&bctl->meta, &disk_bargs);
4111 btrfs_balance_sys(leaf, item, &disk_bargs);
4112 btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
4113
4114 WARN_ON(test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags));
4115
4116 mutex_lock(&fs_info->volume_mutex);
4117 mutex_lock(&fs_info->balance_mutex);
4118
4119 set_balance_control(bctl);
4120
4121 mutex_unlock(&fs_info->balance_mutex);
4122 mutex_unlock(&fs_info->volume_mutex);
4123out:
4124 btrfs_free_path(path);
4125 return ret;
4126}
4127
4128int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
4129{
4130 int ret = 0;
4131
4132 mutex_lock(&fs_info->balance_mutex);
4133 if (!fs_info->balance_ctl) {
4134 mutex_unlock(&fs_info->balance_mutex);
4135 return -ENOTCONN;
4136 }
4137
4138 if (atomic_read(&fs_info->balance_running)) {
4139 atomic_inc(&fs_info->balance_pause_req);
4140 mutex_unlock(&fs_info->balance_mutex);
4141
4142 wait_event(fs_info->balance_wait_q,
4143 atomic_read(&fs_info->balance_running) == 0);
4144
4145 mutex_lock(&fs_info->balance_mutex);
4146
4147 BUG_ON(atomic_read(&fs_info->balance_running));
4148 atomic_dec(&fs_info->balance_pause_req);
4149 } else {
4150 ret = -ENOTCONN;
4151 }
4152
4153 mutex_unlock(&fs_info->balance_mutex);
4154 return ret;
4155}
4156
4157int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
4158{
4159 if (sb_rdonly(fs_info->sb))
4160 return -EROFS;
4161
4162 mutex_lock(&fs_info->balance_mutex);
4163 if (!fs_info->balance_ctl) {
4164 mutex_unlock(&fs_info->balance_mutex);
4165 return -ENOTCONN;
4166 }
4167
4168 atomic_inc(&fs_info->balance_cancel_req);
4169
 /*
  * if we are running just wait and return, balance item is
  * deleted in btrfs_balance in this case
  */
4173 if (atomic_read(&fs_info->balance_running)) {
4174 mutex_unlock(&fs_info->balance_mutex);
4175 wait_event(fs_info->balance_wait_q,
4176 atomic_read(&fs_info->balance_running) == 0);
4177 mutex_lock(&fs_info->balance_mutex);
4178 } else {
 /* __cancel_balance needs volume_mutex */
4180 mutex_unlock(&fs_info->balance_mutex);
4181 mutex_lock(&fs_info->volume_mutex);
4182 mutex_lock(&fs_info->balance_mutex);
4183
4184 if (fs_info->balance_ctl)
4185 __cancel_balance(fs_info);
4186
4187 mutex_unlock(&fs_info->volume_mutex);
4188 }
4189
4190 BUG_ON(fs_info->balance_ctl || atomic_read(&fs_info->balance_running));
4191 atomic_dec(&fs_info->balance_cancel_req);
4192 mutex_unlock(&fs_info->balance_mutex);
4193 return 0;
4194}
4195
4196static int btrfs_uuid_scan_kthread(void *data)
4197{
4198 struct btrfs_fs_info *fs_info = data;
4199 struct btrfs_root *root = fs_info->tree_root;
4200 struct btrfs_key key;
4201 struct btrfs_path *path = NULL;
4202 int ret = 0;
4203 struct extent_buffer *eb;
4204 int slot;
4205 struct btrfs_root_item root_item;
4206 u32 item_size;
4207 struct btrfs_trans_handle *trans = NULL;
4208
4209 path = btrfs_alloc_path();
4210 if (!path) {
4211 ret = -ENOMEM;
4212 goto out;
4213 }
4214
4215 key.objectid = 0;
4216 key.type = BTRFS_ROOT_ITEM_KEY;
4217 key.offset = 0;
4218
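 /*
  * Walk every root item in the tree root and add uuid tree entries
  * for each subvolume's uuid and received_uuid.
  */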
4219 while (1) {
4220 ret = btrfs_search_forward(root, &key, path,
4221 BTRFS_OLDEST_GENERATION);
4222 if (ret) {
4223 if (ret > 0)
4224 ret = 0;
4225 break;
4226 }
4227
4228 if (key.type != BTRFS_ROOT_ITEM_KEY ||
4229 (key.objectid < BTRFS_FIRST_FREE_OBJECTID &&
4230 key.objectid != BTRFS_FS_TREE_OBJECTID) ||
4231 key.objectid > BTRFS_LAST_FREE_OBJECTID)
4232 goto skip;
4233
4234 eb = path->nodes[0];
4235 slot = path->slots[0];
4236 item_size = btrfs_item_size_nr(eb, slot);
4237 if (item_size < sizeof(root_item))
4238 goto skip;
4239
4240 read_extent_buffer(eb, &root_item,
4241 btrfs_item_ptr_offset(eb, slot),
4242 (int)sizeof(root_item));
4243 if (btrfs_root_refs(&root_item) == 0)
4244 goto skip;
4245
4246 if (!btrfs_is_empty_uuid(root_item.uuid) ||
4247 !btrfs_is_empty_uuid(root_item.received_uuid)) {
4248 if (trans)
4249 goto update_tree;
4250
4251 btrfs_release_path(path);
4252
 /*
  * 1 - subvol uuid item
  * 1 - received_subvol uuid item
  */
4256 trans = btrfs_start_transaction(fs_info->uuid_root, 2);
4257 if (IS_ERR(trans)) {
4258 ret = PTR_ERR(trans);
4259 break;
4260 }
4261 continue;
4262 } else {
4263 goto skip;
4264 }
4265update_tree:
4266 if (!btrfs_is_empty_uuid(root_item.uuid)) {
4267 ret = btrfs_uuid_tree_add(trans, fs_info,
4268 root_item.uuid,
4269 BTRFS_UUID_KEY_SUBVOL,
4270 key.objectid);
4271 if (ret < 0) {
4272 btrfs_warn(fs_info, "uuid_tree_add failed %d",
4273 ret);
4274 break;
4275 }
4276 }
4277
4278 if (!btrfs_is_empty_uuid(root_item.received_uuid)) {
4279 ret = btrfs_uuid_tree_add(trans, fs_info,
4280 root_item.received_uuid,
4281 BTRFS_UUID_KEY_RECEIVED_SUBVOL,
4282 key.objectid);
4283 if (ret < 0) {
4284 btrfs_warn(fs_info, "uuid_tree_add failed %d",
4285 ret);
4286 break;
4287 }
4288 }
4289
4290skip:
4291 if (trans) {
4292 ret = btrfs_end_transaction(trans);
4293 trans = NULL;
4294 if (ret)
4295 break;
4296 }
4297
4298 btrfs_release_path(path);
4299 if (key.offset < (u64)-1) {
4300 key.offset++;
4301 } else if (key.type < BTRFS_ROOT_ITEM_KEY) {
4302 key.offset = 0;
4303 key.type = BTRFS_ROOT_ITEM_KEY;
4304 } else if (key.objectid < (u64)-1) {
4305 key.offset = 0;
4306 key.type = BTRFS_ROOT_ITEM_KEY;
4307 key.objectid++;
4308 } else {
4309 break;
4310 }
4311 cond_resched();
4312 }
4313
4314out:
4315 btrfs_free_path(path);
4316 if (trans && !IS_ERR(trans))
4317 btrfs_end_transaction(trans);
4318 if (ret)
4319 btrfs_warn(fs_info, "btrfs_uuid_scan_kthread failed %d", ret);
4320 else
4321 set_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags);
4322 up(&fs_info->uuid_tree_rescan_sem);
4323 return 0;
4324}
4325
/*
 * Callback for btrfs_uuid_tree_iterate().
 * returns:
 * 0    check succeeded, the entry is not outdated
 * < 0  if an error occurred
 * > 0  if the check failed, which means the caller shall remove the entry
 */
4333static int btrfs_check_uuid_tree_entry(struct btrfs_fs_info *fs_info,
4334 u8 *uuid, u8 type, u64 subid)
4335{
4336 struct btrfs_key key;
4337 int ret = 0;
4338 struct btrfs_root *subvol_root;
4339
4340 if (type != BTRFS_UUID_KEY_SUBVOL &&
4341 type != BTRFS_UUID_KEY_RECEIVED_SUBVOL)
4342 goto out;
4343
4344 key.objectid = subid;
4345 key.type = BTRFS_ROOT_ITEM_KEY;
4346 key.offset = (u64)-1;
4347 subvol_root = btrfs_read_fs_root_no_name(fs_info, &key);
4348 if (IS_ERR(subvol_root)) {
4349 ret = PTR_ERR(subvol_root);
4350 if (ret == -ENOENT)
4351 ret = 1;
4352 goto out;
4353 }
4354
4355 switch (type) {
4356 case BTRFS_UUID_KEY_SUBVOL:
4357 if (memcmp(uuid, subvol_root->root_item.uuid, BTRFS_UUID_SIZE))
4358 ret = 1;
4359 break;
4360 case BTRFS_UUID_KEY_RECEIVED_SUBVOL:
4361 if (memcmp(uuid, subvol_root->root_item.received_uuid,
4362 BTRFS_UUID_SIZE))
4363 ret = 1;
4364 break;
4365 }
4366
4367out:
4368 return ret;
4369}
4370
4371static int btrfs_uuid_rescan_kthread(void *data)
4372{
4373 struct btrfs_fs_info *fs_info = (struct btrfs_fs_info *)data;
4374 int ret;
4375
 /*
  * 1st step is to iterate through the existing UUID tree and
  * to delete all entries that contain outdated data.
  * 2nd step is to add all missing entries to the UUID tree.
  */
4381 ret = btrfs_uuid_tree_iterate(fs_info, btrfs_check_uuid_tree_entry);
4382 if (ret < 0) {
4383 btrfs_warn(fs_info, "iterating uuid_tree failed %d", ret);
4384 up(&fs_info->uuid_tree_rescan_sem);
4385 return ret;
4386 }
4387 return btrfs_uuid_scan_kthread(data);
4388}
4389
4390int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
4391{
4392 struct btrfs_trans_handle *trans;
4393 struct btrfs_root *tree_root = fs_info->tree_root;
4394 struct btrfs_root *uuid_root;
4395 struct task_struct *task;
4396 int ret;
4397
 /*
  * 1 - root node
  * 1 - root item
  */
4402 trans = btrfs_start_transaction(tree_root, 2);
4403 if (IS_ERR(trans))
4404 return PTR_ERR(trans);
4405
4406 uuid_root = btrfs_create_tree(trans, fs_info,
4407 BTRFS_UUID_TREE_OBJECTID);
4408 if (IS_ERR(uuid_root)) {
4409 ret = PTR_ERR(uuid_root);
4410 btrfs_abort_transaction(trans, ret);
4411 btrfs_end_transaction(trans);
4412 return ret;
4413 }
4414
4415 fs_info->uuid_root = uuid_root;
4416
4417 ret = btrfs_commit_transaction(trans);
4418 if (ret)
4419 return ret;
4420
4421 down(&fs_info->uuid_tree_rescan_sem);
4422 task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid");
4423 if (IS_ERR(task)) {
4424
4425 btrfs_warn(fs_info, "failed to start uuid_scan task");
4426 up(&fs_info->uuid_tree_rescan_sem);
4427 return PTR_ERR(task);
4428 }
4429
4430 return 0;
4431}
4432
4433int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info)
4434{
4435 struct task_struct *task;
4436
4437 down(&fs_info->uuid_tree_rescan_sem);
4438 task = kthread_run(btrfs_uuid_rescan_kthread, fs_info, "btrfs-uuid");
4439 if (IS_ERR(task)) {
4440
4441 btrfs_warn(fs_info, "failed to start uuid_rescan task");
4442 up(&fs_info->uuid_tree_rescan_sem);
4443 return PTR_ERR(task);
4444 }
4445
4446 return 0;
4447}
4448
/*
 * shrinking a device means finding all of the device extents past
 * the new size, and then following the back refs to the chunks.
 * The chunk relocation code actually frees the device extent.
 */
4454int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
4455{
4456 struct btrfs_fs_info *fs_info = device->fs_info;
4457 struct btrfs_root *root = fs_info->dev_root;
4458 struct btrfs_trans_handle *trans;
4459 struct btrfs_dev_extent *dev_extent = NULL;
4460 struct btrfs_path *path;
4461 u64 length;
4462 u64 chunk_offset;
4463 int ret;
4464 int slot;
4465 int failed = 0;
4466 bool retried = false;
4467 bool checked_pending_chunks = false;
4468 struct extent_buffer *l;
4469 struct btrfs_key key;
4470 struct btrfs_super_block *super_copy = fs_info->super_copy;
4471 u64 old_total = btrfs_super_total_bytes(super_copy);
4472 u64 old_size = btrfs_device_get_total_bytes(device);
4473 u64 diff;
4474
4475 new_size = round_down(new_size, fs_info->sectorsize);
4476 diff = round_down(old_size - new_size, fs_info->sectorsize);
4477
4478 if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
4479 return -EINVAL;
4480
4481 path = btrfs_alloc_path();
4482 if (!path)
4483 return -ENOMEM;
4484
4485 path->reada = READA_FORWARD;
4486
4487 mutex_lock(&fs_info->chunk_mutex);
4488
4489 btrfs_device_set_total_bytes(device, new_size);
4490 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
4491 device->fs_devices->total_rw_bytes -= diff;
4492 atomic64_sub(diff, &fs_info->free_chunk_space);
4493 }
4494 mutex_unlock(&fs_info->chunk_mutex);
4495
4496again:
4497 key.objectid = device->devid;
4498 key.offset = (u64)-1;
4499 key.type = BTRFS_DEV_EXTENT_KEY;
4500
4501 do {
4502 mutex_lock(&fs_info->delete_unused_bgs_mutex);
4503 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4504 if (ret < 0) {
4505 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4506 goto done;
4507 }
4508
4509 ret = btrfs_previous_item(root, path, 0, key.type);
4510 if (ret)
4511 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4512 if (ret < 0)
4513 goto done;
4514 if (ret) {
4515 ret = 0;
4516 btrfs_release_path(path);
4517 break;
4518 }
4519
4520 l = path->nodes[0];
4521 slot = path->slots[0];
4522 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
4523
4524 if (key.objectid != device->devid) {
4525 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4526 btrfs_release_path(path);
4527 break;
4528 }
4529
4530 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
4531 length = btrfs_dev_extent_length(l, dev_extent);
4532
4533 if (key.offset + length <= new_size) {
4534 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4535 btrfs_release_path(path);
4536 break;
4537 }
4538
4539 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
4540 btrfs_release_path(path);
4541
 /*
  * We may be relocating the only data chunk we have,
  * which could potentially end up with losing data's
  * raid profile, so lets allocate an empty one in
  * advance.
  */
4548 ret = btrfs_may_alloc_data_chunk(fs_info, chunk_offset);
4549 if (ret < 0) {
4550 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4551 goto done;
4552 }
4553
4554 ret = btrfs_relocate_chunk(fs_info, chunk_offset);
4555 mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4556 if (ret && ret != -ENOSPC)
4557 goto done;
4558 if (ret == -ENOSPC)
4559 failed++;
4560 } while (key.offset-- > 0);
4561
4562 if (failed && !retried) {
4563 failed = 0;
4564 retried = true;
4565 goto again;
4566 } else if (failed && retried) {
4567 ret = -ENOSPC;
4568 goto done;
4569 }
4570
4571
4572 trans = btrfs_start_transaction(root, 0);
4573 if (IS_ERR(trans)) {
4574 ret = PTR_ERR(trans);
4575 goto done;
4576 }
4577
4578 mutex_lock(&fs_info->chunk_mutex);
4579
	/*
	 * We checked in the above loop all device extents that were already
	 * in the device tree. But before we have updated the device's
	 * total_bytes to the new size, we might have had chunk allocations
	 * that have not complete yet (new block groups attached to transaction
	 * handles), and therefore their device extents were not yet in the
	 * device tree and we missed them in the loop above. So if we have any
	 * pending chunk using a device extent that overlaps the device range
	 * that we can not use anymore, commit the current transaction and
	 * repeat the search on the device tree - before we commit the
	 * transaction any pending chunk allocation completes, so its device
	 * extents show up in the device tree and the loop above finds them.
	 */
4592 if (!checked_pending_chunks) {
4593 u64 start = new_size;
4594 u64 len = old_size - new_size;
4595
4596 if (contains_pending_extent(trans->transaction, device,
4597 &start, len)) {
4598 mutex_unlock(&fs_info->chunk_mutex);
4599 checked_pending_chunks = true;
4600 failed = 0;
4601 retried = false;
4602 ret = btrfs_commit_transaction(trans);
4603 if (ret)
4604 goto done;
4605 goto again;
4606 }
4607 }
4608
4609 btrfs_device_set_disk_total_bytes(device, new_size);
4610 if (list_empty(&device->resized_list))
4611 list_add_tail(&device->resized_list,
4612 &fs_info->fs_devices->resized_devices);
4613
4614 WARN_ON(diff > old_total);
4615 btrfs_set_super_total_bytes(super_copy,
4616 round_down(old_total - diff, fs_info->sectorsize));
4617 mutex_unlock(&fs_info->chunk_mutex);
4618
	/* Now btrfs_update_device() will change the on-disk size. */
4620 ret = btrfs_update_device(trans, device);
4621 btrfs_end_transaction(trans);
4622done:
4623 btrfs_free_path(path);
4624 if (ret) {
4625 mutex_lock(&fs_info->chunk_mutex);
4626 btrfs_device_set_total_bytes(device, old_size);
4627 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
4628 device->fs_devices->total_rw_bytes += diff;
4629 atomic64_add(diff, &fs_info->free_chunk_space);
4630 mutex_unlock(&fs_info->chunk_mutex);
4631 }
4632 return ret;
4633}
4634
4635static int btrfs_add_system_chunk(struct btrfs_fs_info *fs_info,
4636 struct btrfs_key *key,
4637 struct btrfs_chunk *chunk, int item_size)
4638{
4639 struct btrfs_super_block *super_copy = fs_info->super_copy;
4640 struct btrfs_disk_key disk_key;
4641 u32 array_size;
4642 u8 *ptr;
4643
4644 mutex_lock(&fs_info->chunk_mutex);
4645 array_size = btrfs_super_sys_array_size(super_copy);
4646 if (array_size + item_size + sizeof(disk_key)
4647 > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
4648 mutex_unlock(&fs_info->chunk_mutex);
4649 return -EFBIG;
4650 }
4651
4652 ptr = super_copy->sys_chunk_array + array_size;
4653 btrfs_cpu_key_to_disk(&disk_key, key);
4654 memcpy(ptr, &disk_key, sizeof(disk_key));
4655 ptr += sizeof(disk_key);
4656 memcpy(ptr, chunk, item_size);
4657 item_size += sizeof(disk_key);
4658 btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
4659 mutex_unlock(&fs_info->chunk_mutex);
4660
4661 return 0;
4662}
4663
/*
 * Sort the devices in descending order by max_avail, then by total_avail.
 */
4667static int btrfs_cmp_device_info(const void *a, const void *b)
4668{
4669 const struct btrfs_device_info *di_a = a;
4670 const struct btrfs_device_info *di_b = b;
4671
4672 if (di_a->max_avail > di_b->max_avail)
4673 return -1;
4674 if (di_a->max_avail < di_b->max_avail)
4675 return 1;
4676 if (di_a->total_avail > di_b->total_avail)
4677 return -1;
4678 if (di_a->total_avail < di_b->total_avail)
4679 return 1;
4680 return 0;
4681}
4682
4683static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
4684{
4685 if (!(type & BTRFS_BLOCK_GROUP_RAID56_MASK))
4686 return;
4687
4688 btrfs_set_fs_incompat(info, RAID56);
4689}
4690
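/*
 * Upper bounds on the number of stripes in a single chunk item: a regular
 * chunk item must fit in a tree leaf, and a system chunk item must also fit
 * in the superblock's sys_chunk_array (together with its key, leaving room
 * for a second system chunk).
 */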
4691#define BTRFS_MAX_DEVS(info) ((BTRFS_MAX_ITEM_SIZE(info) \
4692 - sizeof(struct btrfs_chunk)) \
4693 / sizeof(struct btrfs_stripe) + 1)
4694
4695#define BTRFS_MAX_DEVS_SYS_CHUNK ((BTRFS_SYSTEM_CHUNK_ARRAY_SIZE \
4696 - 2 * sizeof(struct btrfs_disk_key) \
4697 - 2 * sizeof(struct btrfs_chunk)) \
4698 / sizeof(struct btrfs_stripe) + 1)
4699
4700static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
4701 u64 start, u64 type)
4702{
4703 struct btrfs_fs_info *info = trans->fs_info;
4704 struct btrfs_fs_devices *fs_devices = info->fs_devices;
4705 struct btrfs_device *device;
4706 struct map_lookup *map = NULL;
4707 struct extent_map_tree *em_tree;
4708 struct extent_map *em;
4709 struct btrfs_device_info *devices_info = NULL;
4710 u64 total_avail;
4711 int num_stripes;
4712 int data_stripes;
4713
4714 int sub_stripes;
4715 int dev_stripes;
4716 int devs_max;
4717 int devs_min;
4718 int devs_increment;
4719 int ncopies;
4720 int ret;
4721 u64 max_stripe_size;
4722 u64 max_chunk_size;
4723 u64 stripe_size;
4724 u64 num_bytes;
4725 int ndevs;
4726 int i;
4727 int j;
4728 int index;
4729
4730 BUG_ON(!alloc_profile_is_valid(type, 0));
4731
4732 if (list_empty(&fs_devices->alloc_list)) {
4733 if (btrfs_test_opt(info, ENOSPC_DEBUG))
4734 btrfs_debug(info, "%s: no writable device", __func__);
4735 return -ENOSPC;
4736 }
4737
4738 index = btrfs_bg_flags_to_raid_index(type);
4739
4740 sub_stripes = btrfs_raid_array[index].sub_stripes;
4741 dev_stripes = btrfs_raid_array[index].dev_stripes;
4742 devs_max = btrfs_raid_array[index].devs_max;
4743 devs_min = btrfs_raid_array[index].devs_min;
4744 devs_increment = btrfs_raid_array[index].devs_increment;
4745 ncopies = btrfs_raid_array[index].ncopies;
4746
4747 if (type & BTRFS_BLOCK_GROUP_DATA) {
4748 max_stripe_size = SZ_1G;
4749 max_chunk_size = 10 * max_stripe_size;
4750 if (!devs_max)
4751 devs_max = BTRFS_MAX_DEVS(info);
4752 } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
		/* For larger filesystems, use larger metadata chunks */
4754 if (fs_devices->total_rw_bytes > 50ULL * SZ_1G)
4755 max_stripe_size = SZ_1G;
4756 else
4757 max_stripe_size = SZ_256M;
4758 max_chunk_size = max_stripe_size;
4759 if (!devs_max)
4760 devs_max = BTRFS_MAX_DEVS(info);
4761 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
4762 max_stripe_size = SZ_32M;
4763 max_chunk_size = 2 * max_stripe_size;
4764 if (!devs_max)
4765 devs_max = BTRFS_MAX_DEVS_SYS_CHUNK;
4766 } else {
4767 btrfs_err(info, "invalid chunk type 0x%llx requested",
4768 type);
4769 BUG_ON(1);
4770 }
4771
	/* We don't want a chunk larger than 10% of writeable space */
4773 max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
4774 max_chunk_size);
4775
4776 devices_info = kcalloc(fs_devices->rw_devices, sizeof(*devices_info),
4777 GFP_NOFS);
4778 if (!devices_info)
4779 return -ENOMEM;
4780
	/*
	 * In the first pass through the devices list, gather information
	 * about the available holes on each device.
	 */
4785 ndevs = 0;
4786 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
4787 u64 max_avail;
4788 u64 dev_offset;
4789
4790 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
4791 WARN(1, KERN_ERR
4792 "BTRFS: read-only device in alloc_list\n");
4793 continue;
4794 }
4795
4796 if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
4797 &device->dev_state) ||
4798 test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
4799 continue;
4800
4801 if (device->total_bytes > device->bytes_used)
4802 total_avail = device->total_bytes - device->bytes_used;
4803 else
4804 total_avail = 0;
4805
		/* If there is no space on this device, skip it. */
4807 if (total_avail == 0)
4808 continue;
4809
4810 ret = find_free_dev_extent(trans, device,
4811 max_stripe_size * dev_stripes,
4812 &dev_offset, &max_avail);
4813 if (ret && ret != -ENOSPC)
4814 goto error;
4815
4816 if (ret == 0)
4817 max_avail = max_stripe_size * dev_stripes;
4818
4819 if (max_avail < BTRFS_STRIPE_LEN * dev_stripes) {
4820 if (btrfs_test_opt(info, ENOSPC_DEBUG))
4821 btrfs_debug(info,
4822 "%s: devid %llu has no free space, have=%llu want=%u",
4823 __func__, device->devid, max_avail,
4824 BTRFS_STRIPE_LEN * dev_stripes);
4825 continue;
4826 }
4827
4828 if (ndevs == fs_devices->rw_devices) {
4829 WARN(1, "%s: found more than %llu devices\n",
4830 __func__, fs_devices->rw_devices);
4831 break;
4832 }
4833 devices_info[ndevs].dev_offset = dev_offset;
4834 devices_info[ndevs].max_avail = max_avail;
4835 devices_info[ndevs].total_avail = total_avail;
4836 devices_info[ndevs].dev = device;
4837 ++ndevs;
4838 }
4839
	/*
	 * Now sort the devices by hole size / available space.
	 */
4843 sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
4844 btrfs_cmp_device_info, NULL);
4845
	/* Round down to the nearest multiple of devs_increment */
4847 ndevs = round_down(ndevs, devs_increment);
4848
4849 if (ndevs < devs_min) {
4850 ret = -ENOSPC;
4851 if (btrfs_test_opt(info, ENOSPC_DEBUG)) {
4852 btrfs_debug(info,
4853 "%s: not enough devices with free space: have=%d minimum required=%d",
4854 __func__, ndevs, devs_min);
4855 }
4856 goto error;
4857 }
4858
4859 ndevs = min(ndevs, devs_max);
4860
	/*
	 * The primary goal is to maximize the number of stripes, so use as
	 * many devices as possible, even if the stripes are not maximum sized.
	 *
	 * The DUP profile stores more than one stripe per device, the
	 * max_avail is the total size so we have to adjust.
	 */
4868 stripe_size = div_u64(devices_info[ndevs - 1].max_avail, dev_stripes);
4869 num_stripes = ndevs * dev_stripes;
4870
	/*
	 * Estimate how many of the stripes carry distinct data; the RAID5/6
	 * cases are corrected right below, since they add parity rather than
	 * full copies.
	 */
4875 data_stripes = num_stripes / ncopies;
4876
4877 if (type & BTRFS_BLOCK_GROUP_RAID5)
4878 data_stripes = num_stripes - 1;
4879
4880 if (type & BTRFS_BLOCK_GROUP_RAID6)
4881 data_stripes = num_stripes - 2;
4882
	/*
	 * Use the number of data stripes to figure out how big this chunk is
	 * really going to be in terms of logical address space, and compare
	 * that answer with the max chunk size.
	 */
4888 if (stripe_size * data_stripes > max_chunk_size) {
4889 stripe_size = div_u64(max_chunk_size, data_stripes);
4890
		/* Bump the answer up to a 16MB boundary */
4892 stripe_size = round_up(stripe_size, SZ_16M);
4893
		/*
		 * But don't go higher than the limits we found while searching
		 * for free extents.
		 */
4898 stripe_size = min(devices_info[ndevs - 1].max_avail,
4899 stripe_size);
4900 }
4901
	/* Align to BTRFS_STRIPE_LEN */
4903 stripe_size = round_down(stripe_size, BTRFS_STRIPE_LEN);
4904
4905 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
4906 if (!map) {
4907 ret = -ENOMEM;
4908 goto error;
4909 }
4910 map->num_stripes = num_stripes;
4911
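	/*
	 * Lay out the stripes: each selected device contributes dev_stripes
	 * stripes, placed back to back starting at the free dev extent offset
	 * found above.
	 */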
4912 for (i = 0; i < ndevs; ++i) {
4913 for (j = 0; j < dev_stripes; ++j) {
4914 int s = i * dev_stripes + j;
4915 map->stripes[s].dev = devices_info[i].dev;
4916 map->stripes[s].physical = devices_info[i].dev_offset +
4917 j * stripe_size;
4918 }
4919 }
4920 map->stripe_len = BTRFS_STRIPE_LEN;
4921 map->io_align = BTRFS_STRIPE_LEN;
4922 map->io_width = BTRFS_STRIPE_LEN;
4923 map->type = type;
4924 map->sub_stripes = sub_stripes;
4925
4926 num_bytes = stripe_size * data_stripes;
4927
4928 trace_btrfs_chunk_alloc(info, map, start, num_bytes);
4929
4930 em = alloc_extent_map();
4931 if (!em) {
4932 kfree(map);
4933 ret = -ENOMEM;
4934 goto error;
4935 }
4936 set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
4937 em->map_lookup = map;
4938 em->start = start;
4939 em->len = num_bytes;
4940 em->block_start = 0;
4941 em->block_len = em->len;
4942 em->orig_block_len = stripe_size;
4943
4944 em_tree = &info->mapping_tree.map_tree;
4945 write_lock(&em_tree->lock);
4946 ret = add_extent_mapping(em_tree, em, 0);
4947 if (ret) {
4948 write_unlock(&em_tree->lock);
4949 free_extent_map(em);
4950 goto error;
4951 }
4952
4953 list_add_tail(&em->list, &trans->transaction->pending_chunks);
4954 refcount_inc(&em->refs);
4955 write_unlock(&em_tree->lock);
4956
4957 ret = btrfs_make_block_group(trans, info, 0, type, start, num_bytes);
4958 if (ret)
4959 goto error_del_extent;
4960
4961 for (i = 0; i < map->num_stripes; i++) {
4962 num_bytes = map->stripes[i].dev->bytes_used + stripe_size;
4963 btrfs_device_set_bytes_used(map->stripes[i].dev, num_bytes);
4964 }
4965
4966 atomic64_sub(stripe_size * map->num_stripes, &info->free_chunk_space);
4967
4968 free_extent_map(em);
4969 check_raid56_incompat_flag(info, type);
4970
4971 kfree(devices_info);
4972 return 0;
4973
4974error_del_extent:
4975 write_lock(&em_tree->lock);
4976 remove_extent_mapping(em_tree, em);
4977 write_unlock(&em_tree->lock);
4978
	/* One for our allocation */
	free_extent_map(em);
	/* One for the tree reference */
	free_extent_map(em);
	/* One for the pending_chunks list reference */
	free_extent_map(em);
4985error:
4986 kfree(devices_info);
4987 return ret;
4988}
4989
4990int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
4991 struct btrfs_fs_info *fs_info,
4992 u64 chunk_offset, u64 chunk_size)
4993{
4994 struct btrfs_root *extent_root = fs_info->extent_root;
4995 struct btrfs_root *chunk_root = fs_info->chunk_root;
4996 struct btrfs_key key;
4997 struct btrfs_device *device;
4998 struct btrfs_chunk *chunk;
4999 struct btrfs_stripe *stripe;
5000 struct extent_map *em;
5001 struct map_lookup *map;
5002 size_t item_size;
5003 u64 dev_offset;
5004 u64 stripe_size;
5005 int i = 0;
5006 int ret = 0;
5007
5008 em = get_chunk_map(fs_info, chunk_offset, chunk_size);
5009 if (IS_ERR(em))
5010 return PTR_ERR(em);
5011
5012 map = em->map_lookup;
5013 item_size = btrfs_chunk_item_size(map->num_stripes);
5014 stripe_size = em->orig_block_len;
5015
5016 chunk = kzalloc(item_size, GFP_NOFS);
5017 if (!chunk) {
5018 ret = -ENOMEM;
5019 goto out;
5020 }
5021
	/*
	 * Take the device list mutex to prevent races with the final phase of
	 * a device replace operation that replaces the device object
	 * associated with the map's stripes, because the device object's id
	 * can change at any time during that final phase of the device
	 * replace operation (dev-replace.c:btrfs_dev_replace_finishing()).
	 */
5029 mutex_lock(&fs_info->fs_devices->device_list_mutex);
5030 for (i = 0; i < map->num_stripes; i++) {
5031 device = map->stripes[i].dev;
5032 dev_offset = map->stripes[i].physical;
5033
5034 ret = btrfs_update_device(trans, device);
5035 if (ret)
5036 break;
5037 ret = btrfs_alloc_dev_extent(trans, device, chunk_offset,
5038 dev_offset, stripe_size);
5039 if (ret)
5040 break;
5041 }
5042 if (ret) {
5043 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
5044 goto out;
5045 }
5046
5047 stripe = &chunk->stripe;
5048 for (i = 0; i < map->num_stripes; i++) {
5049 device = map->stripes[i].dev;
5050 dev_offset = map->stripes[i].physical;
5051
5052 btrfs_set_stack_stripe_devid(stripe, device->devid);
5053 btrfs_set_stack_stripe_offset(stripe, dev_offset);
5054 memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
5055 stripe++;
5056 }
5057 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
5058
5059 btrfs_set_stack_chunk_length(chunk, chunk_size);
5060 btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
5061 btrfs_set_stack_chunk_stripe_len(chunk, map->stripe_len);
5062 btrfs_set_stack_chunk_type(chunk, map->type);
5063 btrfs_set_stack_chunk_num_stripes(chunk, map->num_stripes);
5064 btrfs_set_stack_chunk_io_align(chunk, map->stripe_len);
5065 btrfs_set_stack_chunk_io_width(chunk, map->stripe_len);
5066 btrfs_set_stack_chunk_sector_size(chunk, fs_info->sectorsize);
5067 btrfs_set_stack_chunk_sub_stripes(chunk, map->sub_stripes);
5068
5069 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
5070 key.type = BTRFS_CHUNK_ITEM_KEY;
5071 key.offset = chunk_offset;
5072
5073 ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size);
5074 if (ret == 0 && map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
		/*
		 * TODO: Cleanup of inserted chunk root in case of
		 * failure.
		 */
5079 ret = btrfs_add_system_chunk(fs_info, &key, chunk, item_size);
5080 }
5081
5082out:
5083 kfree(chunk);
5084 free_extent_map(em);
5085 return ret;
5086}
5087
/*
 * Chunk allocation falls into two parts. The first part does work
 * that makes the new allocated chunk usable, but does not do any operation
 * that modifies the chunk tree. The second part does the work that
 * requires modifying the chunk tree. This division is important for the
 * bootstrap process of adding storage to a seed filesystem.
 */
5095int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
5096 struct btrfs_fs_info *fs_info, u64 type)
5097{
5098 u64 chunk_offset;
5099
5100 lockdep_assert_held(&fs_info->chunk_mutex);
5101 chunk_offset = find_next_chunk(fs_info);
5102 return __btrfs_alloc_chunk(trans, chunk_offset, type);
5103}
5104
5105static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
5106 struct btrfs_fs_info *fs_info)
5107{
5108 u64 chunk_offset;
5109 u64 sys_chunk_offset;
5110 u64 alloc_profile;
5111 int ret;
5112
5113 chunk_offset = find_next_chunk(fs_info);
5114 alloc_profile = btrfs_metadata_alloc_profile(fs_info);
5115 ret = __btrfs_alloc_chunk(trans, chunk_offset, alloc_profile);
5116 if (ret)
5117 return ret;
5118
5119 sys_chunk_offset = find_next_chunk(fs_info);
5120 alloc_profile = btrfs_system_alloc_profile(fs_info);
5121 ret = __btrfs_alloc_chunk(trans, sys_chunk_offset, alloc_profile);
5122 return ret;
5123}
5124
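/*
 * Number of stripe failures a chunk of the given profile can tolerate:
 * one for RAID1/RAID10/RAID5/DUP, two for RAID6, zero otherwise.
 */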
5125static inline int btrfs_chunk_max_errors(struct map_lookup *map)
5126{
5127 int max_errors;
5128
5129 if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
5130 BTRFS_BLOCK_GROUP_RAID10 |
5131 BTRFS_BLOCK_GROUP_RAID5 |
5132 BTRFS_BLOCK_GROUP_DUP)) {
5133 max_errors = 1;
5134 } else if (map->type & BTRFS_BLOCK_GROUP_RAID6) {
5135 max_errors = 2;
5136 } else {
5137 max_errors = 0;
5138 }
5139
5140 return max_errors;
5141}
5142
5143int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset)
5144{
5145 struct extent_map *em;
5146 struct map_lookup *map;
5147 int readonly = 0;
5148 int miss_ndevs = 0;
5149 int i;
5150
5151 em = get_chunk_map(fs_info, chunk_offset, 1);
5152 if (IS_ERR(em))
5153 return 1;
5154
5155 map = em->map_lookup;
5156 for (i = 0; i < map->num_stripes; i++) {
5157 if (test_bit(BTRFS_DEV_STATE_MISSING,
5158 &map->stripes[i].dev->dev_state)) {
5159 miss_ndevs++;
5160 continue;
5161 }
5162 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE,
5163 &map->stripes[i].dev->dev_state)) {
5164 readonly = 1;
5165 goto end;
5166 }
5167 }
5168
	/*
	 * If the number of missing devices is larger than max errors, we can
	 * not write the data into that chunk successfully, so set it readonly.
	 */
5174 if (miss_ndevs > btrfs_chunk_max_errors(map))
5175 readonly = 1;
5176end:
5177 free_extent_map(em);
5178 return readonly;
5179}
5180
5181void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
5182{
5183 extent_map_tree_init(&tree->map_tree);
5184}
5185
5186void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
5187{
5188 struct extent_map *em;
5189
5190 while (1) {
5191 write_lock(&tree->map_tree.lock);
5192 em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1);
5193 if (em)
5194 remove_extent_mapping(&tree->map_tree, em);
5195 write_unlock(&tree->map_tree.lock);
5196 if (!em)
5197 break;
5198
		/* Once for us */
		free_extent_map(em);
		/* Once for the tree */
		free_extent_map(em);
5202 }
5203}
5204
5205int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
5206{
5207 struct extent_map *em;
5208 struct map_lookup *map;
5209 int ret;
5210
5211 em = get_chunk_map(fs_info, logical, len);
5212 if (IS_ERR(em))
		/*
		 * We could return errors for these cases, but that could get
		 * ugly and we'd probably do the same thing which is just not
		 * do anything else and exit, so return 1 so the callers don't
		 * try to use other copies.
		 */
5219 return 1;
5220
5221 map = em->map_lookup;
5222 if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
5223 ret = map->num_stripes;
5224 else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
5225 ret = map->sub_stripes;
5226 else if (map->type & BTRFS_BLOCK_GROUP_RAID5)
5227 ret = 2;
5228 else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
		/*
		 * There could be two corrupted data stripes, we need to loop
		 * retry in order to rebuild the correct data.
		 *
		 * Fail a stripe at a time on every retry except the stripe
		 * under reconstruction.
		 */
5236 ret = map->num_stripes;
5237 else
5238 ret = 1;
5239 free_extent_map(em);
5240
5241 btrfs_dev_replace_read_lock(&fs_info->dev_replace);
5242 if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace) &&
5243 fs_info->dev_replace.tgtdev)
5244 ret++;
5245 btrfs_dev_replace_read_unlock(&fs_info->dev_replace);
5246
5247 return ret;
5248}
5249
5250unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
5251 u64 logical)
5252{
5253 struct extent_map *em;
5254 struct map_lookup *map;
5255 unsigned long len = fs_info->sectorsize;
5256
5257 em = get_chunk_map(fs_info, logical, len);
5258
5259 if (!WARN_ON(IS_ERR(em))) {
5260 map = em->map_lookup;
5261 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
5262 len = map->stripe_len * nr_data_stripes(map);
5263 free_extent_map(em);
5264 }
5265 return len;
5266}
5267
5268int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
5269{
5270 struct extent_map *em;
5271 struct map_lookup *map;
5272 int ret = 0;
5273
5274 em = get_chunk_map(fs_info, logical, len);
5275
	if (!WARN_ON(IS_ERR(em))) {
5277 map = em->map_lookup;
5278 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
5279 ret = 1;
5280 free_extent_map(em);
5281 }
5282 return ret;
5283}
5284
5285static int find_live_mirror(struct btrfs_fs_info *fs_info,
5286 struct map_lookup *map, int first,
5287 int dev_replace_is_ongoing)
5288{
5289 int i;
5290 int num_stripes;
5291 int preferred_mirror;
5292 int tolerance;
5293 struct btrfs_device *srcdev;
5294
5295 ASSERT((map->type &
5296 (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10)));
5297
5298 if (map->type & BTRFS_BLOCK_GROUP_RAID10)
5299 num_stripes = map->sub_stripes;
5300 else
5301 num_stripes = map->num_stripes;
5302
5303 preferred_mirror = first + current->pid % num_stripes;
5304
5305 if (dev_replace_is_ongoing &&
5306 fs_info->dev_replace.cont_reading_from_srcdev_mode ==
5307 BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID)
5308 srcdev = fs_info->dev_replace.srcdev;
5309 else
5310 srcdev = NULL;
5311
	/*
	 * Try to avoid the drive that is the source drive for a dev-replace
	 * procedure, only choose it if no other non-missing mirror is
	 * available.
	 */
5317 for (tolerance = 0; tolerance < 2; tolerance++) {
5318 if (map->stripes[preferred_mirror].dev->bdev &&
5319 (tolerance || map->stripes[preferred_mirror].dev != srcdev))
5320 return preferred_mirror;
5321 for (i = first; i < first + num_stripes; i++) {
5322 if (map->stripes[i].dev->bdev &&
5323 (tolerance || map->stripes[i].dev != srcdev))
5324 return i;
5325 }
5326 }
5327
	/*
	 * We couldn't find one that doesn't fail. Just return something and
	 * the io error handling code will clean up eventually.
	 */
5331 return preferred_mirror;
5332}
5333
5334static inline int parity_smaller(u64 a, u64 b)
5335{
5336 return a > b;
5337}
5338
/* Bubble-sort the stripe set to put the parity/syndrome stripes last */
5340static void sort_parity_stripes(struct btrfs_bio *bbio, int num_stripes)
5341{
5342 struct btrfs_bio_stripe s;
5343 int i;
5344 u64 l;
5345 int again = 1;
5346
5347 while (again) {
5348 again = 0;
5349 for (i = 0; i < num_stripes - 1; i++) {
5350 if (parity_smaller(bbio->raid_map[i],
5351 bbio->raid_map[i+1])) {
5352 s = bbio->stripes[i];
5353 l = bbio->raid_map[i];
5354 bbio->stripes[i] = bbio->stripes[i+1];
5355 bbio->raid_map[i] = bbio->raid_map[i+1];
5356 bbio->stripes[i+1] = s;
5357 bbio->raid_map[i+1] = l;
5358
5359 again = 1;
5360 }
5361 }
5362 }
5363}
5364
5365static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes)
5366{
	struct btrfs_bio *bbio = kzalloc(
		 /* The size of the btrfs_bio */
		sizeof(struct btrfs_bio) +
		/* Plus the variable array for the stripes */
		sizeof(struct btrfs_bio_stripe) * (total_stripes) +
		/* Plus the variable array for the tgt dev */
		sizeof(int) * (real_stripes) +
		/*
		 * Plus the raid_map, which includes both the tgt dev
		 * and the stripes.
		 */
		sizeof(u64) * (total_stripes),
		GFP_NOFS|__GFP_NOFAIL);
5380
5381 atomic_set(&bbio->error, 0);
5382 refcount_set(&bbio->refs, 1);
5383
5384 return bbio;
5385}
5386
5387void btrfs_get_bbio(struct btrfs_bio *bbio)
5388{
5389 WARN_ON(!refcount_read(&bbio->refs));
5390 refcount_inc(&bbio->refs);
5391}
5392
5393void btrfs_put_bbio(struct btrfs_bio *bbio)
5394{
5395 if (!bbio)
5396 return;
5397 if (refcount_dec_and_test(&bbio->refs))
5398 kfree(bbio);
5399}
5400
/*
 * Please note that, discard won't be sent to target device of device
 * replace.
 */
5406static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info,
5407 u64 logical, u64 length,
5408 struct btrfs_bio **bbio_ret)
5409{
5410 struct extent_map *em;
5411 struct map_lookup *map;
5412 struct btrfs_bio *bbio;
5413 u64 offset;
5414 u64 stripe_nr;
5415 u64 stripe_nr_end;
5416 u64 stripe_end_offset;
5417 u64 stripe_cnt;
5418 u64 stripe_len;
5419 u64 stripe_offset;
5420 u64 num_stripes;
5421 u32 stripe_index;
5422 u32 factor = 0;
5423 u32 sub_stripes = 0;
5424 u64 stripes_per_dev = 0;
5425 u32 remaining_stripes = 0;
5426 u32 last_stripe = 0;
5427 int ret = 0;
5428 int i;
5429
	/* Discard always returns a bbio */
5431 ASSERT(bbio_ret);
5432
5433 em = get_chunk_map(fs_info, logical, length);
5434 if (IS_ERR(em))
5435 return PTR_ERR(em);
5436
5437 map = em->map_lookup;
5438
5439 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
5440 ret = -EOPNOTSUPP;
5441 goto out;
5442 }
5443
5444 offset = logical - em->start;
5445 length = min_t(u64, em->len - offset, length);
5446
5447 stripe_len = map->stripe_len;
5448
	/*
	 * stripe_nr counts the total number of stripes we have to stride
	 * to get to this block.
	 */
	stripe_nr = div64_u64(offset, stripe_len);

	/* stripe_offset is the offset of this block in its stripe */
5455 stripe_offset = offset - stripe_nr * stripe_len;
5456
5457 stripe_nr_end = round_up(offset + length, map->stripe_len);
5458 stripe_nr_end = div64_u64(stripe_nr_end, map->stripe_len);
5459 stripe_cnt = stripe_nr_end - stripe_nr;
5460 stripe_end_offset = stripe_nr_end * map->stripe_len -
5461 (offset + length);
5462
	/*
	 * After this, stripe_nr is the number of stripes on this device we
	 * have to walk to find the data, and stripe_index is the number of
	 * our device in the stripe array.
	 */
5467 num_stripes = 1;
5468 stripe_index = 0;
5469 if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
5470 BTRFS_BLOCK_GROUP_RAID10)) {
5471 if (map->type & BTRFS_BLOCK_GROUP_RAID0)
5472 sub_stripes = 1;
5473 else
5474 sub_stripes = map->sub_stripes;
5475
5476 factor = map->num_stripes / sub_stripes;
5477 num_stripes = min_t(u64, map->num_stripes,
5478 sub_stripes * stripe_cnt);
5479 stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
5480 stripe_index *= sub_stripes;
5481 stripes_per_dev = div_u64_rem(stripe_cnt, factor,
5482 &remaining_stripes);
5483 div_u64_rem(stripe_nr_end - 1, factor, &last_stripe);
5484 last_stripe *= sub_stripes;
5485 } else if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
5486 BTRFS_BLOCK_GROUP_DUP)) {
5487 num_stripes = map->num_stripes;
5488 } else {
5489 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
5490 &stripe_index);
5491 }
5492
5493 bbio = alloc_btrfs_bio(num_stripes, 0);
5494 if (!bbio) {
5495 ret = -ENOMEM;
5496 goto out;
5497 }
5498
5499 for (i = 0; i < num_stripes; i++) {
5500 bbio->stripes[i].physical =
5501 map->stripes[stripe_index].physical +
5502 stripe_offset + stripe_nr * map->stripe_len;
5503 bbio->stripes[i].dev = map->stripes[stripe_index].dev;
5504
5505 if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
5506 BTRFS_BLOCK_GROUP_RAID10)) {
5507 bbio->stripes[i].length = stripes_per_dev *
5508 map->stripe_len;
5509
5510 if (i / sub_stripes < remaining_stripes)
5511 bbio->stripes[i].length +=
5512 map->stripe_len;
5513
			/*
			 * Special for the first stripe and
			 * the last stripe:
			 *
			 * |-------|...|-------|
			 *     |----------|
			 *    off     end_off
			 */
5522 if (i < sub_stripes)
5523 bbio->stripes[i].length -=
5524 stripe_offset;
5525
5526 if (stripe_index >= last_stripe &&
5527 stripe_index <= (last_stripe +
5528 sub_stripes - 1))
5529 bbio->stripes[i].length -=
5530 stripe_end_offset;
5531
5532 if (i == sub_stripes - 1)
5533 stripe_offset = 0;
5534 } else {
5535 bbio->stripes[i].length = length;
5536 }
5537
5538 stripe_index++;
5539 if (stripe_index == map->num_stripes) {
5540 stripe_index = 0;
5541 stripe_nr++;
5542 }
5543 }
5544
5545 *bbio_ret = bbio;
5546 bbio->map_type = map->type;
5547 bbio->num_stripes = num_stripes;
5548out:
5549 free_extent_map(em);
5550 return ret;
5551}
5552
/*
 * In dev-replace case, for repair case (that's the only case where the mirror
 * is selected explicitly when calling btrfs_map_block), blocks left of the
 * left cursor can also be read from the target drive.
 *
 * For BTRFS_MAP_GET_READ_MIRRORS, the target drive is added as the last one
 * to the array of stripes. For READ, it also needs to be supported using the
 * same mirror number.
 *
 * If the requested block is not left of the left cursor, EIO is returned.
 * This can happen because btrfs_num_copies() returns one more in the
 * dev-replace case.
 */
5566static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info,
5567 u64 logical, u64 length,
5568 u64 srcdev_devid, int *mirror_num,
5569 u64 *physical)
5570{
5571 struct btrfs_bio *bbio = NULL;
5572 int num_stripes;
5573 int index_srcdev = 0;
5574 int found = 0;
5575 u64 physical_of_found = 0;
5576 int i;
5577 int ret = 0;
5578
5579 ret = __btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
5580 logical, &length, &bbio, 0, 0);
5581 if (ret) {
5582 ASSERT(bbio == NULL);
5583 return ret;
5584 }
5585
5586 num_stripes = bbio->num_stripes;
5587 if (*mirror_num > num_stripes) {
		/*
		 * BTRFS_MAP_GET_READ_MIRRORS does not contain this mirror,
		 * which means that the requested area is not left of the
		 * left cursor.
		 */
5593 btrfs_put_bbio(bbio);
5594 return -EIO;
5595 }
5596
	/*
	 * Process the rest of the function using the mirror_num of the source
	 * drive. Therefore look it up first. At the end, patch the device
	 * pointer to the one of the target drive.
	 */
5602 for (i = 0; i < num_stripes; i++) {
5603 if (bbio->stripes[i].dev->devid != srcdev_devid)
5604 continue;
5605
		/*
		 * In case of DUP, in order to keep it simple, only add the
		 * mirror with the lowest physical address.
		 */
5610 if (found &&
5611 physical_of_found <= bbio->stripes[i].physical)
5612 continue;
5613
5614 index_srcdev = i;
5615 found = 1;
5616 physical_of_found = bbio->stripes[i].physical;
5617 }
5618
5619 btrfs_put_bbio(bbio);
5620
5621 ASSERT(found);
5622 if (!found)
5623 return -EIO;
5624
5625 *mirror_num = index_srcdev + 1;
5626 *physical = physical_of_found;
5627 return ret;
5628}
5629
5630static void handle_ops_on_dev_replace(enum btrfs_map_op op,
5631 struct btrfs_bio **bbio_ret,
5632 struct btrfs_dev_replace *dev_replace,
5633 int *num_stripes_ret, int *max_errors_ret)
5634{
5635 struct btrfs_bio *bbio = *bbio_ret;
5636 u64 srcdev_devid = dev_replace->srcdev->devid;
5637 int tgtdev_indexes = 0;
5638 int num_stripes = *num_stripes_ret;
5639 int max_errors = *max_errors_ret;
5640 int i;
5641
5642 if (op == BTRFS_MAP_WRITE) {
5643 int index_where_to_add;
5644
		/*
		 * Duplicate the write operations while the dev-replace
		 * procedure is running. Since the copying of the old disk to
		 * the new disk takes place at run time while the filesystem is
		 * mounted writable, the regular write operations to the old
		 * disk have to be duplicated to go to the new disk as well.
		 *
		 * Note that device->missing is handled by the caller, and that
		 * the write to the old disk is already set up in the stripes
		 * array.
		 */
5656 index_where_to_add = num_stripes;
5657 for (i = 0; i < num_stripes; i++) {
5658 if (bbio->stripes[i].dev->devid == srcdev_devid) {
				/* Write to the replacement target as well */
5660 struct btrfs_bio_stripe *new =
5661 bbio->stripes + index_where_to_add;
5662 struct btrfs_bio_stripe *old =
5663 bbio->stripes + i;
5664
5665 new->physical = old->physical;
5666 new->length = old->length;
5667 new->dev = dev_replace->tgtdev;
5668 bbio->tgtdev_map[i] = index_where_to_add;
5669 index_where_to_add++;
5670 max_errors++;
5671 tgtdev_indexes++;
5672 }
5673 }
5674 num_stripes = index_where_to_add;
5675 } else if (op == BTRFS_MAP_GET_READ_MIRRORS) {
5676 int index_srcdev = 0;
5677 int found = 0;
5678 u64 physical_of_found = 0;
5679
		/*
		 * During the dev-replace procedure, the target drive can also
		 * be used to read data in case it is needed to repair a
		 * corrupt block elsewhere. This is possible if the requested
		 * area is left of the left cursor. In this area, the target
		 * drive is a full copy of the source drive.
		 */
5687 for (i = 0; i < num_stripes; i++) {
5688 if (bbio->stripes[i].dev->devid == srcdev_devid) {
				/*
				 * In case of DUP, in order to keep it simple,
				 * only add the mirror with the lowest physical
				 * address.
				 */
5694 if (found &&
5695 physical_of_found <=
5696 bbio->stripes[i].physical)
5697 continue;
5698 index_srcdev = i;
5699 found = 1;
5700 physical_of_found = bbio->stripes[i].physical;
5701 }
5702 }
5703 if (found) {
5704 struct btrfs_bio_stripe *tgtdev_stripe =
5705 bbio->stripes + num_stripes;
5706
5707 tgtdev_stripe->physical = physical_of_found;
5708 tgtdev_stripe->length =
5709 bbio->stripes[index_srcdev].length;
5710 tgtdev_stripe->dev = dev_replace->tgtdev;
5711 bbio->tgtdev_map[index_srcdev] = num_stripes;
5712
5713 tgtdev_indexes++;
5714 num_stripes++;
5715 }
5716 }
5717
5718 *num_stripes_ret = num_stripes;
5719 *max_errors_ret = max_errors;
5720 bbio->num_tgtdevs = tgtdev_indexes;
5721 *bbio_ret = bbio;
5722}
5723
5724static bool need_full_stripe(enum btrfs_map_op op)
5725{
5726 return (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS);
5727}
5728
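/*
 * Map a logical byte range to the physical stripes that hold it and return
 * the result in a btrfs_bio: one stripe per device that must be read or
 * written, plus the raid_map for RAID[56] full-stripe operations when
 * need_raid_map is set. *length is trimmed to what can be handled as a
 * single mapping, and mirror_num selects a particular copy for reads.
 */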
5729static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
5730 enum btrfs_map_op op,
5731 u64 logical, u64 *length,
5732 struct btrfs_bio **bbio_ret,
5733 int mirror_num, int need_raid_map)
5734{
5735 struct extent_map *em;
5736 struct map_lookup *map;
5737 u64 offset;
5738 u64 stripe_offset;
5739 u64 stripe_nr;
5740 u64 stripe_len;
5741 u32 stripe_index;
5742 int i;
5743 int ret = 0;
5744 int num_stripes;
5745 int max_errors = 0;
5746 int tgtdev_indexes = 0;
5747 struct btrfs_bio *bbio = NULL;
5748 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
5749 int dev_replace_is_ongoing = 0;
5750 int num_alloc_stripes;
5751 int patch_the_first_stripe_for_dev_replace = 0;
5752 u64 physical_to_patch_in_first_stripe = 0;
5753 u64 raid56_full_stripe_start = (u64)-1;
5754
5755 if (op == BTRFS_MAP_DISCARD)
5756 return __btrfs_map_block_for_discard(fs_info, logical,
5757 *length, bbio_ret);
5758
5759 em = get_chunk_map(fs_info, logical, *length);
5760 if (IS_ERR(em))
5761 return PTR_ERR(em);
5762
5763 map = em->map_lookup;
5764 offset = logical - em->start;
5765
5766 stripe_len = map->stripe_len;
5767 stripe_nr = offset;
5768
	/*
	 * stripe_nr counts the total number of stripes we have to stride
	 * to get to this block.
	 */
5772 stripe_nr = div64_u64(stripe_nr, stripe_len);
5773
5774 stripe_offset = stripe_nr * stripe_len;
5775 if (offset < stripe_offset) {
5776 btrfs_crit(fs_info,
5777 "stripe math has gone wrong, stripe_offset=%llu, offset=%llu, start=%llu, logical=%llu, stripe_len=%llu",
5778 stripe_offset, offset, em->start, logical,
5779 stripe_len);
5780 free_extent_map(em);
5781 return -EINVAL;
5782 }
5783
	/* stripe_offset is the offset of this block in its stripe */
	stripe_offset = offset - stripe_offset;

	/* If we're here for raid56, we need to know the stripe aligned start */
5788 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
5789 unsigned long full_stripe_len = stripe_len * nr_data_stripes(map);
5790 raid56_full_stripe_start = offset;
5791
		/*
		 * Allow a write of a full stripe, but make sure we don't
		 * allow straddling of stripes.
		 */
5795 raid56_full_stripe_start = div64_u64(raid56_full_stripe_start,
5796 full_stripe_len);
5797 raid56_full_stripe_start *= full_stripe_len;
5798 }
5799
5800 if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
5801 u64 max_len;
		/*
		 * For writes to RAID[56], allow a full stripeset across all
		 * disks. For other RAID types and for RAID[56] reads, just
		 * allow a single stripe (on a single disk).
		 */
5805 if ((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
5806 (op == BTRFS_MAP_WRITE)) {
5807 max_len = stripe_len * nr_data_stripes(map) -
5808 (offset - raid56_full_stripe_start);
5809 } else {
			/* We limit the length of each bio to what fits in a stripe */
5811 max_len = stripe_len - stripe_offset;
5812 }
5813 *length = min_t(u64, em->len - offset, max_len);
5814 } else {
5815 *length = em->len - offset;
5816 }
5817
5818
	/* Callers that only care about *length pass a NULL bbio_ret */
5820 if (!bbio_ret)
5821 goto out;
5822
5823 btrfs_dev_replace_read_lock(dev_replace);
5824 dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
5825 if (!dev_replace_is_ongoing)
5826 btrfs_dev_replace_read_unlock(dev_replace);
5827 else
5828 btrfs_dev_replace_set_lock_blocking(dev_replace);
5829
5830 if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
5831 !need_full_stripe(op) && dev_replace->tgtdev != NULL) {
5832 ret = get_extra_mirror_from_replace(fs_info, logical, *length,
5833 dev_replace->srcdev->devid,
5834 &mirror_num,
5835 &physical_to_patch_in_first_stripe);
5836 if (ret)
5837 goto out;
5838 else
5839 patch_the_first_stripe_for_dev_replace = 1;
5840 } else if (mirror_num > map->num_stripes) {
5841 mirror_num = 0;
5842 }
5843
5844 num_stripes = 1;
5845 stripe_index = 0;
5846 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
5847 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
5848 &stripe_index);
5849 if (!need_full_stripe(op))
5850 mirror_num = 1;
5851 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
5852 if (need_full_stripe(op))
5853 num_stripes = map->num_stripes;
5854 else if (mirror_num)
5855 stripe_index = mirror_num - 1;
5856 else {
5857 stripe_index = find_live_mirror(fs_info, map, 0,
5858 dev_replace_is_ongoing);
5859 mirror_num = stripe_index + 1;
5860 }
5861
5862 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
5863 if (need_full_stripe(op)) {
5864 num_stripes = map->num_stripes;
5865 } else if (mirror_num) {
5866 stripe_index = mirror_num - 1;
5867 } else {
5868 mirror_num = 1;
5869 }
5870
5871 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
5872 u32 factor = map->num_stripes / map->sub_stripes;
5873
5874 stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
5875 stripe_index *= map->sub_stripes;
5876
5877 if (need_full_stripe(op))
5878 num_stripes = map->sub_stripes;
5879 else if (mirror_num)
5880 stripe_index += mirror_num - 1;
5881 else {
5882 int old_stripe_index = stripe_index;
5883 stripe_index = find_live_mirror(fs_info, map,
5884 stripe_index,
5885 dev_replace_is_ongoing);
5886 mirror_num = stripe_index - old_stripe_index + 1;
5887 }
5888
5889 } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
5890 if (need_raid_map && (need_full_stripe(op) || mirror_num > 1)) {
			/* Push stripe_nr back to the start of the full stripe */
5892 stripe_nr = div64_u64(raid56_full_stripe_start,
5893 stripe_len * nr_data_stripes(map));
5894
			/* RAID[56] write or recovery. Return all stripes */
5896 num_stripes = map->num_stripes;
5897 max_errors = nr_parity_stripes(map);
5898
5899 *length = map->stripe_len;
5900 stripe_index = 0;
5901 stripe_offset = 0;
5902 } else {
			/*
			 * Mirror #0 or #1 means the original data block.
			 * Mirror #2 is RAID5 parity block.
			 * Mirror #3 is RAID6 Q block.
			 */
5908 stripe_nr = div_u64_rem(stripe_nr,
5909 nr_data_stripes(map), &stripe_index);
5910 if (mirror_num > 1)
5911 stripe_index = nr_data_stripes(map) +
5912 mirror_num - 2;
5913
			/* We distribute the parity blocks across stripes */
5915 div_u64_rem(stripe_nr + stripe_index, map->num_stripes,
5916 &stripe_index);
5917 if (!need_full_stripe(op) && mirror_num <= 1)
5918 mirror_num = 1;
5919 }
5920 } else {
		/*
		 * After this, stripe_nr is the number of stripes on this
		 * device we have to walk to find the data, and stripe_index
		 * is the number of our device in the stripe array.
		 */
5926 stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
5927 &stripe_index);
5928 mirror_num = stripe_index + 1;
5929 }
5930 if (stripe_index >= map->num_stripes) {
5931 btrfs_crit(fs_info,
5932 "stripe index math went horribly wrong, got stripe_index=%u, num_stripes=%u",
5933 stripe_index, map->num_stripes);
5934 ret = -EINVAL;
5935 goto out;
5936 }
5937
5938 num_alloc_stripes = num_stripes;
5939 if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) {
5940 if (op == BTRFS_MAP_WRITE)
5941 num_alloc_stripes <<= 1;
5942 if (op == BTRFS_MAP_GET_READ_MIRRORS)
5943 num_alloc_stripes++;
5944 tgtdev_indexes = num_stripes;
5945 }
5946
5947 bbio = alloc_btrfs_bio(num_alloc_stripes, tgtdev_indexes);
5948 if (!bbio) {
5949 ret = -ENOMEM;
5950 goto out;
5951 }
5952 if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL)
5953 bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes);
5954
	/* Build the raid_map for RAID[56] full-stripe operations */
5956 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map &&
5957 (need_full_stripe(op) || mirror_num > 1)) {
5958 u64 tmp;
5959 unsigned rot;
5960
5961 bbio->raid_map = (u64 *)((void *)bbio->stripes +
5962 sizeof(struct btrfs_bio_stripe) *
5963 num_alloc_stripes +
5964 sizeof(int) * tgtdev_indexes);
5965
		/* Work out the disk rotation on this stripe-set */
5967 div_u64_rem(stripe_nr, num_stripes, &rot);
5968
		/* Fill in the logical address of each stripe */
5970 tmp = stripe_nr * nr_data_stripes(map);
5971 for (i = 0; i < nr_data_stripes(map); i++)
5972 bbio->raid_map[(i+rot) % num_stripes] =
5973 em->start + (tmp + i) * map->stripe_len;
5974
5975 bbio->raid_map[(i+rot) % map->num_stripes] = RAID5_P_STRIPE;
5976 if (map->type & BTRFS_BLOCK_GROUP_RAID6)
5977 bbio->raid_map[(i+rot+1) % num_stripes] =
5978 RAID6_Q_STRIPE;
5979 }
5980
5981
5982 for (i = 0; i < num_stripes; i++) {
5983 bbio->stripes[i].physical =
5984 map->stripes[stripe_index].physical +
5985 stripe_offset +
5986 stripe_nr * map->stripe_len;
5987 bbio->stripes[i].dev =
5988 map->stripes[stripe_index].dev;
5989 stripe_index++;
5990 }
5991
5992 if (need_full_stripe(op))
5993 max_errors = btrfs_chunk_max_errors(map);
5994
5995 if (bbio->raid_map)
5996 sort_parity_stripes(bbio, num_stripes);
5997
5998 if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL &&
5999 need_full_stripe(op)) {
6000 handle_ops_on_dev_replace(op, &bbio, dev_replace, &num_stripes,
6001 &max_errors);
6002 }
6003
6004 *bbio_ret = bbio;
6005 bbio->map_type = map->type;
6006 bbio->num_stripes = num_stripes;
6007 bbio->max_errors = max_errors;
6008 bbio->mirror_num = mirror_num;
6009
	/*
	 * This is the case that READ && dev_replace_is_ongoing &&
	 * mirror_num == num_stripes + 1 && the dev-replace target drive is
	 * available as a mirror.
	 */
6015 if (patch_the_first_stripe_for_dev_replace && num_stripes > 0) {
6016 WARN_ON(num_stripes > 1);
6017 bbio->stripes[0].dev = dev_replace->tgtdev;
6018 bbio->stripes[0].physical = physical_to_patch_in_first_stripe;
6019 bbio->mirror_num = map->num_stripes + 1;
6020 }
6021out:
6022 if (dev_replace_is_ongoing) {
6023 btrfs_dev_replace_clear_lock_blocking(dev_replace);
6024 btrfs_dev_replace_read_unlock(dev_replace);
6025 }
6026 free_extent_map(em);
6027 return ret;
6028}
6029
6030int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
6031 u64 logical, u64 *length,
6032 struct btrfs_bio **bbio_ret, int mirror_num)
6033{
6034 return __btrfs_map_block(fs_info, op, logical, length, bbio_ret,
6035 mirror_num, 0);
6036}
6037
/* For scrub/replace */
6039int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
6040 u64 logical, u64 *length,
6041 struct btrfs_bio **bbio_ret)
6042{
6043 return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, 0, 1);
6044}
6045
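/*
 * Reverse mapping: given a physical offset on a device, collect the logical
 * addresses (one per stripe copy that can contain the offset) inside the
 * chunk starting at chunk_start.
 */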
6046int btrfs_rmap_block(struct btrfs_fs_info *fs_info,
6047 u64 chunk_start, u64 physical, u64 devid,
6048 u64 **logical, int *naddrs, int *stripe_len)
6049{
6050 struct extent_map *em;
6051 struct map_lookup *map;
6052 u64 *buf;
6053 u64 bytenr;
6054 u64 length;
6055 u64 stripe_nr;
6056 u64 rmap_len;
6057 int i, j, nr = 0;
6058
6059 em = get_chunk_map(fs_info, chunk_start, 1);
6060 if (IS_ERR(em))
6061 return -EIO;
6062
6063 map = em->map_lookup;
6064 length = em->len;
6065 rmap_len = map->stripe_len;
6066
6067 if (map->type & BTRFS_BLOCK_GROUP_RAID10)
6068 length = div_u64(length, map->num_stripes / map->sub_stripes);
6069 else if (map->type & BTRFS_BLOCK_GROUP_RAID0)
6070 length = div_u64(length, map->num_stripes);
6071 else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
6072 length = div_u64(length, nr_data_stripes(map));
6073 rmap_len = map->stripe_len * nr_data_stripes(map);
6074 }
6075
6076 buf = kcalloc(map->num_stripes, sizeof(u64), GFP_NOFS);
6077 BUG_ON(!buf);
6078
6079 for (i = 0; i < map->num_stripes; i++) {
6080 if (devid && map->stripes[i].dev->devid != devid)
6081 continue;
6082 if (map->stripes[i].physical > physical ||
6083 map->stripes[i].physical + length <= physical)
6084 continue;
6085
6086 stripe_nr = physical - map->stripes[i].physical;
6087 stripe_nr = div64_u64(stripe_nr, map->stripe_len);
6088
6089 if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
6090 stripe_nr = stripe_nr * map->num_stripes + i;
6091 stripe_nr = div_u64(stripe_nr, map->sub_stripes);
6092 } else if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
6093 stripe_nr = stripe_nr * map->num_stripes + i;
6094 }
6095
		/*
		 * For RAID[56] the logical stride below is rmap_len, a full
		 * data stripe, so stripe_nr needs no further adjustment here.
		 */
6098 bytenr = chunk_start + stripe_nr * rmap_len;
6099 WARN_ON(nr >= map->num_stripes);
6100 for (j = 0; j < nr; j++) {
6101 if (buf[j] == bytenr)
6102 break;
6103 }
6104 if (j == nr) {
6105 WARN_ON(nr >= map->num_stripes);
6106 buf[nr++] = bytenr;
6107 }
6108 }
6109
6110 *logical = buf;
6111 *naddrs = nr;
6112 *stripe_len = rmap_len;
6113
6114 free_extent_map(em);
6115 return 0;
6116}
6117
6118static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio)
6119{
6120 bio->bi_private = bbio->private;
6121 bio->bi_end_io = bbio->end_io;
6122 bio_endio(bio);
6123
6124 btrfs_put_bbio(bbio);
6125}
6126
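/*
 * Completion callback for stripe bios: account per-device I/O errors, and
 * once the last stripe bio finishes, end the original bio with an error only
 * if more stripes failed than the chunk profile can tolerate.
 */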
6127static void btrfs_end_bio(struct bio *bio)
6128{
6129 struct btrfs_bio *bbio = bio->bi_private;
6130 int is_orig_bio = 0;
6131
6132 if (bio->bi_status) {
6133 atomic_inc(&bbio->error);
6134 if (bio->bi_status == BLK_STS_IOERR ||
6135 bio->bi_status == BLK_STS_TARGET) {
6136 unsigned int stripe_index =
6137 btrfs_io_bio(bio)->stripe_index;
6138 struct btrfs_device *dev;
6139
6140 BUG_ON(stripe_index >= bbio->num_stripes);
6141 dev = bbio->stripes[stripe_index].dev;
6142 if (dev->bdev) {
6143 if (bio_op(bio) == REQ_OP_WRITE)
6144 btrfs_dev_stat_inc_and_print(dev,
6145 BTRFS_DEV_STAT_WRITE_ERRS);
6146 else
6147 btrfs_dev_stat_inc_and_print(dev,
6148 BTRFS_DEV_STAT_READ_ERRS);
6149 if (bio->bi_opf & REQ_PREFLUSH)
6150 btrfs_dev_stat_inc_and_print(dev,
6151 BTRFS_DEV_STAT_FLUSH_ERRS);
6152 }
6153 }
6154 }
6155
6156 if (bio == bbio->orig_bio)
6157 is_orig_bio = 1;
6158
6159 btrfs_bio_counter_dec(bbio->fs_info);
6160
6161 if (atomic_dec_and_test(&bbio->stripes_pending)) {
6162 if (!is_orig_bio) {
6163 bio_put(bio);
6164 bio = bbio->orig_bio;
6165 }
6166
6167 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
6168
		/*
		 * Only send an error to the higher layers if it is beyond
		 * the tolerance of the btrfs bio.
		 */
6171 if (atomic_read(&bbio->error) > bbio->max_errors) {
6172 bio->bi_status = BLK_STS_IOERR;
6173 } else {
			/*
			 * This bio is actually up to date, we didn't go over
			 * the max number of errors.
			 */
6178 bio->bi_status = BLK_STS_OK;
6179 }
6180
6181 btrfs_end_bbio(bbio, bio);
6182 } else if (!is_orig_bio) {
6183 bio_put(bio);
6184 }
6185}
6186
/*
 * See run_scheduled_bios for a description of why bios are collected for
 * async submit.
 *
 * This will add one bio to the pending list for a device and make sure the
 * work struct is scheduled.
 */
6194static noinline void btrfs_schedule_bio(struct btrfs_device *device,
6195 struct bio *bio)
6196{
6197 struct btrfs_fs_info *fs_info = device->fs_info;
6198 int should_queue = 1;
6199 struct btrfs_pending_bios *pending_bios;
6200
6201 if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state) ||
6202 !device->bdev) {
6203 bio_io_error(bio);
6204 return;
6205 }
6206
	/* Don't bother with additional async steps for reads, right now */
6208 if (bio_op(bio) == REQ_OP_READ) {
6209 btrfsic_submit_bio(bio);
6210 return;
6211 }
6212
6213 WARN_ON(bio->bi_next);
6214 bio->bi_next = NULL;
6215
6216 spin_lock(&device->io_lock);
6217 if (op_is_sync(bio->bi_opf))
6218 pending_bios = &device->pending_sync_bios;
6219 else
6220 pending_bios = &device->pending_bios;
6221
6222 if (pending_bios->tail)
6223 pending_bios->tail->bi_next = bio;
6224
6225 pending_bios->tail = bio;
6226 if (!pending_bios->head)
6227 pending_bios->head = bio;
6228 if (device->running_pending)
6229 should_queue = 0;
6230
6231 spin_unlock(&device->io_lock);
6232
6233 if (should_queue)
6234 btrfs_queue_work(fs_info->submit_workers, &device->work);
6235}
6236
6237static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio,
6238 u64 physical, int dev_nr, int async)
6239{
6240 struct btrfs_device *dev = bbio->stripes[dev_nr].dev;
6241 struct btrfs_fs_info *fs_info = bbio->fs_info;
6242
6243 bio->bi_private = bbio;
6244 btrfs_io_bio(bio)->stripe_index = dev_nr;
6245 bio->bi_end_io = btrfs_end_bio;
6246 bio->bi_iter.bi_sector = physical >> 9;
6247#ifdef DEBUG
6248 {
6249 struct rcu_string *name;
6250
6251 rcu_read_lock();
6252 name = rcu_dereference(dev->name);
6253 btrfs_debug(fs_info,
6254 "btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
6255 bio_op(bio), bio->bi_opf,
6256 (u64)bio->bi_iter.bi_sector,
6257 (u_long)dev->bdev->bd_dev, name->str, dev->devid,
6258 bio->bi_iter.bi_size);
6259 rcu_read_unlock();
6260 }
6261#endif
6262 bio_set_dev(bio, dev->bdev);
6263
6264 btrfs_bio_counter_inc_noblocked(fs_info);
6265
6266 if (async)
6267 btrfs_schedule_bio(dev, bio);
6268 else
6269 btrfsic_submit_bio(bio);
6270}
6271
6272static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
6273{
6274 atomic_inc(&bbio->error);
6275 if (atomic_dec_and_test(&bbio->stripes_pending)) {
		/* Should be the original bio */
6277 WARN_ON(bio != bbio->orig_bio);
6278
6279 btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
6280 bio->bi_iter.bi_sector = logical >> 9;
6281 if (atomic_read(&bbio->error) > bbio->max_errors)
6282 bio->bi_status = BLK_STS_IOERR;
6283 else
6284 bio->bi_status = BLK_STS_OK;
6285 btrfs_end_bbio(bbio, bio);
6286 }
6287}
6288
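/*
 * Map a bio to its stripes and submit one copy per stripe, cloning the bio
 * for all but the last stripe. RAID[56] writes and repair reads are handed
 * off to the raid56 code instead.
 */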
6289blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
6290 int mirror_num, int async_submit)
6291{
6292 struct btrfs_device *dev;
6293 struct bio *first_bio = bio;
6294 u64 logical = (u64)bio->bi_iter.bi_sector << 9;
6295 u64 length = 0;
6296 u64 map_length;
6297 int ret;
6298 int dev_nr;
6299 int total_devs;
6300 struct btrfs_bio *bbio = NULL;
6301
6302 length = bio->bi_iter.bi_size;
6303 map_length = length;
6304
6305 btrfs_bio_counter_inc_blocked(fs_info);
6306 ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical,
6307 &map_length, &bbio, mirror_num, 1);
6308 if (ret) {
6309 btrfs_bio_counter_dec(fs_info);
6310 return errno_to_blk_status(ret);
6311 }
6312
6313 total_devs = bbio->num_stripes;
6314 bbio->orig_bio = first_bio;
6315 bbio->private = first_bio->bi_private;
6316 bbio->end_io = first_bio->bi_end_io;
6317 bbio->fs_info = fs_info;
6318 atomic_set(&bbio->stripes_pending, bbio->num_stripes);
6319
6320 if ((bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
6321 ((bio_op(bio) == REQ_OP_WRITE) || (mirror_num > 1))) {
		/*
		 * In this case, map_length has been set to the length of a
		 * single stripe, not the whole write.
		 */
6324 if (bio_op(bio) == REQ_OP_WRITE) {
6325 ret = raid56_parity_write(fs_info, bio, bbio,
6326 map_length);
6327 } else {
6328 ret = raid56_parity_recover(fs_info, bio, bbio,
6329 map_length, mirror_num, 1);
6330 }
6331
6332 btrfs_bio_counter_dec(fs_info);
6333 return errno_to_blk_status(ret);
6334 }
6335
6336 if (map_length < length) {
6337 btrfs_crit(fs_info,
6338 "mapping failed logical %llu bio len %llu len %llu",
6339 logical, length, map_length);
6340 BUG();
6341 }
6342
6343 for (dev_nr = 0; dev_nr < total_devs; dev_nr++) {
6344 dev = bbio->stripes[dev_nr].dev;
6345 if (!dev || !dev->bdev ||
6346 (bio_op(first_bio) == REQ_OP_WRITE &&
6347 !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
6348 bbio_error(bbio, first_bio, logical);
6349 continue;
6350 }
6351
6352 if (dev_nr < total_devs - 1)
6353 bio = btrfs_bio_clone(first_bio);
6354 else
6355 bio = first_bio;
6356
6357 submit_stripe_bio(bbio, bio, bbio->stripes[dev_nr].physical,
6358 dev_nr, async_submit);
6359 }
6360 btrfs_bio_counter_dec(fs_info);
6361 return BLK_STS_OK;
6362}
6363
6364struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid,
6365 u8 *uuid, u8 *fsid)
6366{
6367 struct btrfs_device *device;
6368 struct btrfs_fs_devices *cur_devices;
6369
6370 cur_devices = fs_info->fs_devices;
6371 while (cur_devices) {
6372 if (!fsid ||
6373 !memcmp(cur_devices->fsid, fsid, BTRFS_FSID_SIZE)) {
6374 device = find_device(cur_devices, devid, uuid);
6375 if (device)
6376 return device;
6377 }
6378 cur_devices = cur_devices->seed;
6379 }
6380 return NULL;
6381}
6382
6383static struct btrfs_device *add_missing_dev(struct btrfs_fs_devices *fs_devices,
6384 u64 devid, u8 *dev_uuid)
6385{
6386 struct btrfs_device *device;
6387
6388 device = btrfs_alloc_device(NULL, &devid, dev_uuid);
6389 if (IS_ERR(device))
6390 return device;
6391
6392 list_add(&device->dev_list, &fs_devices->devices);
6393 device->fs_devices = fs_devices;
6394 fs_devices->num_devices++;
6395
6396 set_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
6397 fs_devices->missing_devices++;
6398
6399 return device;
6400}
6401
/**
 * btrfs_alloc_device - allocate struct btrfs_device
 * @fs_info:	used only for generating a new devid, can be NULL if
 *		devid is provided (i.e. @devid != NULL).
 * @devid:	a pointer to devid for this device.  If NULL a new devid
 *		is generated.
 * @uuid:	a pointer to UUID for this device.  If NULL a new UUID
 *		is generated.
 *
 * Return: a pointer to a new &struct btrfs_device on success; ERR_PTR()
 * on error.  Returned struct is not linked onto any lists and must be
 * destroyed with free_device.
 */
6415struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
6416 const u64 *devid,
6417 const u8 *uuid)
6418{
6419 struct btrfs_device *dev;
6420 u64 tmp;
6421
6422 if (WARN_ON(!devid && !fs_info))
6423 return ERR_PTR(-EINVAL);
6424
6425 dev = __alloc_device();
6426 if (IS_ERR(dev))
6427 return dev;
6428
6429 if (devid)
6430 tmp = *devid;
6431 else {
6432 int ret;
6433
6434 ret = find_next_devid(fs_info, &tmp);
6435 if (ret) {
6436 free_device(dev);
6437 return ERR_PTR(ret);
6438 }
6439 }
6440 dev->devid = tmp;
6441
6442 if (uuid)
6443 memcpy(dev->uuid, uuid, BTRFS_UUID_SIZE);
6444 else
6445 generate_random_uuid(dev->uuid);
6446
6447 btrfs_init_work(&dev->work, btrfs_submit_helper,
6448 pending_bios_fn, NULL, NULL);
6449
6450 return dev;
6451}
6452
/* Return -EIO if the chunk item fails any sanity check, 0 otherwise */
6454static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
6455 struct extent_buffer *leaf,
6456 struct btrfs_chunk *chunk, u64 logical)
6457{
6458 u64 length;
6459 u64 stripe_len;
6460 u16 num_stripes;
6461 u16 sub_stripes;
6462 u64 type;
6463
6464 length = btrfs_chunk_length(leaf, chunk);
6465 stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
6466 num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
6467 sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
6468 type = btrfs_chunk_type(leaf, chunk);
6469
6470 if (!num_stripes) {
6471 btrfs_err(fs_info, "invalid chunk num_stripes: %u",
6472 num_stripes);
6473 return -EIO;
6474 }
6475 if (!IS_ALIGNED(logical, fs_info->sectorsize)) {
6476 btrfs_err(fs_info, "invalid chunk logical %llu", logical);
6477 return -EIO;
6478 }
6479 if (btrfs_chunk_sector_size(leaf, chunk) != fs_info->sectorsize) {
6480 btrfs_err(fs_info, "invalid chunk sectorsize %u",
6481 btrfs_chunk_sector_size(leaf, chunk));
6482 return -EIO;
6483 }
6484 if (!length || !IS_ALIGNED(length, fs_info->sectorsize)) {
6485 btrfs_err(fs_info, "invalid chunk length %llu", length);
6486 return -EIO;
6487 }
6488 if (!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN) {
6489 btrfs_err(fs_info, "invalid chunk stripe length: %llu",
6490 stripe_len);
6491 return -EIO;
6492 }
6493 if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) &
6494 type) {
6495 btrfs_err(fs_info, "unrecognized chunk type: %llu",
6496 ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
6497 BTRFS_BLOCK_GROUP_PROFILE_MASK) &
6498 btrfs_chunk_type(leaf, chunk));
6499 return -EIO;
6500 }
6501 if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
6502 (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) ||
6503 (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
6504 (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) ||
6505 (type & BTRFS_BLOCK_GROUP_DUP && num_stripes > 2) ||
6506 ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 &&
6507 num_stripes != 1)) {
6508 btrfs_err(fs_info,
6509 "invalid num_stripes:sub_stripes %u:%u for profile %llu",
6510 num_stripes, sub_stripes,
6511 type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
6512 return -EIO;
6513 }
6514
6515 return 0;
6516}
6517
6518static void btrfs_report_missing_device(struct btrfs_fs_info *fs_info,
6519 u64 devid, u8 *uuid, bool error)
6520{
6521 if (error)
6522 btrfs_err_rl(fs_info, "devid %llu uuid %pU is missing",
6523 devid, uuid);
6524 else
6525 btrfs_warn_rl(fs_info, "devid %llu uuid %pU is missing",
6526 devid, uuid);
6527}
6528
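/*
 * Turn an on-disk chunk item into an extent_map in the mapping tree. Stripes
 * that reference devices we have not seen are tolerated only on degraded
 * mounts, where placeholder "missing" devices are created for them.
 */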
6529static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
6530 struct extent_buffer *leaf,
6531 struct btrfs_chunk *chunk)
6532{
6533 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
6534 struct map_lookup *map;
6535 struct extent_map *em;
6536 u64 logical;
6537 u64 length;
6538 u64 devid;
6539 u8 uuid[BTRFS_UUID_SIZE];
6540 int num_stripes;
6541 int ret;
6542 int i;
6543
6544 logical = key->offset;
6545 length = btrfs_chunk_length(leaf, chunk);
6546 num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
6547
6548 ret = btrfs_check_chunk_valid(fs_info, leaf, chunk, logical);
6549 if (ret)
6550 return ret;
6551
6552 read_lock(&map_tree->map_tree.lock);
6553 em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
6554 read_unlock(&map_tree->map_tree.lock);
6555
	/* Already mapped? */
6557 if (em && em->start <= logical && em->start + em->len > logical) {
6558 free_extent_map(em);
6559 return 0;
6560 } else if (em) {
6561 free_extent_map(em);
6562 }
6563
6564 em = alloc_extent_map();
6565 if (!em)
6566 return -ENOMEM;
6567 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
6568 if (!map) {
6569 free_extent_map(em);
6570 return -ENOMEM;
6571 }
6572
6573 set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
6574 em->map_lookup = map;
6575 em->start = logical;
6576 em->len = length;
6577 em->orig_start = 0;
6578 em->block_start = 0;
6579 em->block_len = em->len;
6580
6581 map->num_stripes = num_stripes;
6582 map->io_width = btrfs_chunk_io_width(leaf, chunk);
6583 map->io_align = btrfs_chunk_io_align(leaf, chunk);
6584 map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
6585 map->type = btrfs_chunk_type(leaf, chunk);
6586 map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
6587 for (i = 0; i < num_stripes; i++) {
6588 map->stripes[i].physical =
6589 btrfs_stripe_offset_nr(leaf, chunk, i);
6590 devid = btrfs_stripe_devid_nr(leaf, chunk, i);
6591 read_extent_buffer(leaf, uuid, (unsigned long)
6592 btrfs_stripe_dev_uuid_nr(chunk, i),
6593 BTRFS_UUID_SIZE);
6594 map->stripes[i].dev = btrfs_find_device(fs_info, devid,
6595 uuid, NULL);
6596 if (!map->stripes[i].dev &&
6597 !btrfs_test_opt(fs_info, DEGRADED)) {
6598 free_extent_map(em);
6599 btrfs_report_missing_device(fs_info, devid, uuid, true);
6600 return -ENOENT;
6601 }
6602 if (!map->stripes[i].dev) {
6603 map->stripes[i].dev =
6604 add_missing_dev(fs_info->fs_devices, devid,
6605 uuid);
6606 if (IS_ERR(map->stripes[i].dev)) {
6607 free_extent_map(em);
6608 btrfs_err(fs_info,
6609 "failed to init missing dev %llu: %ld",
6610 devid, PTR_ERR(map->stripes[i].dev));
6611 return PTR_ERR(map->stripes[i].dev);
6612 }
6613 btrfs_report_missing_device(fs_info, devid, uuid, false);
6614 }
6615 set_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
6616 &(map->stripes[i].dev->dev_state));
6617
6618 }
6619
6620 write_lock(&map_tree->map_tree.lock);
6621 ret = add_extent_mapping(&map_tree->map_tree, em, 0);
6622 write_unlock(&map_tree->map_tree.lock);
6623 BUG_ON(ret);
6624 free_extent_map(em);
6625
6626 return 0;
6627}
6628
6629static void fill_device_from_item(struct extent_buffer *leaf,
6630 struct btrfs_dev_item *dev_item,
6631 struct btrfs_device *device)
6632{
6633 unsigned long ptr;
6634
6635 device->devid = btrfs_device_id(leaf, dev_item);
6636 device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item);
6637 device->total_bytes = device->disk_total_bytes;
6638 device->commit_total_bytes = device->disk_total_bytes;
6639 device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
6640 device->commit_bytes_used = device->bytes_used;
6641 device->type = btrfs_device_type(leaf, dev_item);
6642 device->io_align = btrfs_device_io_align(leaf, dev_item);
6643 device->io_width = btrfs_device_io_width(leaf, dev_item);
6644 device->sector_size = btrfs_device_sector_size(leaf, dev_item);
6645 WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID);
6646 clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
6647
6648 ptr = btrfs_device_uuid(dev_item);
6649 read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
6650}
6651
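/*
 * Find the fs_devices of the seed filesystem a device item refers to,
 * opening and cloning the seed devices on first use, or creating an empty
 * placeholder when mounted degraded.
 */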
6652static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info,
6653 u8 *fsid)
6654{
6655 struct btrfs_fs_devices *fs_devices;
6656 int ret;
6657
6658 lockdep_assert_held(&uuid_mutex);
6659 ASSERT(fsid);
6660
6661 fs_devices = fs_info->fs_devices->seed;
6662 while (fs_devices) {
6663 if (!memcmp(fs_devices->fsid, fsid, BTRFS_FSID_SIZE))
6664 return fs_devices;
6665
6666 fs_devices = fs_devices->seed;
6667 }
6668
6669 fs_devices = find_fsid(fsid);
6670 if (!fs_devices) {
6671 if (!btrfs_test_opt(fs_info, DEGRADED))
6672 return ERR_PTR(-ENOENT);
6673
6674 fs_devices = alloc_fs_devices(fsid);
6675 if (IS_ERR(fs_devices))
6676 return fs_devices;
6677
6678 fs_devices->seeding = 1;
6679 fs_devices->opened = 1;
6680 return fs_devices;
6681 }
6682
6683 fs_devices = clone_fs_devices(fs_devices);
6684 if (IS_ERR(fs_devices))
6685 return fs_devices;
6686
6687 ret = __btrfs_open_devices(fs_devices, FMODE_READ,
6688 fs_info->bdev_holder);
6689 if (ret) {
6690 free_fs_devices(fs_devices);
6691 fs_devices = ERR_PTR(ret);
6692 goto out;
6693 }
6694
6695 if (!fs_devices->seeding) {
6696 __btrfs_close_devices(fs_devices);
6697 free_fs_devices(fs_devices);
6698 fs_devices = ERR_PTR(-EINVAL);
6699 goto out;
6700 }
6701
6702 fs_devices->seed = fs_info->fs_devices->seed;
6703 fs_info->fs_devices->seed = fs_devices;
6704out:
6705 return fs_devices;
6706}
6707
6708static int read_one_dev(struct btrfs_fs_info *fs_info,
6709 struct extent_buffer *leaf,
6710 struct btrfs_dev_item *dev_item)
6711{
6712 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
6713 struct btrfs_device *device;
6714 u64 devid;
6715 int ret;
6716 u8 fs_uuid[BTRFS_FSID_SIZE];
6717 u8 dev_uuid[BTRFS_UUID_SIZE];
6718
6719 devid = btrfs_device_id(leaf, dev_item);
6720 read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
6721 BTRFS_UUID_SIZE);
6722 read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
6723 BTRFS_FSID_SIZE);
6724
6725 if (memcmp(fs_uuid, fs_info->fsid, BTRFS_FSID_SIZE)) {
6726 fs_devices = open_seed_devices(fs_info, fs_uuid);
6727 if (IS_ERR(fs_devices))
6728 return PTR_ERR(fs_devices);
6729 }
6730
6731 device = btrfs_find_device(fs_info, devid, dev_uuid, fs_uuid);
6732 if (!device) {
6733 if (!btrfs_test_opt(fs_info, DEGRADED)) {
6734 btrfs_report_missing_device(fs_info, devid,
6735 dev_uuid, true);
6736 return -ENOENT;
6737 }
6738
6739 device = add_missing_dev(fs_devices, devid, dev_uuid);
6740 if (IS_ERR(device)) {
6741 btrfs_err(fs_info,
6742 "failed to add missing dev %llu: %ld",
6743 devid, PTR_ERR(device));
6744 return PTR_ERR(device);
6745 }
6746 btrfs_report_missing_device(fs_info, devid, dev_uuid, false);
6747 } else {
6748 if (!device->bdev) {
6749 if (!btrfs_test_opt(fs_info, DEGRADED)) {
6750 btrfs_report_missing_device(fs_info,
6751 devid, dev_uuid, true);
6752 return -ENOENT;
6753 }
6754 btrfs_report_missing_device(fs_info, devid,
6755 dev_uuid, false);
6756 }
6757
6758 if (!device->bdev &&
6759 !test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) {
 /*
  * This happens when a device that was properly set up in the
  * device info lists suddenly goes bad: device->bdev is NULL
  * although the device is still listed, so flag it as missing
  * here.
  */
6766 device->fs_devices->missing_devices++;
6767 set_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
6768 }
6769
 /* Move the device to the fs_devices that this dev item belongs to */
6771 if (device->fs_devices != fs_devices) {
6772 ASSERT(test_bit(BTRFS_DEV_STATE_MISSING,
6773 &device->dev_state));
6774
6775 list_move(&device->dev_list, &fs_devices->devices);
6776 device->fs_devices->num_devices--;
6777 fs_devices->num_devices++;
6778
6779 device->fs_devices->missing_devices--;
6780 fs_devices->missing_devices++;
6781
6782 device->fs_devices = fs_devices;
6783 }
6784 }
6785
6786 if (device->fs_devices != fs_info->fs_devices) {
6787 BUG_ON(test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state));
6788 if (device->generation !=
6789 btrfs_device_generation(leaf, dev_item))
6790 return -EINVAL;
6791 }
6792
6793 fill_device_from_item(leaf, dev_item, device);
6794 set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
6795 if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
6796 !test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
6797 device->fs_devices->total_rw_bytes += device->total_bytes;
6798 atomic64_add(device->total_bytes - device->bytes_used,
6799 &fs_info->free_chunk_space);
6800 }
6801 ret = 0;
6802 return ret;
6803}
6804
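/*
 * Read the system chunk array embedded in the superblock and create the
 * chunk mappings described there.  This has to happen before the chunk tree
 * itself can be read, because the chunk tree lives inside a SYSTEM chunk.
 */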
6805int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
6806{
6807 struct btrfs_root *root = fs_info->tree_root;
6808 struct btrfs_super_block *super_copy = fs_info->super_copy;
6809 struct extent_buffer *sb;
6810 struct btrfs_disk_key *disk_key;
6811 struct btrfs_chunk *chunk;
6812 u8 *array_ptr;
6813 unsigned long sb_array_offset;
6814 int ret = 0;
6815 u32 num_stripes;
6816 u32 array_size;
6817 u32 len = 0;
6818 u32 cur_offset;
6819 u64 type;
6820 struct btrfs_key key;
6821
6822 ASSERT(BTRFS_SUPER_INFO_SIZE <= fs_info->nodesize);
6823
 /*
  * This creates an extent buffer of nodesize, although the superblock
  * copy is only BTRFS_SUPER_INFO_SIZE bytes; only that first part of
  * the buffer is actually used below.
  */
6828 sb = btrfs_find_create_tree_block(fs_info, BTRFS_SUPER_INFO_OFFSET);
6829 if (IS_ERR(sb))
6830 return PTR_ERR(sb);
6831 set_extent_buffer_uptodate(sb);
6832 btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
6833
 /*
  * The sb extent buffer is artificial and exists only so the system
  * chunk array can be read with the regular extent buffer helpers.
  * set_extent_buffer_uptodate() does not mark the whole first page
  * uptodate when the page is larger than the superblock (the extent
  * does not cover the full page), so the write_extent_buffer() call
  * below would trigger a WARN_ON when it checks the page state.
  *
  * Explicitly mark the first page uptodate in that case to silence
  * the warning.
  */
6845 if (PAGE_SIZE > BTRFS_SUPER_INFO_SIZE)
6846 SetPageUptodate(sb->pages[0]);
6847
6848 write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
6849 array_size = btrfs_super_sys_array_size(super_copy);
6850
6851 array_ptr = super_copy->sys_chunk_array;
6852 sb_array_offset = offsetof(struct btrfs_super_block, sys_chunk_array);
6853 cur_offset = 0;
6854
6855 while (cur_offset < array_size) {
6856 disk_key = (struct btrfs_disk_key *)array_ptr;
6857 len = sizeof(*disk_key);
6858 if (cur_offset + len > array_size)
6859 goto out_short_read;
6860
6861 btrfs_disk_key_to_cpu(&key, disk_key);
6862
6863 array_ptr += len;
6864 sb_array_offset += len;
6865 cur_offset += len;
6866
6867 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
6868 chunk = (struct btrfs_chunk *)sb_array_offset;
 /*
  * At least one btrfs_chunk with one stripe must be present,
  * the exact stripe count check comes afterwards.
  */
6873 len = btrfs_chunk_item_size(1);
6874 if (cur_offset + len > array_size)
6875 goto out_short_read;
6876
6877 num_stripes = btrfs_chunk_num_stripes(sb, chunk);
6878 if (!num_stripes) {
6879 btrfs_err(fs_info,
6880 "invalid number of stripes %u in sys_array at offset %u",
6881 num_stripes, cur_offset);
6882 ret = -EIO;
6883 break;
6884 }
6885
6886 type = btrfs_chunk_type(sb, chunk);
6887 if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) {
6888 btrfs_err(fs_info,
6889 "invalid chunk type %llu in sys_array at offset %u",
6890 type, cur_offset);
6891 ret = -EIO;
6892 break;
6893 }
6894
6895 len = btrfs_chunk_item_size(num_stripes);
6896 if (cur_offset + len > array_size)
6897 goto out_short_read;
6898
6899 ret = read_one_chunk(fs_info, &key, sb, chunk);
6900 if (ret)
6901 break;
6902 } else {
6903 btrfs_err(fs_info,
6904 "unexpected item type %u in sys_array at offset %u",
6905 (u32)key.type, cur_offset);
6906 ret = -EIO;
6907 break;
6908 }
6909 array_ptr += len;
6910 sb_array_offset += len;
6911 cur_offset += len;
6912 }
6913 clear_extent_buffer_uptodate(sb);
6914 free_extent_buffer_stale(sb);
6915 return ret;
6916
6917out_short_read:
6918 btrfs_err(fs_info, "sys_array too short to read %u bytes at offset %u",
6919 len, cur_offset);
6920 clear_extent_buffer_uptodate(sb);
6921 free_extent_buffer_stale(sb);
6922 return -EIO;
6923}
6924
/*
 * Check whether all chunks in the filesystem can still be read and written
 * with the currently available devices, i.e. whether a degraded read-write
 * mount is possible.
 *
 * If @failing_dev is given, it is accounted as missing as well.  Returns
 * true if every chunk tolerates the missing devices, false otherwise.
 */
6933bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
6934 struct btrfs_device *failing_dev)
6935{
6936 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
6937 struct extent_map *em;
6938 u64 next_start = 0;
6939 bool ret = true;
6940
6941 read_lock(&map_tree->map_tree.lock);
6942 em = lookup_extent_mapping(&map_tree->map_tree, 0, (u64)-1);
6943 read_unlock(&map_tree->map_tree.lock);
6944
6945 if (!em) {
6946 ret = false;
6947 goto out;
6948 }
6949 while (em) {
6950 struct map_lookup *map;
6951 int missing = 0;
6952 int max_tolerated;
6953 int i;
6954
6955 map = em->map_lookup;
6956 max_tolerated =
6957 btrfs_get_num_tolerated_disk_barrier_failures(
6958 map->type);
6959 for (i = 0; i < map->num_stripes; i++) {
6960 struct btrfs_device *dev = map->stripes[i].dev;
6961
6962 if (!dev || !dev->bdev ||
6963 test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
6964 dev->last_flush_error)
6965 missing++;
6966 else if (failing_dev && failing_dev == dev)
6967 missing++;
6968 }
6969 if (missing > max_tolerated) {
6970 if (!failing_dev)
6971 btrfs_warn(fs_info,
6972 "chunk %llu missing %d devices, max tolerance is %d for writeable mount",
6973 em->start, missing, max_tolerated);
6974 free_extent_map(em);
6975 ret = false;
6976 goto out;
6977 }
6978 next_start = extent_map_end(em);
6979 free_extent_map(em);
6980
6981 read_lock(&map_tree->map_tree.lock);
6982 em = lookup_extent_mapping(&map_tree->map_tree, next_start,
6983 (u64)(-1) - next_start);
6984 read_unlock(&map_tree->map_tree.lock);
6985 }
6986out:
6987 return ret;
6988}
6989
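/*
 * Read the whole chunk tree: first every dev item, then every chunk item,
 * building the in-memory device list and chunk mappings, and finally
 * cross-check the device count and sizes against the superblock.
 */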
6990int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
6991{
6992 struct btrfs_root *root = fs_info->chunk_root;
6993 struct btrfs_path *path;
6994 struct extent_buffer *leaf;
6995 struct btrfs_key key;
6996 struct btrfs_key found_key;
6997 int ret;
6998 int slot;
6999 u64 total_dev = 0;
7000
7001 path = btrfs_alloc_path();
7002 if (!path)
7003 return -ENOMEM;
7004
7005 mutex_lock(&uuid_mutex);
7006 mutex_lock(&fs_info->chunk_mutex);
7007
 /*
  * Read all device items first and then all the chunk items. All
  * device items are found before any chunk item, because their
  * object id (BTRFS_DEV_ITEMS_OBJECTID) is smaller than the lowest
  * possible object id of a chunk item
  * (BTRFS_FIRST_CHUNK_TREE_OBJECTID).
  */
7014 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
7015 key.offset = 0;
7016 key.type = 0;
7017 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
7018 if (ret < 0)
7019 goto error;
7020 while (1) {
7021 leaf = path->nodes[0];
7022 slot = path->slots[0];
7023 if (slot >= btrfs_header_nritems(leaf)) {
7024 ret = btrfs_next_leaf(root, path);
7025 if (ret == 0)
7026 continue;
7027 if (ret < 0)
7028 goto error;
7029 break;
7030 }
7031 btrfs_item_key_to_cpu(leaf, &found_key, slot);
7032 if (found_key.type == BTRFS_DEV_ITEM_KEY) {
7033 struct btrfs_dev_item *dev_item;
7034 dev_item = btrfs_item_ptr(leaf, slot,
7035 struct btrfs_dev_item);
7036 ret = read_one_dev(fs_info, leaf, dev_item);
7037 if (ret)
7038 goto error;
7039 total_dev++;
7040 } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
7041 struct btrfs_chunk *chunk;
7042 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
7043 ret = read_one_chunk(fs_info, &found_key, leaf, chunk);
7044 if (ret)
7045 goto error;
7046 }
7047 path->slots[0]++;
7048 }
7049
 /*
  * After loading the chunk tree we have all device information, so do
  * another round of validation checks against the superblock.
  */
7054 if (total_dev != fs_info->fs_devices->total_devices) {
7055 btrfs_err(fs_info,
7056 "super_num_devices %llu mismatch with num_devices %llu found here",
7057 btrfs_super_num_devices(fs_info->super_copy),
7058 total_dev);
7059 ret = -EINVAL;
7060 goto error;
7061 }
7062 if (btrfs_super_total_bytes(fs_info->super_copy) <
7063 fs_info->fs_devices->total_rw_bytes) {
7064 btrfs_err(fs_info,
7065 "super_total_bytes %llu mismatch with fs_devices total_rw_bytes %llu",
7066 btrfs_super_total_bytes(fs_info->super_copy),
7067 fs_info->fs_devices->total_rw_bytes);
7068 ret = -EINVAL;
7069 goto error;
7070 }
7071 ret = 0;
7072error:
7073 mutex_unlock(&fs_info->chunk_mutex);
7074 mutex_unlock(&uuid_mutex);
7075
7076 btrfs_free_path(path);
7077 return ret;
7078}
7079
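/* Point every device, including seed devices, at this fs_info */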
7080void btrfs_init_devices_late(struct btrfs_fs_info *fs_info)
7081{
7082 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
7083 struct btrfs_device *device;
7084
7085 while (fs_devices) {
7086 mutex_lock(&fs_devices->device_list_mutex);
7087 list_for_each_entry(device, &fs_devices->devices, dev_list)
7088 device->fs_info = fs_info;
7089 mutex_unlock(&fs_devices->device_list_mutex);
7090
7091 fs_devices = fs_devices->seed;
7092 }
7093}
7094
7095static void __btrfs_reset_dev_stats(struct btrfs_device *dev)
7096{
7097 int i;
7098
7099 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
7100 btrfs_dev_stat_reset(dev, i);
7101}
7102
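/*
 * Load the persistent device statistics items from the device tree.  A
 * device without an item simply starts with all counters reset.
 */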
7103int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info)
7104{
7105 struct btrfs_key key;
7106 struct btrfs_key found_key;
7107 struct btrfs_root *dev_root = fs_info->dev_root;
7108 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
7109 struct extent_buffer *eb;
7110 int slot;
7111 int ret = 0;
7112 struct btrfs_device *device;
7113 struct btrfs_path *path = NULL;
7114 int i;
7115
7116 path = btrfs_alloc_path();
7117 if (!path) {
7118 ret = -ENOMEM;
7119 goto out;
7120 }
7121
7122 mutex_lock(&fs_devices->device_list_mutex);
7123 list_for_each_entry(device, &fs_devices->devices, dev_list) {
7124 int item_size;
7125 struct btrfs_dev_stats_item *ptr;
7126
7127 key.objectid = BTRFS_DEV_STATS_OBJECTID;
7128 key.type = BTRFS_PERSISTENT_ITEM_KEY;
7129 key.offset = device->devid;
7130 ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);
7131 if (ret) {
7132 __btrfs_reset_dev_stats(device);
7133 device->dev_stats_valid = 1;
7134 btrfs_release_path(path);
7135 continue;
7136 }
7137 slot = path->slots[0];
7138 eb = path->nodes[0];
7139 btrfs_item_key_to_cpu(eb, &found_key, slot);
7140 item_size = btrfs_item_size_nr(eb, slot);
7141
7142 ptr = btrfs_item_ptr(eb, slot,
7143 struct btrfs_dev_stats_item);
7144
7145 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
7146 if (item_size >= (1 + i) * sizeof(__le64))
7147 btrfs_dev_stat_set(device, i,
7148 btrfs_dev_stats_value(eb, ptr, i));
7149 else
7150 btrfs_dev_stat_reset(device, i);
7151 }
7152
7153 device->dev_stats_valid = 1;
7154 btrfs_dev_stat_print_on_load(device);
7155 btrfs_release_path(path);
7156 }
7157 mutex_unlock(&fs_devices->device_list_mutex);
7158
7159out:
7160 btrfs_free_path(path);
7161 return ret < 0 ? ret : 0;
7162}
7163
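/*
 * Write the in-memory stat counters of @device into its dev_stats item in
 * the device tree, (re)creating the item if it is missing or too small.
 */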
7164static int update_dev_stat_item(struct btrfs_trans_handle *trans,
7165 struct btrfs_fs_info *fs_info,
7166 struct btrfs_device *device)
7167{
7168 struct btrfs_root *dev_root = fs_info->dev_root;
7169 struct btrfs_path *path;
7170 struct btrfs_key key;
7171 struct extent_buffer *eb;
7172 struct btrfs_dev_stats_item *ptr;
7173 int ret;
7174 int i;
7175
7176 key.objectid = BTRFS_DEV_STATS_OBJECTID;
7177 key.type = BTRFS_PERSISTENT_ITEM_KEY;
7178 key.offset = device->devid;
7179
7180 path = btrfs_alloc_path();
7181 if (!path)
7182 return -ENOMEM;
7183 ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
7184 if (ret < 0) {
7185 btrfs_warn_in_rcu(fs_info,
7186 "error %d while searching for dev_stats item for device %s",
7187 ret, rcu_str_deref(device->name));
7188 goto out;
7189 }
7190
7191 if (ret == 0 &&
7192 btrfs_item_size_nr(path->nodes[0], path->slots[0]) < sizeof(*ptr)) {
 /* the existing item is too small, delete it and insert a new one */
7194 ret = btrfs_del_item(trans, dev_root, path);
7195 if (ret != 0) {
7196 btrfs_warn_in_rcu(fs_info,
7197 "delete too small dev_stats item for device %s failed %d",
7198 rcu_str_deref(device->name), ret);
7199 goto out;
7200 }
7201 ret = 1;
7202 }
7203
7204 if (ret == 1) {
 /* need to insert a new item */
7206 btrfs_release_path(path);
7207 ret = btrfs_insert_empty_item(trans, dev_root, path,
7208 &key, sizeof(*ptr));
7209 if (ret < 0) {
7210 btrfs_warn_in_rcu(fs_info,
7211 "insert dev_stats item for device %s failed %d",
7212 rcu_str_deref(device->name), ret);
7213 goto out;
7214 }
7215 }
7216
7217 eb = path->nodes[0];
7218 ptr = btrfs_item_ptr(eb, path->slots[0], struct btrfs_dev_stats_item);
7219 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
7220 btrfs_set_dev_stats_value(eb, ptr, i,
7221 btrfs_dev_stat_read(device, i));
7222 btrfs_mark_buffer_dirty(eb);
7223
7224out:
7225 btrfs_free_path(path);
7226 return ret;
7227}
7228
/*
 * Called during transaction commit to write all changed device stats items
 * to disk.
 */
7232int btrfs_run_dev_stats(struct btrfs_trans_handle *trans,
7233 struct btrfs_fs_info *fs_info)
7234{
7235 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
7236 struct btrfs_device *device;
7237 int stats_cnt;
7238 int ret = 0;
7239
7240 mutex_lock(&fs_devices->device_list_mutex);
7241 list_for_each_entry(device, &fs_devices->devices, dev_list) {
7242 stats_cnt = atomic_read(&device->dev_stats_ccnt);
7243 if (!device->dev_stats_valid || stats_cnt == 0)
7244 continue;
7245
 /*
  * There is a LOAD-LOAD control dependency between the value of
  * dev_stats_ccnt read above and reading the in-memory counters
  * that are written to disk below; such control dependencies
  * require an explicit read memory barrier.
  *
  * This barrier pairs with the barriers in btrfs_dev_stat_inc()/
  * btrfs_dev_stat_set() and with the full barrier implied by the
  * atomic_xchg() in btrfs_dev_stat_read_and_reset().
  */
7258 smp_rmb();
7259
7260 ret = update_dev_stat_item(trans, fs_info, device);
7261 if (!ret)
7262 atomic_sub(stats_cnt, &device->dev_stats_ccnt);
7263 }
7264 mutex_unlock(&fs_devices->device_list_mutex);
7265
7266 return ret;
7267}
7268
7269void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index)
7270{
7271 btrfs_dev_stat_inc(dev, index);
7272 btrfs_dev_stat_print_on_error(dev);
7273}
7274
7275static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
7276{
7277 if (!dev->dev_stats_valid)
7278 return;
7279 btrfs_err_rl_in_rcu(dev->fs_info,
7280 "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u",
7281 rcu_str_deref(dev->name),
7282 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
7283 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
7284 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
7285 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS),
7286 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS));
7287}
7288
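/* Print the stats loaded from disk, but only if at least one is non-zero */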
7289static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
7290{
7291 int i;
7292
7293 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
7294 if (btrfs_dev_stat_read(dev, i) != 0)
7295 break;
7296 if (i == BTRFS_DEV_STAT_VALUES_MAX)
7297 return;
7298
7299 btrfs_info_in_rcu(dev->fs_info,
7300 "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u",
7301 rcu_str_deref(dev->name),
7302 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
7303 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
7304 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
7305 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS),
7306 btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS));
7307}
7308
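/*
 * Fill a btrfs_ioctl_get_dev_stats structure with (and, when
 * BTRFS_DEV_STATS_RESET is set, reset) the counters of the device given by
 * @stats->devid.
 */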
7309int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
7310 struct btrfs_ioctl_get_dev_stats *stats)
7311{
7312 struct btrfs_device *dev;
7313 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
7314 int i;
7315
7316 mutex_lock(&fs_devices->device_list_mutex);
7317 dev = btrfs_find_device(fs_info, stats->devid, NULL, NULL);
7318 mutex_unlock(&fs_devices->device_list_mutex);
7319
7320 if (!dev) {
7321 btrfs_warn(fs_info, "get dev_stats failed, device not found");
7322 return -ENODEV;
7323 } else if (!dev->dev_stats_valid) {
7324 btrfs_warn(fs_info, "get dev_stats failed, not yet valid");
7325 return -ENODEV;
7326 } else if (stats->flags & BTRFS_DEV_STATS_RESET) {
7327 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
7328 if (stats->nr_items > i)
7329 stats->values[i] =
7330 btrfs_dev_stat_read_and_reset(dev, i);
7331 else
7332 btrfs_dev_stat_reset(dev, i);
7333 }
7334 } else {
7335 for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
7336 if (stats->nr_items > i)
7337 stats->values[i] = btrfs_dev_stat_read(dev, i);
7338 }
7339 if (stats->nr_items > BTRFS_DEV_STAT_VALUES_MAX)
7340 stats->nr_items = BTRFS_DEV_STAT_VALUES_MAX;
7341 return 0;
7342}
7343
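/*
 * Wipe the btrfs magic from every superblock copy on @bdev so the device is
 * no longer detected as a btrfs member, then notify userspace.
 */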
7344void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path)
7345{
7346 struct buffer_head *bh;
7347 struct btrfs_super_block *disk_super;
7348 int copy_num;
7349
7350 if (!bdev)
7351 return;
7352
7353 for (copy_num = 0; copy_num < BTRFS_SUPER_MIRROR_MAX;
7354 copy_num++) {
7355
7356 if (btrfs_read_dev_one_super(bdev, copy_num, &bh))
7357 continue;
7358
7359 disk_super = (struct btrfs_super_block *)bh->b_data;
7360
7361 memset(&disk_super->magic, 0, sizeof(disk_super->magic));
7362 set_buffer_dirty(bh);
7363 sync_dirty_buffer(bh);
7364 brelse(bh);
7365 }
7366
 /* Notify udev that the device has changed */
7368 btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
7369
 /* Update the device path's timestamps so userspace (e.g. libblkid) notices */
7371 update_dev_time(device_path);
7372}
7373
/*
 * Update commit_total_bytes of every device that was resized in this
 * transaction; this committed size is what gets written out with the
 * super blocks.
 */
7378void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info)
7379{
7380 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
7381 struct btrfs_device *curr, *next;
7382
7383 if (list_empty(&fs_devices->resized_devices))
7384 return;
7385
7386 mutex_lock(&fs_devices->device_list_mutex);
7387 mutex_lock(&fs_info->chunk_mutex);
7388 list_for_each_entry_safe(curr, next, &fs_devices->resized_devices,
7389 resized_list) {
7390 list_del_init(&curr->resized_list);
7391 curr->commit_total_bytes = curr->disk_total_bytes;
7392 }
7393 mutex_unlock(&fs_info->chunk_mutex);
7394 mutex_unlock(&fs_devices->device_list_mutex);
7395}
7396
/* Must be invoked during the transaction commit */
7398void btrfs_update_commit_device_bytes_used(struct btrfs_transaction *trans)
7399{
7400 struct btrfs_fs_info *fs_info = trans->fs_info;
7401 struct extent_map *em;
7402 struct map_lookup *map;
7403 struct btrfs_device *dev;
7404 int i;
7405
7406 if (list_empty(&trans->pending_chunks))
7407 return;
7408
 /* Take chunk_mutex so device->bytes_used is stable while it is copied */
7410 mutex_lock(&fs_info->chunk_mutex);
7411 list_for_each_entry(em, &trans->pending_chunks, list) {
7412 map = em->map_lookup;
7413
7414 for (i = 0; i < map->num_stripes; i++) {
7415 dev = map->stripes[i].dev;
7416 dev->commit_bytes_used = dev->bytes_used;
7417 }
7418 }
7419 mutex_unlock(&fs_info->chunk_mutex);
7420}
7421
7422void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info)
7423{
7424 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
7425 while (fs_devices) {
7426 fs_devices->fs_info = fs_info;
7427 fs_devices = fs_devices->seed;
7428 }
7429}
7430
7431void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info)
7432{
7433 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
7434 while (fs_devices) {
7435 fs_devices->fs_info = NULL;
7436 fs_devices = fs_devices->seed;
7437 }
7438}
7439