1
2
3
4
5
6#include <linux/kernel.h>
7#include <linux/bio.h>
8#include <linux/buffer_head.h>
9#include <linux/file.h>
10#include <linux/fs.h>
11#include <linux/pagemap.h>
12#include <linux/highmem.h>
13#include <linux/time.h>
14#include <linux/init.h>
15#include <linux/string.h>
16#include <linux/backing-dev.h>
17#include <linux/writeback.h>
18#include <linux/compat.h>
19#include <linux/xattr.h>
20#include <linux/posix_acl.h>
21#include <linux/falloc.h>
22#include <linux/slab.h>
23#include <linux/ratelimit.h>
24#include <linux/btrfs.h>
25#include <linux/blkdev.h>
26#include <linux/posix_acl_xattr.h>
27#include <linux/uio.h>
28#include <linux/magic.h>
29#include <linux/iversion.h>
30#include <linux/swap.h>
31#include <asm/unaligned.h>
32#include "ctree.h"
33#include "disk-io.h"
34#include "transaction.h"
35#include "btrfs_inode.h"
36#include "print-tree.h"
37#include "ordered-data.h"
38#include "xattr.h"
39#include "tree-log.h"
40#include "volumes.h"
41#include "compression.h"
42#include "locking.h"
43#include "free-space-cache.h"
44#include "inode-map.h"
45#include "backref.h"
46#include "props.h"
47#include "qgroup.h"
48#include "dedupe.h"
49
/* Arguments passed through iget5_locked() when looking up a btrfs inode. */
struct btrfs_iget_args {
	struct btrfs_key *location;	/* key of the inode item to find */
	struct btrfs_root *root;	/* root (subvolume) the inode lives in */
};
54
/* Per-call state carried across the direct IO submission path. */
struct btrfs_dio_data {
	u64 reserve;			/* bytes of space still reserved for this DIO */
	u64 unsubmitted_oe_range_start;	/* start of ordered-extent range not yet submitted */
	u64 unsubmitted_oe_range_end;	/* end of that range */
	int overwrite;			/* non-zero when overwriting existing extents */
};
61
/*
 * Operations vectors declared up front; being static, their definitions
 * appear later in this translation unit.
 */
static const struct inode_operations btrfs_dir_inode_operations;
static const struct inode_operations btrfs_symlink_inode_operations;
static const struct inode_operations btrfs_dir_ro_inode_operations;
static const struct inode_operations btrfs_special_inode_operations;
static const struct inode_operations btrfs_file_inode_operations;
static const struct address_space_operations btrfs_aops;
static const struct file_operations btrfs_dir_file_operations;
static const struct extent_io_ops btrfs_extent_io_ops;

/* slab caches for frequently allocated btrfs objects */
static struct kmem_cache *btrfs_inode_cachep;
struct kmem_cache *btrfs_trans_handle_cachep;
struct kmem_cache *btrfs_path_cachep;
struct kmem_cache *btrfs_free_space_cachep;
75
/*
 * Map the S_IFMT bits of an inode mode to the btrfs on-disk directory
 * entry file type (BTRFS_FT_*).
 */
#define S_SHIFT 12
static const unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
	[S_IFREG >> S_SHIFT]	= BTRFS_FT_REG_FILE,
	[S_IFDIR >> S_SHIFT]	= BTRFS_FT_DIR,
	[S_IFCHR >> S_SHIFT]	= BTRFS_FT_CHRDEV,
	[S_IFBLK >> S_SHIFT]	= BTRFS_FT_BLKDEV,
	[S_IFIFO >> S_SHIFT]	= BTRFS_FT_FIFO,
	[S_IFSOCK >> S_SHIFT]	= BTRFS_FT_SOCK,
	[S_IFLNK >> S_SHIFT]	= BTRFS_FT_SYMLINK,
};
86
/* forward declarations for helpers defined later in this file */
static int btrfs_setsize(struct inode *inode, struct iattr *attr);
static int btrfs_truncate(struct inode *inode, bool skip_writeback);
static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
static noinline int cow_file_range(struct inode *inode,
				   struct page *locked_page,
				   u64 start, u64 end, u64 delalloc_end,
				   int *page_started, unsigned long *nr_written,
				   int unlock, struct btrfs_dedupe_hash *hash);
static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
				       u64 orig_start, u64 block_start,
				       u64 block_len, u64 orig_block_len,
				       u64 ram_bytes, int compress_type,
				       int type);

static void __endio_write_update_ordered(struct inode *inode,
					 const u64 offset, const u64 bytes,
					 const bool uptodate);
104
105
106
107
108
109
110
111
112
113
114
/*
 * Cleanup all submitted ordered extents in the given range to handle errors
 * from the delalloc submission path: clear the Private2 bit on every page in
 * the range and then finish the covering ordered extent(s) as failed
 * (uptodate == false).
 *
 * NOTE(review): the caller is expected not to clear both EXTENT_DO_ACCOUNTING
 * and EXTENT_DELALLOC for this range itself, since finishing the ordered
 * extent here is what releases the reserved metadata — confirm against the
 * callers of this helper.
 */
static inline void btrfs_cleanup_ordered_extents(struct inode *inode,
						 struct page *locked_page,
						 u64 offset, u64 bytes)
{
	unsigned long index = offset >> PAGE_SHIFT;
	unsigned long end_index = (offset + bytes - 1) >> PAGE_SHIFT;
	u64 page_start = page_offset(locked_page);
	u64 page_end = page_start + PAGE_SIZE - 1;

	struct page *page;

	/* Clear Private2 on every page of the range we can still find. */
	while (index <= end_index) {
		page = find_get_page(inode->i_mapping, index);
		index++;
		if (!page)
			continue;
		ClearPagePrivate2(page);
		put_page(page);
	}

	/*
	 * If locked_page lies inside the range, skip it: the first page of a
	 * delalloc range is cleaned up by the caller, so do not finish the
	 * ordered extent for it here.
	 */
	if (page_start >= offset && page_end <= (offset + bytes - 1)) {
		offset += PAGE_SIZE;
		bytes -= PAGE_SIZE;
	}

	return __endio_write_update_ordered(inode, offset, bytes, false);
}
147
148static int btrfs_dirty_inode(struct inode *inode);
149
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/* Install the extent IO ops on a test inode; used only by the self-tests. */
void btrfs_test_inode_set_ops(struct inode *inode)
{
	BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
}
#endif
156
/*
 * Initialize security-related metadata for a freshly created inode:
 * first the ACLs inherited from @dir, then the security xattrs.
 * Returns 0 on success or the first error encountered.
 */
static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
				     struct inode *inode, struct inode *dir,
				     const struct qstr *qstr)
{
	int ret;

	ret = btrfs_init_acl(trans, inode, dir);
	if (ret)
		return ret;

	return btrfs_xattr_security_init(trans, inode, dir, qstr);
}
168
169
170
171
172
173
/*
 * Do the hard work of inserting an inline extent item into the btree at
 * @start.  The caller should already have dropped any overlapping extents
 * (or have had the item slot reserved via @extent_inserted).
 *
 * If @compressed_pages is given the inline data is copied from those pages
 * and tagged with @compress_type; otherwise the data is copied straight out
 * of the page cache for [start, start + size).
 */
static int insert_inline_extent(struct btrfs_trans_handle *trans,
				struct btrfs_path *path, int extent_inserted,
				struct btrfs_root *root, struct inode *inode,
				u64 start, size_t size, size_t compressed_size,
				int compress_type,
				struct page **compressed_pages)
{
	struct extent_buffer *leaf;
	struct page *page = NULL;
	char *kaddr;
	unsigned long ptr;
	struct btrfs_file_extent_item *ei;
	int ret;
	size_t cur_size = size;
	unsigned long offset;

	/* on-disk size is the compressed size when we have compressed data */
	if (compressed_size && compressed_pages)
		cur_size = compressed_size;

	inode_add_bytes(inode, size);

	if (!extent_inserted) {
		struct btrfs_key key;
		size_t datasize;

		key.objectid = btrfs_ino(BTRFS_I(inode));
		key.offset = start;
		key.type = BTRFS_EXTENT_DATA_KEY;

		datasize = btrfs_file_extent_calc_inline_size(cur_size);
		path->leave_spinning = 1;
		ret = btrfs_insert_empty_item(trans, root, path, &key,
					      datasize);
		if (ret)
			goto fail;
	}
	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);
	btrfs_set_file_extent_generation(leaf, ei, trans->transid);
	btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE);
	btrfs_set_file_extent_encryption(leaf, ei, 0);
	btrfs_set_file_extent_other_encoding(leaf, ei, 0);
	btrfs_set_file_extent_ram_bytes(leaf, ei, size);
	ptr = btrfs_file_extent_inline_start(ei);

	if (compress_type != BTRFS_COMPRESS_NONE) {
		/* copy the compressed bytes page by page into the item */
		struct page *cpage;
		int i = 0;
		while (compressed_size > 0) {
			cpage = compressed_pages[i];
			cur_size = min_t(unsigned long, compressed_size,
				       PAGE_SIZE);

			kaddr = kmap_atomic(cpage);
			write_extent_buffer(leaf, kaddr, ptr, cur_size);
			kunmap_atomic(kaddr);

			i++;
			ptr += cur_size;
			compressed_size -= cur_size;
		}
		btrfs_set_file_extent_compression(leaf, ei,
						  compress_type);
	} else {
		/* uncompressed: copy straight from the page cache */
		page = find_get_page(inode->i_mapping,
				     start >> PAGE_SHIFT);
		btrfs_set_file_extent_compression(leaf, ei, 0);
		kaddr = kmap_atomic(page);
		offset = offset_in_page(start);
		write_extent_buffer(leaf, kaddr + offset, ptr, size);
		kunmap_atomic(kaddr);
		put_page(page);
	}
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	/*
	 * We're an inline extent, so nobody can extend the file past i_size
	 * without locking a page we already have locked.
	 *
	 * We must do any i_size and inode updates before we unlock the
	 * pages.  Otherwise we could end up racing with unlink.
	 */
	BTRFS_I(inode)->disk_i_size = inode->i_size;
	ret = btrfs_update_inode(trans, root, inode);

fail:
	return ret;
}
266
267
268
269
270
271
272
/*
 * Conditionally insert an inline extent into the file.  This does the checks
 * required to make sure the data is small enough to fit as an inline extent.
 *
 * Returns 0 on success, > 0 when the range cannot (or should not) be inlined
 * and the caller must fall back to regular extents, or < 0 on error.
 */
static noinline int cow_file_range_inline(struct inode *inode, u64 start,
					  u64 end, size_t compressed_size,
					  int compress_type,
					  struct page **compressed_pages)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_trans_handle *trans;
	u64 isize = i_size_read(inode);
	u64 actual_end = min(end + 1, isize);
	u64 inline_len = actual_end - start;
	u64 aligned_end = ALIGN(end, fs_info->sectorsize);
	u64 data_len = inline_len;
	int ret;
	struct btrfs_path *path;
	int extent_inserted = 0;
	u32 extent_item_size;

	if (compressed_size)
		data_len = compressed_size;

	/*
	 * Bail out (return 1 = "don't inline") when the range does not start
	 * at offset 0, spans more than one sector, exceeds the inline or
	 * max_inline limits, or does not cover the end of the file.
	 */
	if (start > 0 ||
	    actual_end > fs_info->sectorsize ||
	    data_len > BTRFS_MAX_INLINE_DATA_SIZE(fs_info) ||
	    (!compressed_size &&
	    (actual_end & (fs_info->sectorsize - 1)) == 0) ||
	    end + 1 < isize ||
	    data_len > fs_info->max_inline) {
		return 1;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);
	}
	trans->block_rsv = &BTRFS_I(inode)->block_rsv;

	if (compressed_size && compressed_pages)
		extent_item_size = btrfs_file_extent_calc_inline_size(
		   compressed_size);
	else
		extent_item_size = btrfs_file_extent_calc_inline_size(
		    inline_len);

	/* drop any overlapping extents first, reserving the item slot */
	ret = __btrfs_drop_extents(trans, root, inode, path,
				   start, aligned_end, NULL,
				   1, 1, extent_item_size, &extent_inserted);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	if (isize > actual_end)
		inline_len = min_t(u64, isize, actual_end);
	ret = insert_inline_extent(trans, path, extent_inserted,
				   root, inode, start,
				   inline_len, compressed_size,
				   compress_type, compressed_pages);
	if (ret && ret != -ENOSPC) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	} else if (ret == -ENOSPC) {
		/* no room: tell the caller to fall back to a regular extent */
		ret = 1;
		goto out;
	}

	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
	btrfs_drop_extent_cache(BTRFS_I(inode), start, aligned_end - 1, 0);
out:
	/*
	 * Don't forget to free the reserved space, as for inlined extent
	 * it won't count as data extent, free them directly here.
	 * And at reserve time, it's always aligned to page size, so
	 * just free one page here.
	 */
	btrfs_qgroup_free_data(inode, NULL, 0, PAGE_SIZE);
	btrfs_free_path(path);
	btrfs_end_transaction(trans);
	return ret;
}
358
/* One (possibly compressed) extent queued for phase-two submission. */
struct async_extent {
	u64 start;			/* file offset of the extent */
	u64 ram_size;			/* uncompressed length */
	u64 compressed_size;		/* compressed length (0 = uncompressed) */
	struct page **pages;		/* compressed pages, NULL for uncompressed */
	unsigned long nr_pages;		/* number of entries in @pages */
	int compress_type;		/* BTRFS_COMPRESS_* used */
	struct list_head list;		/* link into async_cow->extents */
};
368
/* One chunk of delalloc handed to the async compression workqueue. */
struct async_cow {
	struct inode *inode;		/* inode being written; ref held via ihold */
	struct btrfs_fs_info *fs_info;
	struct page *locked_page;	/* page the writeback caller locked */
	u64 start;			/* chunk range, inclusive */
	u64 end;
	unsigned int write_flags;	/* passed through to bio submission */
	struct list_head extents;	/* async_extents produced by phase one */
	struct btrfs_work work;		/* workqueue item */
};
379
380static noinline int add_async_extent(struct async_cow *cow,
381 u64 start, u64 ram_size,
382 u64 compressed_size,
383 struct page **pages,
384 unsigned long nr_pages,
385 int compress_type)
386{
387 struct async_extent *async_extent;
388
389 async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS);
390 BUG_ON(!async_extent);
391 async_extent->start = start;
392 async_extent->ram_size = ram_size;
393 async_extent->compressed_size = compressed_size;
394 async_extent->pages = pages;
395 async_extent->nr_pages = nr_pages;
396 async_extent->compress_type = compress_type;
397 list_add_tail(&async_extent->list, &cow->extents);
398 return 0;
399}
400
/*
 * Decide whether [start, end] of @inode should go through the compression
 * path.  Checks run in priority order: forced compression first, then the
 * per-inode nocompress flag, then the heuristic.
 */
static inline int inode_need_compress(struct inode *inode, u64 start, u64 end)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);

	/* force compress */
	if (btrfs_test_opt(fs_info, FORCE_COMPRESS))
		return 1;
	/* defrag ioctl */
	if (BTRFS_I(inode)->defrag_compress)
		return 1;
	/* bad compression ratios seen earlier flagged this inode */
	if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS)
		return 0;
	if (btrfs_test_opt(fs_info, COMPRESS) ||
	    BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS ||
	    BTRFS_I(inode)->prop_compress)
		return btrfs_compress_heuristic(inode, start, end);
	return 0;
}
420
421static inline void inode_should_defrag(struct btrfs_inode *inode,
422 u64 start, u64 end, u64 num_bytes, u64 small_write)
423{
424
425 if (num_bytes < small_write &&
426 (start > 0 || end + 1 < inode->disk_i_size))
427 btrfs_add_inode_defrag(NULL, inode);
428}
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
/*
 * We create compressed extents in two phases.  The first phase compresses a
 * range of pages that have already been locked (both pages and state bits
 * are locked).
 *
 * This is done inside an ordered work queue, and the compression is spread
 * across many cpus.  The actual IO submission is step two, and the ordered
 * work queue takes care of making sure that happens in the same order things
 * were put onto the queue by writepages and friends.
 *
 * If this code finds it can't get good compression, it puts an entry onto
 * the work queue to write the uncompressed bytes.  This makes sure that both
 * compressed inodes and uncompressed inodes are written in the same order
 * that the flusher thread sent them down.
 */
static noinline void compress_file_range(struct inode *inode,
					struct page *locked_page,
					u64 start, u64 end,
					struct async_cow *async_cow,
					int *num_added)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	u64 blocksize = fs_info->sectorsize;
	u64 actual_end;
	int ret = 0;
	struct page **pages = NULL;
	unsigned long nr_pages;
	unsigned long total_compressed = 0;
	unsigned long total_in = 0;
	int i;
	int will_compress;
	int compress_type = fs_info->compress_type;
	int redirty = 0;

	inode_should_defrag(BTRFS_I(inode), start, end, end - start + 1,
			SZ_16K);

	actual_end = min_t(u64, i_size_read(inode), end + 1);
again:
	will_compress = 0;
	nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1;
	BUILD_BUG_ON((BTRFS_MAX_COMPRESSED % PAGE_SIZE) != 0);
	nr_pages = min_t(unsigned long, nr_pages,
			BTRFS_MAX_COMPRESSED / PAGE_SIZE);

	/*
	 * We don't want to send crud past the end of i_size through
	 * compression, that's just a waste of CPU time.  So, if the end of
	 * the file is before the start of our current requested range of
	 * bytes, we bail out to the uncompressed cleanup code that can deal
	 * with all of this.
	 *
	 * It isn't really the fastest way to fix things, but this is a very
	 * uncommon corner.
	 */
	if (actual_end <= start)
		goto cleanup_and_bail_uncompressed;

	total_compressed = actual_end - start;

	/*
	 * Skip compression for a small file range (<= blocksize) that isn't
	 * an inline extent, since it doesn't save disk space at all.
	 */
	if (total_compressed <= blocksize &&
	   (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
		goto cleanup_and_bail_uncompressed;

	total_compressed = min_t(unsigned long, total_compressed,
			BTRFS_MAX_UNCOMPRESSED);
	total_in = 0;
	ret = 0;

	/*
	 * We do compression for mount -o compress and when the inode has not
	 * been flagged as nocompress.  This flag can change at any time if we
	 * discover bad compression ratios.
	 */
	if (inode_need_compress(inode, start, end)) {
		WARN_ON(pages);
		pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
		if (!pages) {
			/* just bail out to the uncompressed code */
			nr_pages = 0;
			goto cont;
		}

		if (BTRFS_I(inode)->defrag_compress)
			compress_type = BTRFS_I(inode)->defrag_compress;
		else if (BTRFS_I(inode)->prop_compress)
			compress_type = BTRFS_I(inode)->prop_compress;

		/*
		 * We need to call clear_page_dirty_for_io on each page in the
		 * range.  Otherwise applications with the file mmap'd can
		 * wander in and change the page contents while we are
		 * compressing them.
		 *
		 * If the compression fails for any reason, we set the pages
		 * dirty again later on.
		 *
		 * Note that the remaining part is redirtied, the start pointer
		 * has moved, the end is the original one.
		 */
		if (!redirty) {
			extent_range_clear_dirty_for_io(inode, start, end);
			redirty = 1;
		}

		/* Compression level is applied here and only here */
		ret = btrfs_compress_pages(
			compress_type | (fs_info->compress_level << 4),
					   inode->i_mapping, start,
					   pages,
					   &nr_pages,
					   &total_in,
					   &total_compressed);

		if (!ret) {
			unsigned long offset = offset_in_page(total_compressed);
			struct page *page = pages[nr_pages - 1];
			char *kaddr;

			/*
			 * Zero the tail end of the last page, as we might be
			 * sending it down to disk.
			 */
			if (offset) {
				kaddr = kmap_atomic(page);
				memset(kaddr + offset, 0,
				       PAGE_SIZE - offset);
				kunmap_atomic(kaddr);
			}
			will_compress = 1;
		}
	}
cont:
	if (start == 0) {
		/* lets try to make an inline extent */
		if (ret || total_in < actual_end) {
			/*
			 * We didn't compress the entire range, try to make an
			 * uncompressed inline extent.
			 */
			ret = cow_file_range_inline(inode, start, end, 0,
						    BTRFS_COMPRESS_NONE, NULL);
		} else {
			/* try making a compressed inline extent */
			ret = cow_file_range_inline(inode, start, end,
						    total_compressed,
						    compress_type, pages);
		}
		if (ret <= 0) {
			unsigned long clear_flags = EXTENT_DELALLOC |
				EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
				EXTENT_DO_ACCOUNTING;
			unsigned long page_error_op;

			page_error_op = ret < 0 ? PAGE_SET_ERROR : 0;

			/*
			 * Inline extent creation worked or returned error,
			 * we don't need to create any more async work items.
			 * Unlock and free up our temp pages.
			 *
			 * We use DO_ACCOUNTING here because we need the
			 * delalloc_release_metadata to be done _after_ we drop
			 * our outstanding extent for clearing delalloc for
			 * this range.
			 */
			extent_clear_unlock_delalloc(inode, start, end, end,
						     NULL, clear_flags,
						     PAGE_UNLOCK |
						     PAGE_CLEAR_DIRTY |
						     PAGE_SET_WRITEBACK |
						     page_error_op |
						     PAGE_END_WRITEBACK);
			goto free_pages_out;
		}
	}

	if (will_compress) {
		/*
		 * We aren't doing an inline extent; round the compressed size
		 * up to a block size boundary so the allocator does sane
		 * things.
		 */
		total_compressed = ALIGN(total_compressed, blocksize);

		/*
		 * One last check to make sure the compression is really a
		 * win, compare the page count read with the blocks on disk,
		 * compression must free at least one sector size.
		 */
		total_in = ALIGN(total_in, PAGE_SIZE);
		if (total_compressed + blocksize <= total_in) {
			*num_added += 1;

			/*
			 * The async work queues will take care of doing actual
			 * allocation on disk for these compressed pages, and
			 * will submit them to the elevator.
			 */
			add_async_extent(async_cow, start, total_in,
					total_compressed, pages, nr_pages,
					compress_type);

			if (start + total_in < end) {
				start += total_in;
				pages = NULL;
				cond_resched();
				goto again;
			}
			return;
		}
	}
	if (pages) {
		/*
		 * The compression code ran but failed to make things smaller,
		 * free any pages it allocated and our page pointer array.
		 */
		for (i = 0; i < nr_pages; i++) {
			WARN_ON(pages[i]->mapping);
			put_page(pages[i]);
		}
		kfree(pages);
		pages = NULL;
		total_compressed = 0;
		nr_pages = 0;

		/* flag the file so we don't compress in the future */
		if (!btrfs_test_opt(fs_info, FORCE_COMPRESS) &&
		    !(BTRFS_I(inode)->prop_compress)) {
			BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
		}
	}
cleanup_and_bail_uncompressed:
	/*
	 * No compression: queue the whole range as one uncompressed async
	 * extent so it is written via the normal cow_file_range path.  If
	 * locked_page falls inside our range, redirty it so the async side
	 * picks it up for writeback.
	 */
	if (page_offset(locked_page) >= start &&
	    page_offset(locked_page) <= end)
		__set_page_dirty_nobuffers(locked_page);
	/* unlocked later on in the async handlers */

	if (redirty)
		extent_range_redirty_for_io(inode, start, end);
	add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0,
			 BTRFS_COMPRESS_NONE);
	*num_added += 1;

	return;

free_pages_out:
	for (i = 0; i < nr_pages; i++) {
		WARN_ON(pages[i]->mapping);
		put_page(pages[i]);
	}
	kfree(pages);
}
693
694static void free_async_extent_pages(struct async_extent *async_extent)
695{
696 int i;
697
698 if (!async_extent->pages)
699 return;
700
701 for (i = 0; i < async_extent->nr_pages; i++) {
702 WARN_ON(async_extent->pages[i]->mapping);
703 put_page(async_extent->pages[i]);
704 }
705 kfree(async_extent->pages);
706 async_extent->nr_pages = 0;
707 async_extent->pages = NULL;
708}
709
710
711
712
713
714
715
/*
 * Phase two of compressed writeback.  This is the ordered portion of the
 * code, which only gets called in the order the work was queued.  We walk
 * all the async extents created by compress_file_range and send them down
 * to the disk.
 */
static noinline void submit_compressed_extents(struct async_cow *async_cow)
{
	struct inode *inode = async_cow->inode;
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct async_extent *async_extent;
	u64 alloc_hint = 0;
	struct btrfs_key ins;
	struct extent_map *em;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_io_tree *io_tree;
	int ret = 0;

again:
	while (!list_empty(&async_cow->extents)) {
		async_extent = list_entry(async_cow->extents.next,
					  struct async_extent, list);
		list_del(&async_extent->list);

		io_tree = &BTRFS_I(inode)->io_tree;

retry:
		/* did the compression code fall back to uncompressed IO? */
		if (!async_extent->pages) {
			int page_started = 0;
			unsigned long nr_written = 0;

			lock_extent(io_tree, async_extent->start,
					 async_extent->start +
					 async_extent->ram_size - 1);

			/* allocate blocks */
			ret = cow_file_range(inode, async_cow->locked_page,
					     async_extent->start,
					     async_extent->start +
					     async_extent->ram_size - 1,
					     async_extent->start +
					     async_extent->ram_size - 1,
					     &page_started, &nr_written, 0,
					     NULL);

			/*
			 * If page_started, cow_file_range inserted an inline
			 * extent and took care of all the unlocking and IO
			 * for us.  Otherwise, we need to submit all those
			 * pages down to the drive.
			 */
			if (!page_started && !ret)
				extent_write_locked_range(inode,
						  async_extent->start,
						  async_extent->start +
						  async_extent->ram_size - 1,
						  WB_SYNC_ALL);
			else if (ret)
				unlock_page(async_cow->locked_page);
			kfree(async_extent);
			cond_resched();
			continue;
		}

		lock_extent(io_tree, async_extent->start,
			    async_extent->start + async_extent->ram_size - 1);

		ret = btrfs_reserve_extent(root, async_extent->ram_size,
					   async_extent->compressed_size,
					   async_extent->compressed_size,
					   0, alloc_hint, &ins, 1, 1);
		if (ret) {
			free_async_extent_pages(async_extent);

			if (ret == -ENOSPC) {
				unlock_extent(io_tree, async_extent->start,
					      async_extent->start +
					      async_extent->ram_size - 1);

				/*
				 * We need to redirty the pages if we decide to
				 * fallback to uncompressed IO, otherwise we
				 * will not submit these pages down to lower
				 * layers.
				 */
				extent_range_redirty_for_io(inode,
						async_extent->start,
						async_extent->start +
						async_extent->ram_size - 1);

				goto retry;
			}
			goto out_free;
		}
		/*
		 * Here we're doing allocation and writeback of the compressed
		 * pages.
		 */
		em = create_io_em(inode, async_extent->start,
				  async_extent->ram_size, /* len */
				  async_extent->start, /* orig_start */
				  ins.objectid, /* block_start */
				  ins.offset, /* block_len */
				  ins.offset, /* orig_block_len */
				  async_extent->ram_size, /* ram_bytes */
				  async_extent->compress_type,
				  BTRFS_ORDERED_COMPRESSED);
		if (IS_ERR(em))
			/* ret value is not necessary due to void function return */
			goto out_free_reserve;
		free_extent_map(em);

		ret = btrfs_add_ordered_extent_compress(inode,
						async_extent->start,
						ins.objectid,
						async_extent->ram_size,
						ins.offset,
						BTRFS_ORDERED_COMPRESSED,
						async_extent->compress_type);
		if (ret) {
			btrfs_drop_extent_cache(BTRFS_I(inode),
						async_extent->start,
						async_extent->start +
						async_extent->ram_size - 1, 0);
			goto out_free_reserve;
		}
		btrfs_dec_block_group_reservations(fs_info, ins.objectid);

		/*
		 * Clear dirty, set writeback and unlock the pages.
		 */
		extent_clear_unlock_delalloc(inode, async_extent->start,
				async_extent->start +
				async_extent->ram_size - 1,
				async_extent->start +
				async_extent->ram_size - 1,
				NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
				PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
				PAGE_SET_WRITEBACK);
		if (btrfs_submit_compressed_write(inode,
				    async_extent->start,
				    async_extent->ram_size,
				    ins.objectid,
				    ins.offset, async_extent->pages,
				    async_extent->nr_pages,
				    async_cow->write_flags)) {
			struct page *p = async_extent->pages[0];
			const u64 start = async_extent->start;
			const u64 end = start + async_extent->ram_size - 1;

			/*
			 * Submission failed: fake an endio on the first page
			 * so the ordered extent is finished with an error,
			 * then end writeback and flag the range as errored.
			 */
			p->mapping = inode->i_mapping;
			btrfs_writepage_endio_finish_ordered(p, start, end, 0);

			p->mapping = NULL;
			extent_clear_unlock_delalloc(inode, start, end, end,
						     NULL, 0,
						     PAGE_END_WRITEBACK |
						     PAGE_SET_ERROR);
			free_async_extent_pages(async_extent);
		}
		alloc_hint = ins.objectid + ins.offset;
		kfree(async_extent);
		cond_resched();
	}
	return;
out_free_reserve:
	btrfs_dec_block_group_reservations(fs_info, ins.objectid);
	btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
out_free:
	extent_clear_unlock_delalloc(inode, async_extent->start,
				     async_extent->start +
				     async_extent->ram_size - 1,
				     async_extent->start +
				     async_extent->ram_size - 1,
				     NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
				     EXTENT_DELALLOC_NEW |
				     EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
				     PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
				     PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK |
				     PAGE_SET_ERROR);
	free_async_extent_pages(async_extent);
	kfree(async_extent);
	goto again;
}
897
/*
 * Return a disk byte number to use as an allocation hint for a new extent at
 * file offset @start, based on the inode's cached extent mappings.  Returns
 * 0 when no usable hint exists.
 */
static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
				      u64 num_bytes)
{
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	struct extent_map *em;
	u64 alloc_hint = 0;

	read_lock(&em_tree->lock);
	em = search_extent_mapping(em_tree, start, num_bytes);
	if (em) {
		/*
		 * If block start isn't an actual block number then find the
		 * first block in this inode and use that as a hint.  If that
		 * block is also bogus then just don't worry about it.
		 */
		if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
			free_extent_map(em);
			em = search_extent_mapping(em_tree, 0, 0);
			if (em && em->block_start < EXTENT_MAP_LAST_BYTE)
				alloc_hint = em->block_start;
			if (em)
				free_extent_map(em);
		} else {
			alloc_hint = em->block_start;
			free_extent_map(em);
		}
	}
	read_unlock(&em_tree->lock);

	return alloc_hint;
}
929
930
931
932
933
934
935
936
937
938
939
940
941
942
/*
 * When extent_io.c finds a delayed allocation range in the file, the call
 * backs end up in this code.  The basic idea is to allocate extents on disk
 * for the range, and create ordered data structs in ram to track those
 * extents.
 *
 * locked_page is the page that writepage had locked already.  We use it to
 * make sure we don't do extra locks or unlocks.
 *
 * *page_started is set to one if we unlock locked_page and do everything
 * required to start IO on it.  It may be clean and already done with IO when
 * we return.
 */
static noinline int cow_file_range(struct inode *inode,
				   struct page *locked_page,
				   u64 start, u64 end, u64 delalloc_end,
				   int *page_started, unsigned long *nr_written,
				   int unlock, struct btrfs_dedupe_hash *hash)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 alloc_hint = 0;
	u64 num_bytes;
	unsigned long ram_size;
	u64 cur_alloc_size = 0;
	u64 blocksize = fs_info->sectorsize;
	struct btrfs_key ins;
	struct extent_map *em;
	unsigned clear_bits;
	unsigned long page_ops;
	bool extent_reserved = false;
	int ret = 0;

	/* free space inodes must never reach the regular COW path */
	if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
		WARN_ON_ONCE(1);
		ret = -EINVAL;
		goto out_unlock;
	}

	num_bytes = ALIGN(end - start + 1, blocksize);
	num_bytes = max(blocksize, num_bytes);
	ASSERT(num_bytes <= btrfs_super_total_bytes(fs_info->super_copy));

	inode_should_defrag(BTRFS_I(inode), start, end, num_bytes, SZ_64K);

	if (start == 0) {
		/* lets try to make an inline extent */
		ret = cow_file_range_inline(inode, start, end, 0,
					    BTRFS_COMPRESS_NONE, NULL);
		if (ret == 0) {
			/*
			 * We use DO_ACCOUNTING here because we need the
			 * delalloc_release_metadata to be run _after_ we drop
			 * our outstanding extent for clearing delalloc for
			 * this range.
			 */
			extent_clear_unlock_delalloc(inode, start, end,
				     delalloc_end, NULL,
				     EXTENT_LOCKED | EXTENT_DELALLOC |
				     EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
				     EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
				     PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
				     PAGE_END_WRITEBACK);
			*nr_written = *nr_written +
			     (end - start + PAGE_SIZE) / PAGE_SIZE;
			*page_started = 1;
			goto out;
		} else if (ret < 0) {
			goto out_unlock;
		}
	}

	alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
	btrfs_drop_extent_cache(BTRFS_I(inode), start,
			start + num_bytes - 1, 0);

	while (num_bytes > 0) {
		cur_alloc_size = num_bytes;
		ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
					   fs_info->sectorsize, 0, alloc_hint,
					   &ins, 1, 1);
		if (ret < 0)
			goto out_unlock;
		cur_alloc_size = ins.offset;
		extent_reserved = true;

		ram_size = ins.offset;
		em = create_io_em(inode, start, ins.offset, /* len */
				  start, /* orig_start */
				  ins.objectid, /* block_start */
				  ins.offset, /* block_len */
				  ins.offset, /* orig_block_len */
				  ram_size, /* ram_bytes */
				  BTRFS_COMPRESS_NONE, /* compress_type */
				  BTRFS_ORDERED_REGULAR /* type */);
		if (IS_ERR(em)) {
			ret = PTR_ERR(em);
			goto out_reserve;
		}
		free_extent_map(em);

		ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
					       ram_size, cur_alloc_size, 0);
		if (ret)
			goto out_drop_extent_cache;

		if (root->root_key.objectid ==
		    BTRFS_DATA_RELOC_TREE_OBJECTID) {
			ret = btrfs_reloc_clone_csums(inode, start,
						      cur_alloc_size);
			/*
			 * Only drop cache here, and process as normal.
			 *
			 * We must not allow extent_clear_unlock_delalloc()
			 * at out_unlock label to free meta of this ordered
			 * extent, as its meta should be freed by
			 * btrfs_finish_ordered_io().
			 *
			 * So we must continue until @start is increased to
			 * skip current ordered extent.
			 */
			if (ret)
				btrfs_drop_extent_cache(BTRFS_I(inode), start,
						start + ram_size - 1, 0);
		}

		btrfs_dec_block_group_reservations(fs_info, ins.objectid);

		/*
		 * We're not doing compressed IO, don't unlock the first page
		 * (which the caller expects to stay locked), don't clear any
		 * dirty bits and don't set any writeback bits.
		 *
		 * Do set the Private2 bit so we know this page was properly
		 * setup for writepage.
		 */
		page_ops = unlock ? PAGE_UNLOCK : 0;
		page_ops |= PAGE_SET_PRIVATE2;

		extent_clear_unlock_delalloc(inode, start,
					     start + ram_size - 1,
					     delalloc_end, locked_page,
					     EXTENT_LOCKED | EXTENT_DELALLOC,
					     page_ops);
		if (num_bytes < cur_alloc_size)
			num_bytes = 0;
		else
			num_bytes -= cur_alloc_size;
		alloc_hint = ins.objectid + ins.offset;
		start += cur_alloc_size;
		extent_reserved = false;

		/*
		 * btrfs_reloc_clone_csums() error, since start is increased
		 * extent_clear_unlock_delalloc() at out_unlock label won't
		 * free metadata of current ordered extent, we're OK to exit.
		 */
		if (ret)
			goto out_unlock;
	}
out:
	return ret;

out_drop_extent_cache:
	btrfs_drop_extent_cache(BTRFS_I(inode), start, start + ram_size - 1, 0);
out_reserve:
	btrfs_dec_block_group_reservations(fs_info, ins.objectid);
	btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
out_unlock:
	clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
		EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV;
	page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
		PAGE_END_WRITEBACK;
	/*
	 * If we reserved an extent for our delalloc range (or a subrange) and
	 * failed to create the respective ordered extent, then it means that
	 * when we reserved the extent we decremented the extent's size from
	 * the data space_info's bytes_may_use counter and incremented the
	 * space_info's bytes_reserved counter by the same amount.  We must
	 * make sure extent_clear_unlock_delalloc() does not try to decrement
	 * again the data space_info's bytes_may_use counter, therefore we do
	 * not pass it the flag EXTENT_CLEAR_DATA_RESV.
	 */
	if (extent_reserved) {
		extent_clear_unlock_delalloc(inode, start,
					     start + cur_alloc_size,
					     start + cur_alloc_size,
					     locked_page,
					     clear_bits,
					     page_ops);
		start += cur_alloc_size;
		if (start >= end)
			goto out;
	}
	extent_clear_unlock_delalloc(inode, start, end, delalloc_end,
				     locked_page,
				     clear_bits | EXTENT_CLEAR_DATA_RESV,
				     page_ops);
	goto out;
}
1129
1130
1131
1132
1133static noinline void async_cow_start(struct btrfs_work *work)
1134{
1135 struct async_cow *async_cow;
1136 int num_added = 0;
1137 async_cow = container_of(work, struct async_cow, work);
1138
1139 compress_file_range(async_cow->inode, async_cow->locked_page,
1140 async_cow->start, async_cow->end, async_cow,
1141 &num_added);
1142 if (num_added == 0) {
1143 btrfs_add_delayed_iput(async_cow->inode);
1144 async_cow->inode = NULL;
1145 }
1146}
1147
1148
1149
1150
/*
 * Work queue callback: phase two of async delalloc, submits the previously
 * compressed extents.  Also drops this chunk's contribution to the global
 * async_delalloc_pages counter and wakes throttled writers.
 */
static noinline void async_cow_submit(struct btrfs_work *work)
{
	struct btrfs_fs_info *fs_info;
	struct async_cow *async_cow;
	unsigned long nr_pages;

	async_cow = container_of(work, struct async_cow, work);

	fs_info = async_cow->fs_info;
	nr_pages = (async_cow->end - async_cow->start + PAGE_SIZE) >>
		PAGE_SHIFT;

	/* atomic_sub_return implies a barrier */
	if (atomic_sub_return(nr_pages, &fs_info->async_delalloc_pages) <
	    5 * SZ_1M)
		cond_wake_up_nomb(&fs_info->async_submit_wait);

	/*
	 * ->inode could be NULL if async_cow_start has failed to compress,
	 * in which case we don't have anything to submit, yet we need to
	 * always adjust ->async_delalloc_pages as its paired with the init
	 * happening in cow_file_range_async
	 */
	if (async_cow->inode)
		submit_compressed_extents(async_cow);
}
1177
1178static noinline void async_cow_free(struct btrfs_work *work)
1179{
1180 struct async_cow *async_cow;
1181 async_cow = container_of(work, struct async_cow, work);
1182 if (async_cow->inode)
1183 btrfs_add_delayed_iput(async_cow->inode);
1184 kfree(async_cow);
1185}
1186
/*
 * Queue [start, end] of @inode for asynchronous delalloc processing, split
 * into chunks (up to 512K each unless compression is off for this inode)
 * handed to the delalloc workqueue.  Each chunk pins the inode with ihold();
 * the reference is released via delayed iput in the work callbacks.
 *
 * Always sets *page_started = 1: the work items take over all page handling.
 */
static int cow_file_range_async(struct inode *inode, struct page *locked_page,
				u64 start, u64 end, int *page_started,
				unsigned long *nr_written,
				unsigned int write_flags)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct async_cow *async_cow;
	unsigned long nr_pages;
	u64 cur_end;

	clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
			 1, 0, NULL);
	while (start < end) {
		async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
		BUG_ON(!async_cow); /* -ENOMEM */
		/*
		 * igrab is called higher up in the call chain, take only the
		 * lightweight reference for the callback lifetime
		 */
		ihold(inode);
		async_cow->inode = inode;
		async_cow->fs_info = fs_info;
		async_cow->locked_page = locked_page;
		async_cow->start = start;
		async_cow->write_flags = write_flags;

		/* a nocompress inode gets one big chunk, others up to 512K */
		if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
		    !btrfs_test_opt(fs_info, FORCE_COMPRESS))
			cur_end = end;
		else
			cur_end = min(end, start + SZ_512K - 1);

		async_cow->end = cur_end;
		INIT_LIST_HEAD(&async_cow->extents);

		btrfs_init_work(&async_cow->work,
				btrfs_delalloc_helper,
				async_cow_start, async_cow_submit,
				async_cow_free);

		nr_pages = (cur_end - start + PAGE_SIZE) >>
			PAGE_SHIFT;
		atomic_add(nr_pages, &fs_info->async_delalloc_pages);

		btrfs_queue_work(fs_info->delalloc_workers, &async_cow->work);

		*nr_written += nr_pages;
		start = cur_end + 1;
	}
	*page_started = 1;
	return 0;
}
1239
1240static noinline int csum_exist_in_range(struct btrfs_fs_info *fs_info,
1241 u64 bytenr, u64 num_bytes)
1242{
1243 int ret;
1244 struct btrfs_ordered_sum *sums;
1245 LIST_HEAD(list);
1246
1247 ret = btrfs_lookup_csums_range(fs_info->csum_root, bytenr,
1248 bytenr + num_bytes - 1, &list, 0);
1249 if (ret == 0 && list_empty(&list))
1250 return 0;
1251
1252 while (!list_empty(&list)) {
1253 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
1254 list_del(&sums->list);
1255 kfree(sums);
1256 }
1257 if (ret < 0)
1258 return ret;
1259 return 1;
1260}
1261
1262
1263
1264
1265
1266
1267
1268
/*
 * Run delalloc for the range [start, end] through the NOCOW/PREALLOC path.
 *
 * Walk the file extent items covering the range; wherever the existing
 * extent can be safely overwritten in place (not shared with a snapshot,
 * no csums, not in a read-only block group, ...) create a NOCOW/PREALLOC
 * ordered extent.  Gaps and extents that fail the checks are accumulated
 * into [cow_start, ...) and handed to cow_file_range() for normal COW.
 *
 * @locked_page: page already locked by the caller
 * @force:       also NOCOW regular (non-prealloc) extents; set when the
 *               inode has BTRFS_INODE_NODATACOW
 * @page_started/@nr_written: passed through to cow_file_range()
 *
 * Returns 0 on success, negative errno on failure; on failure the not yet
 * processed part of the range is unlocked and cleaned up.
 */
static noinline int run_delalloc_nocow(struct inode *inode,
			      struct page *locked_page,
			      u64 start, u64 end, int *page_started, int force,
			      unsigned long *nr_written)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_buffer *leaf;
	struct btrfs_path *path;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key found_key;
	struct extent_map *em;
	u64 cow_start;		/* start of pending COW run, (u64)-1 if none */
	u64 cur_offset;		/* next file offset to process */
	u64 extent_end;
	u64 extent_offset;
	u64 disk_bytenr;
	u64 num_bytes;
	u64 disk_num_bytes;
	u64 ram_bytes;
	int extent_type;
	int ret;
	int type;
	int nocow;		/* current extent can be written in place */
	int check_prev = 1;	/* look at the previous item on first search */
	bool nolock;
	u64 ino = btrfs_ino(BTRFS_I(inode));

	path = btrfs_alloc_path();
	if (!path) {
		/* No path, no progress: unlock and fail the whole range. */
		extent_clear_unlock_delalloc(inode, start, end, end,
					     locked_page,
					     EXTENT_LOCKED | EXTENT_DELALLOC |
					     EXTENT_DO_ACCOUNTING |
					     EXTENT_DEFRAG, PAGE_UNLOCK |
					     PAGE_CLEAR_DIRTY |
					     PAGE_SET_WRITEBACK |
					     PAGE_END_WRITEBACK);
		return -ENOMEM;
	}

	nolock = btrfs_is_free_space_inode(BTRFS_I(inode));

	cow_start = (u64)-1;
	cur_offset = start;
	while (1) {
		ret = btrfs_lookup_file_extent(NULL, root, path, ino,
					       cur_offset, 0);
		if (ret < 0)
			goto error;
		if (ret > 0 && path->slots[0] > 0 && check_prev) {
			/*
			 * No exact match: the previous item may be an
			 * extent that still covers cur_offset.
			 */
			leaf = path->nodes[0];
			btrfs_item_key_to_cpu(leaf, &found_key,
					      path->slots[0] - 1);
			if (found_key.objectid == ino &&
			    found_key.type == BTRFS_EXTENT_DATA_KEY)
				path->slots[0]--;
		}
		check_prev = 0;
next_slot:
		leaf = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0) {
				if (cow_start != (u64)-1)
					cur_offset = cow_start;
				goto error;
			}
			if (ret > 0)
				break;
			leaf = path->nodes[0];
		}

		nocow = 0;
		disk_bytenr = 0;
		num_bytes = 0;
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		if (found_key.objectid > ino)
			break;
		if (WARN_ON_ONCE(found_key.objectid < ino) ||
		    found_key.type < BTRFS_EXTENT_DATA_KEY) {
			path->slots[0]++;
			goto next_slot;
		}
		if (found_key.type > BTRFS_EXTENT_DATA_KEY ||
		    found_key.offset > end)
			break;

		if (found_key.offset > cur_offset) {
			/* Hole before the next extent: must be COWed. */
			extent_end = found_key.offset;
			extent_type = 0;
			goto out_check;
		}

		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		extent_type = btrfs_file_extent_type(leaf, fi);

		ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
		if (extent_type == BTRFS_FILE_EXTENT_REG ||
		    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
			disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
			extent_offset = btrfs_file_extent_offset(leaf, fi);
			extent_end = found_key.offset +
				btrfs_file_extent_num_bytes(leaf, fi);
			disk_num_bytes =
				btrfs_file_extent_disk_num_bytes(leaf, fi);
			if (extent_end <= start) {
				path->slots[0]++;
				goto next_slot;
			}
			/* Holes (disk_bytenr == 0) always need COW. */
			if (disk_bytenr == 0)
				goto out_check;
			if (btrfs_file_extent_compression(leaf, fi) ||
			    btrfs_file_extent_encryption(leaf, fi) ||
			    btrfs_file_extent_other_encoding(leaf, fi))
				goto out_check;
			/*
			 * Extent written before the last snapshot may be
			 * shared with it: same check as in
			 * btrfs_cross_ref_exist() without the search.
			 */
			if (!nolock &&
			    btrfs_file_extent_generation(leaf, fi) <=
			    btrfs_root_last_snapshot(&root->root_item))
				goto out_check;
			if (extent_type == BTRFS_FILE_EXTENT_REG && !force)
				goto out_check;
			if (btrfs_extent_readonly(fs_info, disk_bytenr))
				goto out_check;
			ret = btrfs_cross_ref_exist(root, ino,
						    found_key.offset -
						    extent_offset, disk_bytenr);
			if (ret) {
				/*
				 * ret could be -EIO if the above fails to
				 * read metadata.
				 */
				if (ret < 0) {
					if (cow_start != (u64)-1)
						cur_offset = cow_start;
					goto error;
				}

				WARN_ON_ONCE(nolock);
				goto out_check;
			}
			disk_bytenr += extent_offset;
			disk_bytenr += cur_offset - found_key.offset;
			num_bytes = min(end + 1, extent_end) - cur_offset;
			/*
			 * If there are pending snapshots for this root, we
			 * must fall into the common COW way.
			 */
			if (!nolock && atomic_read(&root->snapshot_force_cow))
				goto out_check;
			/*
			 * Force COW if csums exist in the range; this
			 * ensures csums for a given extent are either all
			 * valid or do not exist at all.
			 */
			ret = csum_exist_in_range(fs_info, disk_bytenr,
						  num_bytes);
			if (ret) {
				/*
				 * ret could be -EIO if the above fails to
				 * read metadata.
				 */
				if (ret < 0) {
					if (cow_start != (u64)-1)
						cur_offset = cow_start;
					goto error;
				}
				WARN_ON_ONCE(nolock);
				goto out_check;
			}
			/* Keep scrub/discard away while we overwrite. */
			if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr))
				goto out_check;
			nocow = 1;
		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
			/* Inline extents are always COWed (and aligned up). */
			extent_end = found_key.offset +
				btrfs_file_extent_ram_bytes(leaf, fi);
			extent_end = ALIGN(extent_end,
					   fs_info->sectorsize);
		} else {
			BUG_ON(1);
		}
out_check:
		if (extent_end <= start) {
			path->slots[0]++;
			if (nocow)
				btrfs_dec_nocow_writers(fs_info, disk_bytenr);
			goto next_slot;
		}
		if (!nocow) {
			/* Accumulate this extent into the pending COW run. */
			if (cow_start == (u64)-1)
				cow_start = cur_offset;
			cur_offset = extent_end;
			if (cur_offset > end)
				break;
			path->slots[0]++;
			goto next_slot;
		}

		btrfs_release_path(path);
		/* Flush any COW run queued up before this NOCOW extent. */
		if (cow_start != (u64)-1) {
			ret = cow_file_range(inode, locked_page,
					     cow_start, found_key.offset - 1,
					     end, page_started, nr_written, 1,
					     NULL);
			if (ret) {
				if (nocow)
					btrfs_dec_nocow_writers(fs_info,
								disk_bytenr);
				goto error;
			}
			cow_start = (u64)-1;
		}

		if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
			u64 orig_start = found_key.offset - extent_offset;

			em = create_io_em(inode, cur_offset, num_bytes,
					  orig_start,
					  disk_bytenr, /* block_start */
					  num_bytes, /* block_len */
					  disk_num_bytes, /* orig_block_len */
					  ram_bytes, BTRFS_COMPRESS_NONE,
					  BTRFS_ORDERED_PREALLOC);
			if (IS_ERR(em)) {
				if (nocow)
					btrfs_dec_nocow_writers(fs_info,
								disk_bytenr);
				ret = PTR_ERR(em);
				goto error;
			}
			free_extent_map(em);
		}

		if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
			type = BTRFS_ORDERED_PREALLOC;
		} else {
			type = BTRFS_ORDERED_NOCOW;
		}

		ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr,
					       num_bytes, num_bytes, type);
		if (nocow)
			btrfs_dec_nocow_writers(fs_info, disk_bytenr);
		BUG_ON(ret); /* -ENOMEM */

		if (root->root_key.objectid ==
		    BTRFS_DATA_RELOC_TREE_OBJECTID)
			/*
			 * Error handled later, as we must prevent
			 * extent_clear_unlock_delalloc() in the error
			 * handler from freeing metadata of the created
			 * ordered extent.
			 */
			ret = btrfs_reloc_clone_csums(inode, cur_offset,
						      num_bytes);

		extent_clear_unlock_delalloc(inode, cur_offset,
					     cur_offset + num_bytes - 1, end,
					     locked_page, EXTENT_LOCKED |
					     EXTENT_DELALLOC |
					     EXTENT_CLEAR_DATA_RESV,
					     PAGE_UNLOCK | PAGE_SET_PRIVATE2);

		cur_offset = extent_end;

		/*
		 * btrfs_reloc_clone_csums() error: now we are OK to call
		 * the error handler, as metadata for the created ordered
		 * extent will only be freed by btrfs_finish_ordered_io().
		 */
		if (ret)
			goto error;
		if (cur_offset > end)
			break;
	}
	btrfs_release_path(path);

	/* COW whatever tail of the range was not covered by extents. */
	if (cur_offset <= end && cow_start == (u64)-1)
		cow_start = cur_offset;

	if (cow_start != (u64)-1) {
		cur_offset = end;
		ret = cow_file_range(inode, locked_page, cow_start, end, end,
				     page_started, nr_written, 1, NULL);
		if (ret)
			goto error;
	}

error:
	if (ret && cur_offset < end)
		extent_clear_unlock_delalloc(inode, cur_offset, end, end,
					     locked_page, EXTENT_LOCKED |
					     EXTENT_DELALLOC | EXTENT_DEFRAG |
					     EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
					     PAGE_CLEAR_DIRTY |
					     PAGE_SET_WRITEBACK |
					     PAGE_END_WRITEBACK);
	btrfs_free_path(path);
	return ret;
}
1574
1575static inline int need_force_cow(struct inode *inode, u64 start, u64 end)
1576{
1577
1578 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
1579 !(BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC))
1580 return 0;
1581
1582
1583
1584
1585
1586
1587 if (BTRFS_I(inode)->defrag_bytes &&
1588 test_range_bit(&BTRFS_I(inode)->io_tree, start, end,
1589 EXTENT_DEFRAG, 0, NULL))
1590 return 1;
1591
1592 return 0;
1593}
1594
1595
1596
1597
1598
1599int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page,
1600 u64 start, u64 end, int *page_started, unsigned long *nr_written,
1601 struct writeback_control *wbc)
1602{
1603 int ret;
1604 int force_cow = need_force_cow(inode, start, end);
1605 unsigned int write_flags = wbc_to_write_flags(wbc);
1606
1607 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW && !force_cow) {
1608 ret = run_delalloc_nocow(inode, locked_page, start, end,
1609 page_started, 1, nr_written);
1610 } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) {
1611 ret = run_delalloc_nocow(inode, locked_page, start, end,
1612 page_started, 0, nr_written);
1613 } else if (!inode_need_compress(inode, start, end)) {
1614 ret = cow_file_range(inode, locked_page, start, end, end,
1615 page_started, nr_written, 1, NULL);
1616 } else {
1617 set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
1618 &BTRFS_I(inode)->runtime_flags);
1619 ret = cow_file_range_async(inode, locked_page, start, end,
1620 page_started, nr_written,
1621 write_flags);
1622 }
1623 if (ret)
1624 btrfs_cleanup_ordered_extents(inode, locked_page, start,
1625 end - start + 1);
1626 return ret;
1627}
1628
1629void btrfs_split_delalloc_extent(struct inode *inode,
1630 struct extent_state *orig, u64 split)
1631{
1632 u64 size;
1633
1634
1635 if (!(orig->state & EXTENT_DELALLOC))
1636 return;
1637
1638 size = orig->end - orig->start + 1;
1639 if (size > BTRFS_MAX_EXTENT_SIZE) {
1640 u32 num_extents;
1641 u64 new_size;
1642
1643
1644
1645
1646
1647 new_size = orig->end - split + 1;
1648 num_extents = count_max_extents(new_size);
1649 new_size = split - orig->start;
1650 num_extents += count_max_extents(new_size);
1651 if (count_max_extents(size) >= num_extents)
1652 return;
1653 }
1654
1655 spin_lock(&BTRFS_I(inode)->lock);
1656 btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
1657 spin_unlock(&BTRFS_I(inode)->lock);
1658}
1659
1660
1661
1662
1663
1664
1665void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new,
1666 struct extent_state *other)
1667{
1668 u64 new_size, old_size;
1669 u32 num_extents;
1670
1671
1672 if (!(other->state & EXTENT_DELALLOC))
1673 return;
1674
1675 if (new->start > other->start)
1676 new_size = new->end - other->start + 1;
1677 else
1678 new_size = other->end - new->start + 1;
1679
1680
1681 if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
1682 spin_lock(&BTRFS_I(inode)->lock);
1683 btrfs_mod_outstanding_extents(BTRFS_I(inode), -1);
1684 spin_unlock(&BTRFS_I(inode)->lock);
1685 return;
1686 }
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706 old_size = other->end - other->start + 1;
1707 num_extents = count_max_extents(old_size);
1708 old_size = new->end - new->start + 1;
1709 num_extents += count_max_extents(old_size);
1710 if (count_max_extents(new_size) >= num_extents)
1711 return;
1712
1713 spin_lock(&BTRFS_I(inode)->lock);
1714 btrfs_mod_outstanding_extents(BTRFS_I(inode), -1);
1715 spin_unlock(&BTRFS_I(inode)->lock);
1716}
1717
/*
 * Put the inode on its root's delalloc list so delalloc flushing can find
 * it, and publish the root on the fs-wide delalloc roots list when this is
 * the root's first delalloc inode.  Takes root->delalloc_lock (and, nested
 * inside it, fs_info->delalloc_root_lock); caller must not hold either.
 */
static void btrfs_add_delalloc_inodes(struct btrfs_root *root,
				      struct inode *inode)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);

	spin_lock(&root->delalloc_lock);
	if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
		list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
			      &root->delalloc_inodes);
		set_bit(BTRFS_INODE_IN_DELALLOC_LIST,
			&BTRFS_I(inode)->runtime_flags);
		root->nr_delalloc_inodes++;
		if (root->nr_delalloc_inodes == 1) {
			/* First delalloc inode: expose the root fs-wide. */
			spin_lock(&fs_info->delalloc_root_lock);
			BUG_ON(!list_empty(&root->delalloc_root));
			list_add_tail(&root->delalloc_root,
				      &fs_info->delalloc_roots);
			spin_unlock(&fs_info->delalloc_root_lock);
		}
	}
	spin_unlock(&root->delalloc_lock);
}
1740
1741
/*
 * Remove the inode from its root's delalloc list, and drop the root from
 * the fs-wide delalloc roots list when this was the root's last delalloc
 * inode.  Caller must already hold root->delalloc_lock (see
 * btrfs_del_delalloc_inode() for the locked wrapper).
 */
void __btrfs_del_delalloc_inode(struct btrfs_root *root,
				struct btrfs_inode *inode)
{
	struct btrfs_fs_info *fs_info = root->fs_info;

	if (!list_empty(&inode->delalloc_inodes)) {
		list_del_init(&inode->delalloc_inodes);
		clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
			  &inode->runtime_flags);
		root->nr_delalloc_inodes--;
		if (!root->nr_delalloc_inodes) {
			/* Last delalloc inode: unpublish the root. */
			ASSERT(list_empty(&root->delalloc_inodes));
			spin_lock(&fs_info->delalloc_root_lock);
			BUG_ON(list_empty(&root->delalloc_root));
			list_del_init(&root->delalloc_root);
			spin_unlock(&fs_info->delalloc_root_lock);
		}
	}
}
1761
/* Locked wrapper around __btrfs_del_delalloc_inode(). */
static void btrfs_del_delalloc_inode(struct btrfs_root *root,
				     struct btrfs_inode *inode)
{
	spin_lock(&root->delalloc_lock);
	__btrfs_del_delalloc_inode(root, inode);
	spin_unlock(&root->delalloc_lock);
}
1769
1770
1771
1772
1773
/*
 * io_tree set-bit hook: keep the per-inode delalloc accounting
 * (outstanding extents, delalloc_bytes, defrag_bytes, the fs-wide
 * delalloc_bytes counter and the delalloc inode list) in sync when a
 * range first gets EXTENT_DELALLOC / EXTENT_DELALLOC_NEW set.
 */
void btrfs_set_delalloc_extent(struct inode *inode, struct extent_state *state,
			       unsigned *bits)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);

	/* EXTENT_DEFRAG is only meaningful together with EXTENT_DELALLOC. */
	if ((*bits & EXTENT_DEFRAG) && !(*bits & EXTENT_DELALLOC))
		WARN_ON(1);
	/*
	 * set_bit and clear_bit hooks normally require _irqsave/restore
	 * but in this case, we are only testing for the DELALLOC bit,
	 * which is only set or cleared with irqs on.
	 */
	if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
		struct btrfs_root *root = BTRFS_I(inode)->root;
		u64 len = state->end + 1 - state->start;
		u32 num_extents = count_max_extents(len);
		bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode));

		spin_lock(&BTRFS_I(inode)->lock);
		btrfs_mod_outstanding_extents(BTRFS_I(inode), num_extents);
		spin_unlock(&BTRFS_I(inode)->lock);

		/* For sanity tests */
		if (btrfs_is_testing(fs_info))
			return;

		percpu_counter_add_batch(&fs_info->delalloc_bytes, len,
					 fs_info->delalloc_batch);
		spin_lock(&BTRFS_I(inode)->lock);
		BTRFS_I(inode)->delalloc_bytes += len;
		if (*bits & EXTENT_DEFRAG)
			BTRFS_I(inode)->defrag_bytes += len;
		if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
					 &BTRFS_I(inode)->runtime_flags))
			btrfs_add_delalloc_inodes(root, inode);
		spin_unlock(&BTRFS_I(inode)->lock);
	}

	if (!(state->state & EXTENT_DELALLOC_NEW) &&
	    (*bits & EXTENT_DELALLOC_NEW)) {
		spin_lock(&BTRFS_I(inode)->lock);
		BTRFS_I(inode)->new_delalloc_bytes += state->end + 1 -
			state->start;
		spin_unlock(&BTRFS_I(inode)->lock);
	}
}
1820
1821
1822
1823
1824
/*
 * io_tree clear-bit hook: undo the accounting done by
 * btrfs_set_delalloc_extent() when EXTENT_DELALLOC /
 * EXTENT_DELALLOC_NEW / EXTENT_DEFRAG bits are cleared from a range,
 * releasing reserved metadata/data space as instructed by
 * EXTENT_CLEAR_META_RESV / EXTENT_CLEAR_DATA_RESV in @bits.
 */
void btrfs_clear_delalloc_extent(struct inode *vfs_inode,
				 struct extent_state *state, unsigned *bits)
{
	struct btrfs_inode *inode = BTRFS_I(vfs_inode);
	struct btrfs_fs_info *fs_info = btrfs_sb(vfs_inode->i_sb);
	u64 len = state->end + 1 - state->start;
	u32 num_extents = count_max_extents(len);

	if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG)) {
		spin_lock(&inode->lock);
		inode->defrag_bytes -= len;
		spin_unlock(&inode->lock);
	}

	/*
	 * set_bit and clear_bit hooks normally require _irqsave/restore
	 * but in this case, we are only testing for the DELALLOC bit,
	 * which is only set or cleared with irqs on.
	 */
	if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
		struct btrfs_root *root = inode->root;
		bool do_list = !btrfs_is_free_space_inode(inode);

		spin_lock(&inode->lock);
		btrfs_mod_outstanding_extents(inode, -num_extents);
		spin_unlock(&inode->lock);

		/*
		 * We don't reserve metadata space for space cache inodes so
		 * we don't need to call delalloc_release_metadata if there
		 * is an error.
		 */
		if (*bits & EXTENT_CLEAR_META_RESV &&
		    root != fs_info->tree_root)
			btrfs_delalloc_release_metadata(inode, len, false);

		/* For sanity tests. */
		if (btrfs_is_testing(fs_info))
			return;

		if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID &&
		    do_list && !(state->state & EXTENT_NORESERVE) &&
		    (*bits & EXTENT_CLEAR_DATA_RESV))
			btrfs_free_reserved_data_space_noquota(
					&inode->vfs_inode,
					state->start, len);

		percpu_counter_add_batch(&fs_info->delalloc_bytes, -len,
					 fs_info->delalloc_batch);
		spin_lock(&inode->lock);
		inode->delalloc_bytes -= len;
		if (do_list && inode->delalloc_bytes == 0 &&
		    test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
					&inode->runtime_flags))
			btrfs_del_delalloc_inode(root, inode);
		spin_unlock(&inode->lock);
	}

	if ((state->state & EXTENT_DELALLOC_NEW) &&
	    (*bits & EXTENT_DELALLOC_NEW)) {
		spin_lock(&inode->lock);
		ASSERT(inode->new_delalloc_bytes >= len);
		inode->new_delalloc_bytes -= len;
		spin_unlock(&inode->lock);
	}
}
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
1907 unsigned long bio_flags)
1908{
1909 struct inode *inode = page->mapping->host;
1910 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
1911 u64 logical = (u64)bio->bi_iter.bi_sector << 9;
1912 u64 length = 0;
1913 u64 map_length;
1914 int ret;
1915
1916 if (bio_flags & EXTENT_BIO_COMPRESSED)
1917 return 0;
1918
1919 length = bio->bi_iter.bi_size;
1920 map_length = length;
1921 ret = btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
1922 NULL, 0);
1923 if (ret < 0)
1924 return ret;
1925 if (map_length < length + size)
1926 return 1;
1927 return 0;
1928}
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938static blk_status_t btrfs_submit_bio_start(void *private_data, struct bio *bio,
1939 u64 bio_offset)
1940{
1941 struct inode *inode = private_data;
1942 blk_status_t ret = 0;
1943
1944 ret = btrfs_csum_one_bio(inode, bio, 0, 0);
1945 BUG_ON(ret);
1946 return 0;
1947}
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
/*
 * extent_io.c submission hook for data bios: wire up the end-io
 * workqueue, checksum (or verify) the bio as needed and hand it to the
 * device layer via btrfs_map_bio().
 *
 * Reads always take an end-io workqueue for csum verification; writes
 * either csum inline (sync writers present, or NODATASUM) or defer the
 * csum work to worker threads via btrfs_wq_submit_bio().
 */
static blk_status_t btrfs_submit_bio_hook(void *private_data, struct bio *bio,
				 int mirror_num, unsigned long bio_flags,
				 u64 bio_offset)
{
	struct inode *inode = private_data;
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
	blk_status_t ret = 0;
	int skip_sum;
	int async = !atomic_read(&BTRFS_I(inode)->sync_writers);

	skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;

	if (btrfs_is_free_space_inode(BTRFS_I(inode)))
		metadata = BTRFS_WQ_ENDIO_FREE_SPACE;

	if (bio_op(bio) != REQ_OP_WRITE) {
		/* Read: end-io runs in a workqueue so it may verify csums. */
		ret = btrfs_bio_wq_end_io(fs_info, bio, metadata);
		if (ret)
			goto out;

		if (bio_flags & EXTENT_BIO_COMPRESSED) {
			/* Compressed reads are submitted by their own path. */
			ret = btrfs_submit_compressed_read(inode, bio,
							   mirror_num,
							   bio_flags);
			goto out;
		} else if (!skip_sum) {
			ret = btrfs_lookup_bio_sums(inode, bio, NULL);
			if (ret)
				goto out;
		}
		goto mapit;
	} else if (async && !skip_sum) {
		/* csum items have already been cloned */
		if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
			goto mapit;
		/* We're doing a write, do the async checksumming. */
		ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, bio_flags,
					  bio_offset, inode,
					  btrfs_submit_bio_start);
		goto out;
	} else if (!skip_sum) {
		/* Sync write: csum inline before mapping. */
		ret = btrfs_csum_one_bio(inode, bio, 0, 0);
		if (ret)
			goto out;
	}

mapit:
	ret = btrfs_map_bio(fs_info, bio, mirror_num, 0);

out:
	if (ret) {
		bio->bi_status = ret;
		bio_endio(bio);
	}
	return ret;
}
2025
2026
2027
2028
2029
2030static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
2031 struct inode *inode, struct list_head *list)
2032{
2033 struct btrfs_ordered_sum *sum;
2034 int ret;
2035
2036 list_for_each_entry(sum, list, list) {
2037 trans->adding_csums = true;
2038 ret = btrfs_csum_file_blocks(trans,
2039 BTRFS_I(inode)->root->fs_info->csum_root, sum);
2040 trans->adding_csums = false;
2041 if (ret)
2042 return ret;
2043 }
2044 return 0;
2045}
2046
/*
 * Mark [start, end] (inclusive) as delalloc in the inode's io_tree,
 * together with any @extra_bits.  @end is the last byte of the range, so
 * it must never be page aligned.  @dedupe is currently unused here.
 */
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
			      unsigned int extra_bits,
			      struct extent_state **cached_state, int dedupe)
{
	WARN_ON(PAGE_ALIGNED(end));
	return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
				   extra_bits, cached_state);
}
2055
2056
/*
 * Deferred fixup for a page that was dirtied without going through the
 * normal btrfs write hooks; see btrfs_writepage_cow_fixup().
 */
struct btrfs_writepage_fixup {
	struct page *page;	/* page to fix; a reference is held */
	struct btrfs_work work;	/* queued on fs_info->fixup_workers */
};
2061
/*
 * Worker for the COW fixup: the page was dirtied behind btrfs' back (see
 * btrfs_writepage_cow_fixup()), so reserve delalloc space for it and
 * re-mark it delalloc + dirty so writeback accounts for it properly.
 */
static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
{
	struct btrfs_writepage_fixup *fixup;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	struct extent_changeset *data_reserved = NULL;
	struct page *page;
	struct inode *inode;
	u64 page_start;
	u64 page_end;
	int ret;

	fixup = container_of(work, struct btrfs_writepage_fixup, work);
	page = fixup->page;
again:
	lock_page(page);
	if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
		/* Somebody else already dealt with this page. */
		ClearPageChecked(page);
		goto out_page;
	}

	inode = page->mapping->host;
	page_start = page_offset(page);
	page_end = page_offset(page) + PAGE_SIZE - 1;

	lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end,
			 &cached_state);

	/* Already ordered? We're done. */
	if (PagePrivate2(page))
		goto out;

	ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start,
					PAGE_SIZE);
	if (ordered) {
		/* Wait for the in-flight ordered extent, then retry. */
		unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start,
				     page_end, &cached_state);
		unlock_page(page);
		btrfs_start_ordered_extent(inode, ordered, 1);
		btrfs_put_ordered_extent(ordered);
		goto again;
	}

	ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
					   PAGE_SIZE);
	if (ret) {
		mapping_set_error(page->mapping, ret);
		end_extent_writepage(page, ret, page_start, page_end);
		ClearPageChecked(page);
		goto out;
	}

	ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0,
					&cached_state, 0);
	if (ret) {
		mapping_set_error(page->mapping, ret);
		end_extent_writepage(page, ret, page_start, page_end);
		ClearPageChecked(page);
		goto out;
	}

	ClearPageChecked(page);
	set_page_dirty(page);
	btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, false);
out:
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
			     &cached_state);
out_page:
	unlock_page(page);
	put_page(page);
	kfree(fixup);
	extent_changeset_free(data_reserved);
}
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
/*
 * Called during writeback for a dirty page that has no matching delalloc
 * state (someone set the dirty bit without asking the filesystem, e.g.
 * via a get_user_pages() writer).  Queue a fixup worker to re-reserve
 * space and re-dirty the page properly.
 *
 * Returns 0 when the page is properly set up (Private2 was set by the
 * delalloc machinery), -EBUSY when a fixup worker was queued, or -EAGAIN
 * when a fixup is already pending or cannot be allocated; the caller
 * redirties the page in the latter cases.
 */
int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end)
{
	struct inode *inode = page->mapping->host;
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_writepage_fixup *fixup;

	/* This page is properly in the ordered list. */
	if (TestClearPagePrivate2(page))
		return 0;

	/* Fixup worker already queued for this page. */
	if (PageChecked(page))
		return -EAGAIN;

	fixup = kzalloc(sizeof(*fixup), GFP_NOFS);
	if (!fixup)
		return -EAGAIN;

	SetPageChecked(page);
	get_page(page);
	btrfs_init_work(&fixup->work, btrfs_fixup_helper,
			btrfs_writepage_fixup_worker, NULL, NULL);
	fixup->page = page;
	btrfs_queue_work(fs_info->fixup_workers, &fixup->work);
	return -EBUSY;
}
2172
/*
 * Insert a file extent item for a fully written, reserved extent at
 * @file_pos, dropping any old extents in the covered range, and add the
 * delayed ref for the new extent.  Used when finishing ordered extents.
 */
static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
				       struct inode *inode, u64 file_pos,
				       u64 disk_bytenr, u64 disk_num_bytes,
				       u64 num_bytes, u64 ram_bytes,
				       u8 compression, u8 encryption,
				       u16 other_encoding, int extent_type)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_file_extent_item *fi;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key ins;
	u64 qg_released;
	int extent_inserted = 0;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/*
	 * We may be replacing one extent in the tree with another.  The new
	 * extent is pinned in the extent map, and we don't want to drop it
	 * from the cache until it is completely in the btree.
	 *
	 * So, tell __btrfs_drop_extents to leave this extent in the cache.
	 * The caller is expected to unpin it and wake up any waiters for
	 * this cached extent.  (replace_extent=1 lets it insert the new
	 * item in the slot it just emptied, when possible.)
	 */
	ret = __btrfs_drop_extents(trans, root, inode, path, file_pos,
				   file_pos + num_bytes, NULL, 0,
				   1, sizeof(*fi), &extent_inserted);
	if (ret)
		goto out;

	if (!extent_inserted) {
		/* Drop didn't leave a slot for us; insert one explicitly. */
		ins.objectid = btrfs_ino(BTRFS_I(inode));
		ins.offset = file_pos;
		ins.type = BTRFS_EXTENT_DATA_KEY;

		path->leave_spinning = 1;
		ret = btrfs_insert_empty_item(trans, root, path, &ins,
					      sizeof(*fi));
		if (ret)
			goto out;
	}
	leaf = path->nodes[0];
	fi = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);
	btrfs_set_file_extent_generation(leaf, fi, trans->transid);
	btrfs_set_file_extent_type(leaf, fi, extent_type);
	btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr);
	btrfs_set_file_extent_disk_num_bytes(leaf, fi, disk_num_bytes);
	btrfs_set_file_extent_offset(leaf, fi, 0);
	btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
	btrfs_set_file_extent_ram_bytes(leaf, fi, ram_bytes);
	btrfs_set_file_extent_compression(leaf, fi, compression);
	btrfs_set_file_extent_encryption(leaf, fi, encryption);
	btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding);

	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	inode_add_bytes(inode, num_bytes);

	ins.objectid = disk_bytenr;
	ins.offset = disk_num_bytes;
	ins.type = BTRFS_EXTENT_ITEM_KEY;

	/*
	 * Release the reserved range from the inode's dirty range map, as
	 * it is already moved into the delayed ref head.
	 */
	ret = btrfs_qgroup_release_data(inode, file_pos, ram_bytes);
	if (ret < 0)
		goto out;
	qg_released = ret;
	ret = btrfs_alloc_reserved_file_extent(trans, root,
					       btrfs_ino(BTRFS_I(inode)),
					       file_pos, qg_released, &ins);
out:
	btrfs_free_path(path);

	return ret;
}
2258
2259
/*
 * Snapshot-aware defrag: one backref to an old (pre-defrag) extent,
 * i.e. a (root, inode, file position) that still points at the old
 * extent and should be relinked to the freshly written one.
 */
struct sa_defrag_extent_backref {
	struct rb_node node;	/* entry in new_sa_defrag_extent::root */
	struct old_sa_defrag_extent *old;
	u64 root_id;
	u64 inum;
	u64 file_pos;
	u64 extent_offset;
	u64 num_bytes;
	u64 generation;
};
2270
/*
 * Snapshot-aware defrag: an extent that existed in the defragged range
 * before it was rewritten; linked on new_sa_defrag_extent::head while
 * its remaining backrefs are collected.
 */
struct old_sa_defrag_extent {
	struct list_head list;		/* entry in new_sa_defrag_extent::head */
	struct new_sa_defrag_extent *new;

	u64 extent_offset;
	u64 bytenr;		/* disk bytenr of the old extent */
	u64 offset;
	u64 len;
	int count;		/* number of backrefs recorded for it */
};
2281
/*
 * Snapshot-aware defrag: the freshly written (defragged) extent plus the
 * state needed to relink old references to it.
 */
struct new_sa_defrag_extent {
	struct rb_root root;		/* tree of sa_defrag_extent_backref */
	struct list_head head;		/* list of old_sa_defrag_extent */
	struct btrfs_path *path;
	struct inode *inode;
	u64 file_pos;
	u64 len;
	u64 bytenr;		/* disk bytenr of the new extent */
	u64 disk_len;
	u8 compress_type;
};
2293
2294static int backref_comp(struct sa_defrag_extent_backref *b1,
2295 struct sa_defrag_extent_backref *b2)
2296{
2297 if (b1->root_id < b2->root_id)
2298 return -1;
2299 else if (b1->root_id > b2->root_id)
2300 return 1;
2301
2302 if (b1->inum < b2->inum)
2303 return -1;
2304 else if (b1->inum > b2->inum)
2305 return 1;
2306
2307 if (b1->file_pos < b2->file_pos)
2308 return -1;
2309 else if (b1->file_pos > b2->file_pos)
2310 return 1;
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324 return 0;
2325}
2326
2327static void backref_insert(struct rb_root *root,
2328 struct sa_defrag_extent_backref *backref)
2329{
2330 struct rb_node **p = &root->rb_node;
2331 struct rb_node *parent = NULL;
2332 struct sa_defrag_extent_backref *entry;
2333 int ret;
2334
2335 while (*p) {
2336 parent = *p;
2337 entry = rb_entry(parent, struct sa_defrag_extent_backref, node);
2338
2339 ret = backref_comp(backref, entry);
2340 if (ret < 0)
2341 p = &(*p)->rb_left;
2342 else
2343 p = &(*p)->rb_right;
2344 }
2345
2346 rb_link_node(&backref->node, parent, p);
2347 rb_insert_color(&backref->node, root);
2348}
2349
2350
2351
2352
2353static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
2354 void *ctx)
2355{
2356 struct btrfs_file_extent_item *extent;
2357 struct old_sa_defrag_extent *old = ctx;
2358 struct new_sa_defrag_extent *new = old->new;
2359 struct btrfs_path *path = new->path;
2360 struct btrfs_key key;
2361 struct btrfs_root *root;
2362 struct sa_defrag_extent_backref *backref;
2363 struct extent_buffer *leaf;
2364 struct inode *inode = new->inode;
2365 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2366 int slot;
2367 int ret;
2368 u64 extent_offset;
2369 u64 num_bytes;
2370
2371 if (BTRFS_I(inode)->root->root_key.objectid == root_id &&
2372 inum == btrfs_ino(BTRFS_I(inode)))
2373 return 0;
2374
2375 key.objectid = root_id;
2376 key.type = BTRFS_ROOT_ITEM_KEY;
2377 key.offset = (u64)-1;
2378
2379 root = btrfs_read_fs_root_no_name(fs_info, &key);
2380 if (IS_ERR(root)) {
2381 if (PTR_ERR(root) == -ENOENT)
2382 return 0;
2383 WARN_ON(1);
2384 btrfs_debug(fs_info, "inum=%llu, offset=%llu, root_id=%llu",
2385 inum, offset, root_id);
2386 return PTR_ERR(root);
2387 }
2388
2389 key.objectid = inum;
2390 key.type = BTRFS_EXTENT_DATA_KEY;
2391 if (offset > (u64)-1 << 32)
2392 key.offset = 0;
2393 else
2394 key.offset = offset;
2395
2396 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2397 if (WARN_ON(ret < 0))
2398 return ret;
2399 ret = 0;
2400
2401 while (1) {
2402 cond_resched();
2403
2404 leaf = path->nodes[0];
2405 slot = path->slots[0];
2406
2407 if (slot >= btrfs_header_nritems(leaf)) {
2408 ret = btrfs_next_leaf(root, path);
2409 if (ret < 0) {
2410 goto out;
2411 } else if (ret > 0) {
2412 ret = 0;
2413 goto out;
2414 }
2415 continue;
2416 }
2417
2418 path->slots[0]++;
2419
2420 btrfs_item_key_to_cpu(leaf, &key, slot);
2421
2422 if (key.objectid > inum)
2423 goto out;
2424
2425 if (key.objectid < inum || key.type != BTRFS_EXTENT_DATA_KEY)
2426 continue;
2427
2428 extent = btrfs_item_ptr(leaf, slot,
2429 struct btrfs_file_extent_item);
2430
2431 if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr)
2432 continue;
2433
2434
2435
2436
2437
2438
2439 if (key.offset != offset)
2440 continue;
2441
2442 extent_offset = btrfs_file_extent_offset(leaf, extent);
2443 num_bytes = btrfs_file_extent_num_bytes(leaf, extent);
2444
2445 if (extent_offset >= old->extent_offset + old->offset +
2446 old->len || extent_offset + num_bytes <=
2447 old->extent_offset + old->offset)
2448 continue;
2449 break;
2450 }
2451
2452 backref = kmalloc(sizeof(*backref), GFP_NOFS);
2453 if (!backref) {
2454 ret = -ENOENT;
2455 goto out;
2456 }
2457
2458 backref->root_id = root_id;
2459 backref->inum = inum;
2460 backref->file_pos = offset;
2461 backref->num_bytes = num_bytes;
2462 backref->extent_offset = extent_offset;
2463 backref->generation = btrfs_file_extent_generation(leaf, extent);
2464 backref->old = old;
2465 backref_insert(&new->root, backref);
2466 old->count++;
2467out:
2468 btrfs_release_path(path);
2469 WARN_ON(ret);
2470 return ret;
2471}
2472
2473static noinline bool record_extent_backrefs(struct btrfs_path *path,
2474 struct new_sa_defrag_extent *new)
2475{
2476 struct btrfs_fs_info *fs_info = btrfs_sb(new->inode->i_sb);
2477 struct old_sa_defrag_extent *old, *tmp;
2478 int ret;
2479
2480 new->path = path;
2481
2482 list_for_each_entry_safe(old, tmp, &new->head, list) {
2483 ret = iterate_inodes_from_logical(old->bytenr +
2484 old->extent_offset, fs_info,
2485 path, record_one_backref,
2486 old, false);
2487 if (ret < 0 && ret != -ENOENT)
2488 return false;
2489
2490
2491 if (!old->count) {
2492 list_del(&old->list);
2493 kfree(old);
2494 }
2495 }
2496
2497 if (list_empty(&new->head))
2498 return false;
2499
2500 return true;
2501}
2502
2503static int relink_is_mergable(struct extent_buffer *leaf,
2504 struct btrfs_file_extent_item *fi,
2505 struct new_sa_defrag_extent *new)
2506{
2507 if (btrfs_file_extent_disk_bytenr(leaf, fi) != new->bytenr)
2508 return 0;
2509
2510 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
2511 return 0;
2512
2513 if (btrfs_file_extent_compression(leaf, fi) != new->compress_type)
2514 return 0;
2515
2516 if (btrfs_file_extent_encryption(leaf, fi) ||
2517 btrfs_file_extent_other_encoding(leaf, fi))
2518 return 0;
2519
2520 return 1;
2521}
2522
2523
2524
2525
2526static noinline int relink_extent_backref(struct btrfs_path *path,
2527 struct sa_defrag_extent_backref *prev,
2528 struct sa_defrag_extent_backref *backref)
2529{
2530 struct btrfs_file_extent_item *extent;
2531 struct btrfs_file_extent_item *item;
2532 struct btrfs_ordered_extent *ordered;
2533 struct btrfs_trans_handle *trans;
2534 struct btrfs_root *root;
2535 struct btrfs_key key;
2536 struct extent_buffer *leaf;
2537 struct old_sa_defrag_extent *old = backref->old;
2538 struct new_sa_defrag_extent *new = old->new;
2539 struct btrfs_fs_info *fs_info = btrfs_sb(new->inode->i_sb);
2540 struct inode *inode;
2541 struct extent_state *cached = NULL;
2542 int ret = 0;
2543 u64 start;
2544 u64 len;
2545 u64 lock_start;
2546 u64 lock_end;
2547 bool merge = false;
2548 int index;
2549
2550 if (prev && prev->root_id == backref->root_id &&
2551 prev->inum == backref->inum &&
2552 prev->file_pos + prev->num_bytes == backref->file_pos)
2553 merge = true;
2554
2555
2556 key.objectid = backref->root_id;
2557 key.type = BTRFS_ROOT_ITEM_KEY;
2558 key.offset = (u64)-1;
2559
2560 index = srcu_read_lock(&fs_info->subvol_srcu);
2561
2562 root = btrfs_read_fs_root_no_name(fs_info, &key);
2563 if (IS_ERR(root)) {
2564 srcu_read_unlock(&fs_info->subvol_srcu, index);
2565 if (PTR_ERR(root) == -ENOENT)
2566 return 0;
2567 return PTR_ERR(root);
2568 }
2569
2570 if (btrfs_root_readonly(root)) {
2571 srcu_read_unlock(&fs_info->subvol_srcu, index);
2572 return 0;
2573 }
2574
2575
2576 key.objectid = backref->inum;
2577 key.type = BTRFS_INODE_ITEM_KEY;
2578 key.offset = 0;
2579
2580 inode = btrfs_iget(fs_info->sb, &key, root, NULL);
2581 if (IS_ERR(inode)) {
2582 srcu_read_unlock(&fs_info->subvol_srcu, index);
2583 return 0;
2584 }
2585
2586 srcu_read_unlock(&fs_info->subvol_srcu, index);
2587
2588
2589 lock_start = backref->file_pos;
2590 lock_end = backref->file_pos + backref->num_bytes - 1;
2591 lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
2592 &cached);
2593
2594 ordered = btrfs_lookup_first_ordered_extent(inode, lock_end);
2595 if (ordered) {
2596 btrfs_put_ordered_extent(ordered);
2597 goto out_unlock;
2598 }
2599
2600 trans = btrfs_join_transaction(root);
2601 if (IS_ERR(trans)) {
2602 ret = PTR_ERR(trans);
2603 goto out_unlock;
2604 }
2605
2606 key.objectid = backref->inum;
2607 key.type = BTRFS_EXTENT_DATA_KEY;
2608 key.offset = backref->file_pos;
2609
2610 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2611 if (ret < 0) {
2612 goto out_free_path;
2613 } else if (ret > 0) {
2614 ret = 0;
2615 goto out_free_path;
2616 }
2617
2618 extent = btrfs_item_ptr(path->nodes[0], path->slots[0],
2619 struct btrfs_file_extent_item);
2620
2621 if (btrfs_file_extent_generation(path->nodes[0], extent) !=
2622 backref->generation)
2623 goto out_free_path;
2624
2625 btrfs_release_path(path);
2626
2627 start = backref->file_pos;
2628 if (backref->extent_offset < old->extent_offset + old->offset)
2629 start += old->extent_offset + old->offset -
2630 backref->extent_offset;
2631
2632 len = min(backref->extent_offset + backref->num_bytes,
2633 old->extent_offset + old->offset + old->len);
2634 len -= max(backref->extent_offset, old->extent_offset + old->offset);
2635
2636 ret = btrfs_drop_extents(trans, root, inode, start,
2637 start + len, 1);
2638 if (ret)
2639 goto out_free_path;
2640again:
2641 key.objectid = btrfs_ino(BTRFS_I(inode));
2642 key.type = BTRFS_EXTENT_DATA_KEY;
2643 key.offset = start;
2644
2645 path->leave_spinning = 1;
2646 if (merge) {
2647 struct btrfs_file_extent_item *fi;
2648 u64 extent_len;
2649 struct btrfs_key found_key;
2650
2651 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2652 if (ret < 0)
2653 goto out_free_path;
2654
2655 path->slots[0]--;
2656 leaf = path->nodes[0];
2657 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2658
2659 fi = btrfs_item_ptr(leaf, path->slots[0],
2660 struct btrfs_file_extent_item);
2661 extent_len = btrfs_file_extent_num_bytes(leaf, fi);
2662
2663 if (extent_len + found_key.offset == start &&
2664 relink_is_mergable(leaf, fi, new)) {
2665 btrfs_set_file_extent_num_bytes(leaf, fi,
2666 extent_len + len);
2667 btrfs_mark_buffer_dirty(leaf);
2668 inode_add_bytes(inode, len);
2669
2670 ret = 1;
2671 goto out_free_path;
2672 } else {
2673 merge = false;
2674 btrfs_release_path(path);
2675 goto again;
2676 }
2677 }
2678
2679 ret = btrfs_insert_empty_item(trans, root, path, &key,
2680 sizeof(*extent));
2681 if (ret) {
2682 btrfs_abort_transaction(trans, ret);
2683 goto out_free_path;
2684 }
2685
2686 leaf = path->nodes[0];
2687 item = btrfs_item_ptr(leaf, path->slots[0],
2688 struct btrfs_file_extent_item);
2689 btrfs_set_file_extent_disk_bytenr(leaf, item, new->bytenr);
2690 btrfs_set_file_extent_disk_num_bytes(leaf, item, new->disk_len);
2691 btrfs_set_file_extent_offset(leaf, item, start - new->file_pos);
2692 btrfs_set_file_extent_num_bytes(leaf, item, len);
2693 btrfs_set_file_extent_ram_bytes(leaf, item, new->len);
2694 btrfs_set_file_extent_generation(leaf, item, trans->transid);
2695 btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
2696 btrfs_set_file_extent_compression(leaf, item, new->compress_type);
2697 btrfs_set_file_extent_encryption(leaf, item, 0);
2698 btrfs_set_file_extent_other_encoding(leaf, item, 0);
2699
2700 btrfs_mark_buffer_dirty(leaf);
2701 inode_add_bytes(inode, len);
2702 btrfs_release_path(path);
2703
2704 ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
2705 new->disk_len, 0,
2706 backref->root_id, backref->inum,
2707 new->file_pos);
2708 if (ret) {
2709 btrfs_abort_transaction(trans, ret);
2710 goto out_free_path;
2711 }
2712
2713 ret = 1;
2714out_free_path:
2715 btrfs_release_path(path);
2716 path->leave_spinning = 0;
2717 btrfs_end_transaction(trans);
2718out_unlock:
2719 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
2720 &cached);
2721 iput(inode);
2722 return ret;
2723}
2724
2725static void free_sa_defrag_extent(struct new_sa_defrag_extent *new)
2726{
2727 struct old_sa_defrag_extent *old, *tmp;
2728
2729 if (!new)
2730 return;
2731
2732 list_for_each_entry_safe(old, tmp, &new->head, list) {
2733 kfree(old);
2734 }
2735 kfree(new);
2736}
2737
2738static void relink_file_extents(struct new_sa_defrag_extent *new)
2739{
2740 struct btrfs_fs_info *fs_info = btrfs_sb(new->inode->i_sb);
2741 struct btrfs_path *path;
2742 struct sa_defrag_extent_backref *backref;
2743 struct sa_defrag_extent_backref *prev = NULL;
2744 struct rb_node *node;
2745 int ret;
2746
2747 path = btrfs_alloc_path();
2748 if (!path)
2749 return;
2750
2751 if (!record_extent_backrefs(path, new)) {
2752 btrfs_free_path(path);
2753 goto out;
2754 }
2755 btrfs_release_path(path);
2756
2757 while (1) {
2758 node = rb_first(&new->root);
2759 if (!node)
2760 break;
2761 rb_erase(node, &new->root);
2762
2763 backref = rb_entry(node, struct sa_defrag_extent_backref, node);
2764
2765 ret = relink_extent_backref(path, prev, backref);
2766 WARN_ON(ret < 0);
2767
2768 kfree(prev);
2769
2770 if (ret == 1)
2771 prev = backref;
2772 else
2773 prev = NULL;
2774 cond_resched();
2775 }
2776 kfree(prev);
2777
2778 btrfs_free_path(path);
2779out:
2780 free_sa_defrag_extent(new);
2781
2782 atomic_dec(&fs_info->defrag_running);
2783 wake_up(&fs_info->transaction_wait);
2784}
2785
/*
 * Capture the pre-writeback extent layout for snapshot-aware defrag.
 *
 * Allocates a new_sa_defrag_extent describing @ordered and records every
 * existing file extent item overlapping the ordered range as an
 * old_sa_defrag_extent on new->head.  On success fs_info->defrag_running
 * is bumped; the caller must eventually hand the result to
 * relink_file_extents() (which frees it and drops the count).
 * Returns NULL on any failure, with all partial allocations freed.
 */
static struct new_sa_defrag_extent *
record_old_file_extents(struct inode *inode,
			struct btrfs_ordered_extent *ordered)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct old_sa_defrag_extent *old;
	struct new_sa_defrag_extent *new;
	int ret;

	new = kmalloc(sizeof(*new), GFP_NOFS);
	if (!new)
		return NULL;

	new->inode = inode;
	new->file_pos = ordered->file_offset;
	new->len = ordered->len;
	new->bytenr = ordered->start;
	new->disk_len = ordered->disk_len;
	new->compress_type = ordered->compress_type;
	new->root = RB_ROOT;
	INIT_LIST_HEAD(&new->head);

	path = btrfs_alloc_path();
	if (!path)
		goto out_kfree;

	key.objectid = btrfs_ino(BTRFS_I(inode));
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = new->file_pos;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out_free_path;
	if (ret > 0 && path->slots[0] > 0)
		path->slots[0]--;

	/* walk all old file extent items covering the ordered range */
	while (1) {
		struct btrfs_file_extent_item *extent;
		struct extent_buffer *l;
		int slot;
		u64 num_bytes;
		u64 offset;
		u64 end;
		u64 disk_bytenr;
		u64 extent_offset;

		l = path->nodes[0];
		slot = path->slots[0];

		if (slot >= btrfs_header_nritems(l)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out_free_path;
			else if (ret > 0)
				break;
			continue;
		}

		btrfs_item_key_to_cpu(l, &key, slot);

		/* stop once we leave this inode's extent data items */
		if (key.objectid != btrfs_ino(BTRFS_I(inode)))
			break;
		if (key.type != BTRFS_EXTENT_DATA_KEY)
			break;
		if (key.offset >= new->file_pos + new->len)
			break;

		extent = btrfs_item_ptr(l, slot, struct btrfs_file_extent_item);

		num_bytes = btrfs_file_extent_num_bytes(l, extent);
		if (key.offset + num_bytes < new->file_pos)
			goto next;

		/* a zero disk bytenr is a hole: nothing to relink */
		disk_bytenr = btrfs_file_extent_disk_bytenr(l, extent);
		if (!disk_bytenr)
			goto next;

		extent_offset = btrfs_file_extent_offset(l, extent);

		old = kmalloc(sizeof(*old), GFP_NOFS);
		if (!old)
			goto out_free_path;

		/* clamp the old extent to the ordered extent's range */
		offset = max(new->file_pos, key.offset);
		end = min(new->file_pos + new->len, key.offset + num_bytes);

		old->bytenr = disk_bytenr;
		old->extent_offset = extent_offset;
		old->offset = offset - key.offset;
		old->len = end - offset;
		old->new = new;
		old->count = 0;
		list_add_tail(&old->list, &new->head);
next:
		path->slots[0]++;
		cond_resched();
	}

	btrfs_free_path(path);
	/* dropped by relink_file_extents() when the record is consumed */
	atomic_inc(&fs_info->defrag_running);

	return new;

out_free_path:
	btrfs_free_path(path);
out_kfree:
	free_sa_defrag_extent(new);
	return NULL;
}
2899
2900static void btrfs_release_delalloc_bytes(struct btrfs_fs_info *fs_info,
2901 u64 start, u64 len)
2902{
2903 struct btrfs_block_group_cache *cache;
2904
2905 cache = btrfs_lookup_block_group(fs_info, start);
2906 ASSERT(cache);
2907
2908 spin_lock(&cache->lock);
2909 cache->delalloc_bytes -= len;
2910 spin_unlock(&cache->lock);
2911
2912 btrfs_put_block_group(cache);
2913}
2914
2915
2916
2917
2918
/*
 * Finish an ordered extent whose IO has completed: insert or update the
 * file extent item, add pending checksums, update the inode size, and
 * release all locks, reservations and references taken for writeback.
 * Returns 0 on success or a negative errno (metadata update failures
 * abort the transaction).
 */
static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
{
	struct inode *inode = ordered_extent->inode;
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_trans_handle *trans = NULL;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_state *cached_state = NULL;
	struct new_sa_defrag_extent *new = NULL;
	int compress_type = 0;
	int ret = 0;
	u64 logical_len = ordered_extent->len;
	bool nolock;
	bool truncated = false;
	bool range_locked = false;
	bool clear_new_delalloc_bytes = false;
	bool clear_reserved_extent = true;

	/* only plain COW writes tracked EXTENT_DELALLOC_NEW for this range */
	if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
	    !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags) &&
	    !test_bit(BTRFS_ORDERED_DIRECT, &ordered_extent->flags))
		clear_new_delalloc_bytes = true;

	nolock = btrfs_is_free_space_inode(BTRFS_I(inode));

	if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) {
		ret = -EIO;
		goto out;
	}

	btrfs_free_io_failure_record(BTRFS_I(inode),
			ordered_extent->file_offset,
			ordered_extent->file_offset +
			ordered_extent->len - 1);

	if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
		truncated = true;
		logical_len = ordered_extent->truncated_len;
		/* truncated the entire extent, don't bother adding it */
		if (!logical_len)
			goto out;
	}

	if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
		BUG_ON(!list_empty(&ordered_extent->list));

		/*
		 * nocow writes land in pre-existing extents, so no file
		 * extent item needs inserting; release the qgroup data
		 * reservation and just update the inode item.
		 */
		btrfs_qgroup_free_data(inode, NULL, ordered_extent->file_offset,
				       ordered_extent->len);
		btrfs_ordered_update_i_size(inode, 0, ordered_extent);
		if (nolock)
			trans = btrfs_join_transaction_nolock(root);
		else
			trans = btrfs_join_transaction(root);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			trans = NULL;
			goto out;
		}
		trans->block_rsv = &BTRFS_I(inode)->block_rsv;
		ret = btrfs_update_inode_fallback(trans, root, inode);
		if (ret) /* abort keeps the metadata consistent */
			btrfs_abort_transaction(trans, ret);
		goto out;
	}

	range_locked = true;
	lock_extent_bits(io_tree, ordered_extent->file_offset,
			 ordered_extent->file_offset + ordered_extent->len - 1,
			 &cached_state);

	ret = test_range_bit(io_tree, ordered_extent->file_offset,
			ordered_extent->file_offset + ordered_extent->len - 1,
			EXTENT_DEFRAG, 0, cached_state);
	if (ret) {
		u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
		/*
		 * Snapshot-aware defrag relinking is deliberately disabled
		 * (hence the "0 &&").  When enabled, an inode shared with a
		 * snapshot would have its old extents recorded here for
		 * later relinking via relink_file_extents().
		 */
		if (0 && last_snapshot >= BTRFS_I(inode)->generation)
			/* the inode is shared */
			new = record_old_file_extents(inode, ordered_extent);

		clear_extent_bit(io_tree, ordered_extent->file_offset,
			ordered_extent->file_offset + ordered_extent->len - 1,
			EXTENT_DEFRAG, 0, 0, &cached_state);
	}

	if (nolock)
		trans = btrfs_join_transaction_nolock(root);
	else
		trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		trans = NULL;
		goto out;
	}

	trans->block_rsv = &BTRFS_I(inode)->block_rsv;

	if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
		compress_type = ordered_extent->compress_type;
	if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
		/* prealloc extents are never compressed */
		BUG_ON(compress_type);
		btrfs_qgroup_free_data(inode, NULL, ordered_extent->file_offset,
				       ordered_extent->len);
		ret = btrfs_mark_extent_written(trans, BTRFS_I(inode),
						ordered_extent->file_offset,
						ordered_extent->file_offset +
						logical_len);
	} else {
		BUG_ON(root == fs_info->tree_root);
		ret = insert_reserved_file_extent(trans, inode,
						ordered_extent->file_offset,
						ordered_extent->start,
						ordered_extent->disk_len,
						logical_len, logical_len,
						compress_type, 0, 0,
						BTRFS_FILE_EXTENT_REG);
		if (!ret) {
			clear_reserved_extent = false;
			btrfs_release_delalloc_bytes(fs_info,
						     ordered_extent->start,
						     ordered_extent->disk_len);
		}
	}
	unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
			   ordered_extent->file_offset, ordered_extent->len,
			   trans->transid);
	if (ret < 0) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	ret = add_pending_csums(trans, inode, &ordered_extent->list);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	btrfs_ordered_update_i_size(inode, 0, ordered_extent);
	ret = btrfs_update_inode_fallback(trans, root, inode);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}
	ret = 0;
out:
	if (range_locked || clear_new_delalloc_bytes) {
		unsigned int clear_bits = 0;

		if (range_locked)
			clear_bits |= EXTENT_LOCKED;
		if (clear_new_delalloc_bytes)
			clear_bits |= EXTENT_DELALLOC_NEW;
		clear_extent_bit(&BTRFS_I(inode)->io_tree,
				 ordered_extent->file_offset,
				 ordered_extent->file_offset +
				 ordered_extent->len - 1,
				 clear_bits,
				 (clear_bits & EXTENT_LOCKED) ? 1 : 0,
				 0, &cached_state);
	}

	if (trans)
		btrfs_end_transaction(trans);

	if (ret || truncated) {
		u64 start, end;

		/* only clear state for the part we did not write out */
		if (truncated)
			start = ordered_extent->file_offset + logical_len;
		else
			start = ordered_extent->file_offset;
		end = ordered_extent->file_offset + ordered_extent->len - 1;
		clear_extent_uptodate(io_tree, start, end, NULL);

		/* Drop the cached extent mapping for the failed range */
		btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 0);

		/*
		 * On error (or when the whole extent was truncated away)
		 * the reserved space must be returned to the allocator,
		 * unless insert_reserved_file_extent() already succeeded
		 * (clear_reserved_extent == false, accounting done) or the
		 * extent was never allocated here (nocow / prealloc).
		 */
		if ((ret || !logical_len) &&
		    clear_reserved_extent &&
		    !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
		    !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags))
			btrfs_free_reserved_extent(fs_info,
						   ordered_extent->start,
						   ordered_extent->disk_len, 1);
	}

	/*
	 * Remove the ordered extent from the tree so anybody waiting knows
	 * we are done updating everything for it.
	 */
	btrfs_remove_ordered_extent(inode, ordered_extent);

	/* hand the recorded old extents to snapshot-aware defrag relink */
	if (new) {
		if (ret) {
			free_sa_defrag_extent(new);
			atomic_dec(&fs_info->defrag_running);
		} else {
			relink_file_extents(new);
		}
	}

	/* two references are dropped: ours, and the ordered tree's
	 * (removed just above) */
	btrfs_put_ordered_extent(ordered_extent);

	btrfs_put_ordered_extent(ordered_extent);

	return ret;
}
3143
3144static void finish_ordered_fn(struct btrfs_work *work)
3145{
3146 struct btrfs_ordered_extent *ordered_extent;
3147 ordered_extent = container_of(work, struct btrfs_ordered_extent, work);
3148 btrfs_finish_ordered_io(ordered_extent);
3149}
3150
3151void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start,
3152 u64 end, int uptodate)
3153{
3154 struct inode *inode = page->mapping->host;
3155 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
3156 struct btrfs_ordered_extent *ordered_extent = NULL;
3157 struct btrfs_workqueue *wq;
3158 btrfs_work_func_t func;
3159
3160 trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
3161
3162 ClearPagePrivate2(page);
3163 if (!btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
3164 end - start + 1, uptodate))
3165 return;
3166
3167 if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
3168 wq = fs_info->endio_freespace_worker;
3169 func = btrfs_freespace_write_helper;
3170 } else {
3171 wq = fs_info->endio_write_workers;
3172 func = btrfs_endio_write_helper;
3173 }
3174
3175 btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL,
3176 NULL);
3177 btrfs_queue_work(wq, &ordered_extent->work);
3178}
3179
3180static int __readpage_endio_check(struct inode *inode,
3181 struct btrfs_io_bio *io_bio,
3182 int icsum, struct page *page,
3183 int pgoff, u64 start, size_t len)
3184{
3185 char *kaddr;
3186 u32 csum_expected;
3187 u32 csum = ~(u32)0;
3188
3189 csum_expected = *(((u32 *)io_bio->csum) + icsum);
3190
3191 kaddr = kmap_atomic(page);
3192 csum = btrfs_csum_data(kaddr + pgoff, csum, len);
3193 btrfs_csum_final(csum, (u8 *)&csum);
3194 if (csum != csum_expected)
3195 goto zeroit;
3196
3197 kunmap_atomic(kaddr);
3198 return 0;
3199zeroit:
3200 btrfs_print_data_csum_error(BTRFS_I(inode), start, csum, csum_expected,
3201 io_bio->mirror_num);
3202 memset(kaddr + pgoff, 1, len);
3203 flush_dcache_page(page);
3204 kunmap_atomic(kaddr);
3205 return -EIO;
3206}
3207
3208
3209
3210
3211
3212
/*
 * Read end-io hook for [start, end] of @page: verify the data checksum
 * unless verification is not applicable for this inode/range.
 * Returns 0 on success or when checking is skipped, -EIO on mismatch.
 */
static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
				      u64 phy_offset, struct page *page,
				      u64 start, u64 end, int mirror)
{
	size_t offset = start - page_offset(page);
	struct inode *inode = page->mapping->host;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_root *root = BTRFS_I(inode)->root;

	/* PageChecked marks a page already validated elsewhere — skip */
	if (PageChecked(page)) {
		ClearPageChecked(page);
		return 0;
	}

	/* NODATASUM inodes carry no checksums to verify */
	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
		return 0;

	/* relocation-tree ranges marked NODATASUM are skipped as well */
	if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
	    test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
		clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM);
		return 0;
	}

	/* convert byte offset to a checksum index */
	phy_offset >>= inode->i_sb->s_blocksize_bits;
	return __readpage_endio_check(inode, io_bio, phy_offset, page, offset,
				      start, (size_t)(end - start + 1));
}
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251void btrfs_add_delayed_iput(struct inode *inode)
3252{
3253 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
3254 struct btrfs_inode *binode = BTRFS_I(inode);
3255
3256 if (atomic_add_unless(&inode->i_count, -1, 1))
3257 return;
3258
3259 atomic_inc(&fs_info->nr_delayed_iputs);
3260 spin_lock(&fs_info->delayed_iput_lock);
3261 ASSERT(list_empty(&binode->delayed_iput));
3262 list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs);
3263 spin_unlock(&fs_info->delayed_iput_lock);
3264 if (!test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags))
3265 wake_up_process(fs_info->cleaner_kthread);
3266}
3267
/*
 * Process every delayed iput queued by btrfs_add_delayed_iput().  The
 * spinlock is dropped around each iput() — a final iput can block — and
 * re-taken before fetching the next entry.
 */
void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
{

	spin_lock(&fs_info->delayed_iput_lock);
	while (!list_empty(&fs_info->delayed_iputs)) {
		struct btrfs_inode *inode;

		inode = list_first_entry(&fs_info->delayed_iputs,
				struct btrfs_inode, delayed_iput);
		list_del_init(&inode->delayed_iput);
		spin_unlock(&fs_info->delayed_iput_lock);
		iput(&inode->vfs_inode);
		/* last pending iput: wake btrfs_wait_on_delayed_iputs() */
		if (atomic_dec_and_test(&fs_info->nr_delayed_iputs))
			wake_up(&fs_info->delayed_iputs_wait);
		spin_lock(&fs_info->delayed_iput_lock);
	}
	spin_unlock(&fs_info->delayed_iput_lock);
}
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297int btrfs_wait_on_delayed_iputs(struct btrfs_fs_info *fs_info)
3298{
3299 int ret = wait_event_killable(fs_info->delayed_iputs_wait,
3300 atomic_read(&fs_info->nr_delayed_iputs) == 0);
3301 if (ret)
3302 return -EINTR;
3303 return 0;
3304}
3305
3306
3307
3308
3309
3310int btrfs_orphan_add(struct btrfs_trans_handle *trans,
3311 struct btrfs_inode *inode)
3312{
3313 int ret;
3314
3315 ret = btrfs_insert_orphan_item(trans, inode->root, btrfs_ino(inode));
3316 if (ret && ret != -EEXIST) {
3317 btrfs_abort_transaction(trans, ret);
3318 return ret;
3319 }
3320
3321 return 0;
3322}
3323
3324
3325
3326
3327
3328static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
3329 struct btrfs_inode *inode)
3330{
3331 return btrfs_del_orphan_item(trans, inode->root, btrfs_ino(inode));
3332}
3333
3334
3335
3336
3337
/*
 * Scan this root's orphan items and clean up inodes whose unlink or
 * truncate was interrupted by a crash: stale orphan items (inode gone or
 * still linked) are deleted, and truly unlinked inodes are released so
 * iput() finishes their removal.  Runs at most once per root, guarded by
 * orphan_cleanup_state.  Returns 0 on success or a negative errno.
 */
int btrfs_orphan_cleanup(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key, found_key;
	struct btrfs_trans_handle *trans;
	struct inode *inode;
	u64 last_objectid = 0;
	int ret = 0, nr_unlink = 0;

	/* only one cleanup pass per root */
	if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
		return 0;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}
	path->reada = READA_BACK;

	key.objectid = BTRFS_ORPHAN_OBJECTID;
	key.type = BTRFS_ORPHAN_ITEM_KEY;
	key.offset = (u64)-1;

	while (1) {
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			goto out;

		/*
		 * key.offset is (u64)-1, so the search always lands one
		 * past the last orphan item; step back to examine it.
		 */
		if (ret > 0) {
			ret = 0;
			if (path->slots[0] == 0)
				break;
			path->slots[0]--;
		}

		/* stop once we leave the orphan item range */
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		if (found_key.objectid != BTRFS_ORPHAN_OBJECTID)
			break;
		if (found_key.type != BTRFS_ORPHAN_ITEM_KEY)
			break;

		/* release the path before taking transactions / iget */
		btrfs_release_path(path);

		/*
		 * Seeing the same orphan item twice in a row means deleting
		 * it failed last iteration: bail out rather than loop
		 * forever.
		 */
		if (found_key.offset == last_objectid) {
			btrfs_err(fs_info,
				  "Error removing orphan entry, stopping orphan cleanup");
			ret = -EINVAL;
			goto out;
		}

		last_objectid = found_key.offset;

		/* the orphan item's offset field holds the inode number */
		found_key.objectid = found_key.offset;
		found_key.type = BTRFS_INODE_ITEM_KEY;
		found_key.offset = 0;
		inode = btrfs_iget(fs_info->sb, &found_key, root, NULL);
		ret = PTR_ERR_OR_ZERO(inode);
		if (ret && ret != -ENOENT)
			goto out;

		if (ret == -ENOENT && root == fs_info->tree_root) {
			struct btrfs_root *dead_root;
			struct btrfs_fs_info *fs_info = root->fs_info;
			int is_dead_root = 0;

			/*
			 * A tree-root orphan item with no matching inode may
			 * belong to a subvolume/snapshot deletion still in
			 * progress; if the root is on dead_roots, skip the
			 * item instead of deleting it.
			 */
			spin_lock(&fs_info->trans_lock);
			list_for_each_entry(dead_root, &fs_info->dead_roots,
					    root_list) {
				if (dead_root->root_key.objectid ==
				    found_key.objectid) {
					is_dead_root = 1;
					break;
				}
			}
			spin_unlock(&fs_info->trans_lock);
			if (is_dead_root) {
				/* continue the search below this objectid */
				key.offset = found_key.objectid - 1;
				continue;
			}

		}

		/*
		 * The inode is gone (-ENOENT) or still has links: either
		 * way the orphan item is stale and can simply be deleted.
		 * A still-linked inode needs no other action; a missing one
		 * was already fully removed before the crash.
		 */
		if (ret == -ENOENT || inode->i_nlink) {
			if (!ret)
				iput(inode);
			trans = btrfs_start_transaction(root, 1);
			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
				goto out;
			}
			btrfs_debug(fs_info, "auto deleting %Lu",
				    found_key.objectid);
			ret = btrfs_del_orphan_item(trans, root,
						    found_key.objectid);
			btrfs_end_transaction(trans);
			if (ret)
				goto out;
			continue;
		}

		nr_unlink++;

		/* this final iput evicts the inode and deletes everything */
		iput(inode);
	}

	btrfs_release_path(path);

	root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE;

	/* join/end a transaction so any inserted orphan items are committed */
	if (test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state)) {
		trans = btrfs_join_transaction(root);
		if (!IS_ERR(trans))
			btrfs_end_transaction(trans);
	}

	if (nr_unlink)
		btrfs_debug(fs_info, "unlinked %d orphans", nr_unlink);

out:
	if (ret)
		btrfs_err(fs_info, "could not do orphan cleanup %d", ret);
	btrfs_free_path(path);
	return ret;
}
3512
3513
3514
3515
3516
3517
3518
/*
 * Decide whether the inode item at @slot in @leaf might be followed by
 * POSIX ACL xattrs, by scanning a handful of subsequent items for xattr
 * keys whose name hash matches the ACL access/default attributes.
 * Reports the first xattr slot seen through @first_xattr_slot (-1 when
 * none was found).
 *
 * Returns 1 if ACLs may be present (found, or the scan was
 * inconclusive), 0 when there definitely are none.
 */
static noinline int acls_after_inode_item(struct extent_buffer *leaf,
					  int slot, u64 objectid,
					  int *first_xattr_slot)
{
	u32 nritems = btrfs_header_nritems(leaf);
	struct btrfs_key found_key;
	/* name hashes computed lazily once and reused for all callers */
	static u64 xattr_access = 0;
	static u64 xattr_default = 0;
	int scanned = 0;

	if (!xattr_access) {
		xattr_access = btrfs_name_hash(XATTR_NAME_POSIX_ACL_ACCESS,
					strlen(XATTR_NAME_POSIX_ACL_ACCESS));
		xattr_default = btrfs_name_hash(XATTR_NAME_POSIX_ACL_DEFAULT,
					strlen(XATTR_NAME_POSIX_ACL_DEFAULT));
	}

	slot++;
	*first_xattr_slot = -1;
	while (slot < nritems) {
		btrfs_item_key_to_cpu(leaf, &found_key, slot);

		/* a different objectid means no more items for this inode */
		if (found_key.objectid != objectid)
			return 0;

		/* an xattr item with an ACL name hash: ACLs are present */
		if (found_key.type == BTRFS_XATTR_ITEM_KEY) {
			if (*first_xattr_slot == -1)
				*first_xattr_slot = slot;
			if (found_key.offset == xattr_access ||
			    found_key.offset == xattr_default)
				return 1;
		}

		/*
		 * key types sort after xattrs, so anything larger proves
		 * there can be no ACL xattrs for this inode
		 */
		if (found_key.type > BTRFS_XATTR_ITEM_KEY)
			return 0;

		slot++;
		scanned++;

		/*
		 * bound the scan: if no conclusion was reached within a few
		 * items, give up and fall through to the conservative
		 * "maybe" answer below
		 */
		if (scanned >= 8)
			break;
	}

	/*
	 * ran off the leaf (or hit the scan bound) without a definite
	 * answer — play it safe and assume ACLs might exist
	 */
	if (*first_xattr_slot == -1)
		*first_xattr_slot = slot;
	return 1;
}
3581
3582
3583
3584
/*
 * Read an inode's on-disk item and populate the in-memory inode.
 * @in_path may be NULL, in which case a path is allocated (and freed)
 * locally.  Returns 0 on success or a negative errno.
 */
static int btrfs_read_locked_inode(struct inode *inode,
				   struct btrfs_path *in_path)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_path *path = in_path;
	struct extent_buffer *leaf;
	struct btrfs_inode_item *inode_item;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_key location;
	unsigned long ptr;
	int maybe_acls;
	u32 rdev;
	int ret;
	bool filled = false;
	int first_xattr_slot;

	/* the delayed-inode code may already have everything cached */
	ret = btrfs_fill_inode(inode, &rdev);
	if (!ret)
		filled = true;

	if (!path) {
		path = btrfs_alloc_path();
		if (!path)
			return -ENOMEM;
	}

	memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));

	ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
	if (ret) {
		if (path != in_path)
			btrfs_free_path(path);
		return ret;
	}

	leaf = path->nodes[0];

	/* fields already came from the delayed-inode cache */
	if (filled)
		goto cache_index;

	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	inode->i_mode = btrfs_inode_mode(leaf, inode_item);
	set_nlink(inode, btrfs_inode_nlink(leaf, inode_item));
	i_uid_write(inode, btrfs_inode_uid(leaf, inode_item));
	i_gid_write(inode, btrfs_inode_gid(leaf, inode_item));
	btrfs_i_size_write(BTRFS_I(inode), btrfs_inode_size(leaf, inode_item));

	inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime);
	inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->atime);

	inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->mtime);
	inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->mtime);

	inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->ctime);
	inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->ctime);

	/* otime (creation time) is btrfs-specific state */
	BTRFS_I(inode)->i_otime.tv_sec =
		btrfs_timespec_sec(leaf, &inode_item->otime);
	BTRFS_I(inode)->i_otime.tv_nsec =
		btrfs_timespec_nsec(leaf, &inode_item->otime);

	inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item));
	BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
	BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item);

	inode_set_iversion_queried(inode,
				   btrfs_inode_sequence(leaf, inode_item));
	inode->i_generation = BTRFS_I(inode)->generation;
	inode->i_rdev = 0;
	rdev = btrfs_inode_rdev(leaf, inode_item);

	BTRFS_I(inode)->index_cnt = (u64)-1;
	BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);

cache_index:
	/*
	 * Inode was last modified in the still-running transaction: flag
	 * it so the next fsync does a full sync.
	 * NOTE(review): rationale inferred from the flag name — an inode
	 * evicted and re-read mid-transaction has lost its modified-extent
	 * tracking; confirm against upstream commentary.
	 */
	if (BTRFS_I(inode)->last_trans == fs_info->generation)
		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
			&BTRFS_I(inode)->runtime_flags);

	/*
	 * Conservatively seed the unlink/link transaction trackers from
	 * last_trans so the tree-log code does not miss a rename, unlink
	 * or link that happened before this inode was evicted.
	 */
	BTRFS_I(inode)->last_unlink_trans = BTRFS_I(inode)->last_trans;

	BTRFS_I(inode)->last_link_trans = BTRFS_I(inode)->last_trans;

	/* the next item may be the inode ref carrying the dir index */
	path->slots[0]++;
	if (inode->i_nlink != 1 ||
	    path->slots[0] >= btrfs_header_nritems(leaf))
		goto cache_acl;

	btrfs_item_key_to_cpu(leaf, &location, path->slots[0]);
	if (location.objectid != btrfs_ino(BTRFS_I(inode)))
		goto cache_acl;

	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
	if (location.type == BTRFS_INODE_REF_KEY) {
		struct btrfs_inode_ref *ref;

		ref = (struct btrfs_inode_ref *)ptr;
		BTRFS_I(inode)->dir_index = btrfs_inode_ref_index(leaf, ref);
	} else if (location.type == BTRFS_INODE_EXTREF_KEY) {
		struct btrfs_inode_extref *extref;

		extref = (struct btrfs_inode_extref *)ptr;
		BTRFS_I(inode)->dir_index = btrfs_inode_extref_index(leaf,
								     extref);
	}
cache_acl:
	/*
	 * quick scan past the inode item: decide whether ACL xattrs can
	 * exist and note where properties (xattrs) start
	 */
	maybe_acls = acls_after_inode_item(leaf, path->slots[0],
			btrfs_ino(BTRFS_I(inode)), &first_xattr_slot);
	if (first_xattr_slot != -1) {
		path->slots[0] = first_xattr_slot;
		ret = btrfs_load_inode_props(inode, path);
		if (ret)
			btrfs_err(fs_info,
				  "error loading props for ino %llu (root %llu): %d",
				  btrfs_ino(BTRFS_I(inode)),
				  root->root_key.objectid, ret);
	}
	if (path != in_path)
		btrfs_free_path(path);

	if (!maybe_acls)
		cache_no_acl(inode);

	/* wire up the per-file-type operation tables */
	switch (inode->i_mode & S_IFMT) {
	case S_IFREG:
		inode->i_mapping->a_ops = &btrfs_aops;
		BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
		inode->i_fop = &btrfs_file_operations;
		inode->i_op = &btrfs_file_inode_operations;
		break;
	case S_IFDIR:
		inode->i_fop = &btrfs_dir_file_operations;
		inode->i_op = &btrfs_dir_inode_operations;
		break;
	case S_IFLNK:
		inode->i_op = &btrfs_symlink_inode_operations;
		inode_nohighmem(inode);
		inode->i_mapping->a_ops = &btrfs_aops;
		break;
	default:
		/* device nodes, fifos, sockets */
		inode->i_op = &btrfs_special_inode_operations;
		init_special_inode(inode, inode->i_mode, rdev);
		break;
	}

	btrfs_sync_inode_flags_to_i_flags(inode);
	return 0;
}
3787
3788
3789
3790
/*
 * Copy the in-memory inode state into the on-disk inode item @item
 * inside @leaf.  A map token is used to speed up the many consecutive
 * field accesses into the same extent buffer.
 */
static void fill_inode_item(struct btrfs_trans_handle *trans,
			    struct extent_buffer *leaf,
			    struct btrfs_inode_item *item,
			    struct inode *inode)
{
	struct btrfs_map_token token;

	btrfs_init_map_token(&token);

	btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
	btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
	/* on-disk size is the btrfs-tracked disk_i_size, not i_size */
	btrfs_set_token_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size,
				   &token);
	btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
	btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);

	btrfs_set_token_timespec_sec(leaf, &item->atime,
				     inode->i_atime.tv_sec, &token);
	btrfs_set_token_timespec_nsec(leaf, &item->atime,
				      inode->i_atime.tv_nsec, &token);

	btrfs_set_token_timespec_sec(leaf, &item->mtime,
				     inode->i_mtime.tv_sec, &token);
	btrfs_set_token_timespec_nsec(leaf, &item->mtime,
				      inode->i_mtime.tv_nsec, &token);

	btrfs_set_token_timespec_sec(leaf, &item->ctime,
				     inode->i_ctime.tv_sec, &token);
	btrfs_set_token_timespec_nsec(leaf, &item->ctime,
				      inode->i_ctime.tv_nsec, &token);

	/* otime (creation time) lives in the btrfs inode, not the VFS one */
	btrfs_set_token_timespec_sec(leaf, &item->otime,
				     BTRFS_I(inode)->i_otime.tv_sec, &token);
	btrfs_set_token_timespec_nsec(leaf, &item->otime,
				      BTRFS_I(inode)->i_otime.tv_nsec, &token);

	btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
				     &token);
	btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation,
					 &token);
	btrfs_set_token_inode_sequence(leaf, item, inode_peek_iversion(inode),
				       &token);
	/* record which transaction last touched this inode */
	btrfs_set_token_inode_transid(leaf, item, trans->transid, &token);
	btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token);
	btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token);
	/* the block group field is always written as 0 here */
	btrfs_set_token_inode_block_group(leaf, item, 0, &token);
}
3838
3839
3840
3841
/*
 * Copy the in-memory inode fields into its on-disk inode item.
 *
 * Looks up the inode item (keyed by BTRFS_I(inode)->location), rewrites it
 * via fill_inode_item() and marks the leaf dirty.
 *
 * Returns 0 on success, -ENOMEM if the path can't be allocated, -ENOENT if
 * the inode item does not exist, or another negative errno from the lookup.
 */
static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
				struct btrfs_root *root, struct inode *inode)
{
	struct btrfs_inode_item *inode_item;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->leave_spinning = 1;
	ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location,
				 1);
	if (ret) {
		/* btrfs_lookup_inode() returns > 0 when the item is absent */
		if (ret > 0)
			ret = -ENOENT;
		goto failed;
	}

	leaf = path->nodes[0];
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);

	fill_inode_item(trans, leaf, inode_item, inode);
	btrfs_mark_buffer_dirty(leaf);
	/* Remember the transaction this inode was last changed in */
	btrfs_set_inode_last_trans(trans, inode);
	ret = 0;
failed:
	btrfs_free_path(path);
	return ret;
}
3875
3876
3877
3878
/*
 * Copy everything in the in-memory inode into the btree.
 *
 * Normal inodes go through the delayed-inode machinery; free space inodes,
 * the data relocation tree and inodes touched during log recovery update
 * the inode item directly.
 */
noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
				struct btrfs_root *root, struct inode *inode)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	int ret;

	/*
	 * Defer the on-disk update to the delayed inode code unless this is
	 * a free space inode, the data reloc tree, or we are replaying the
	 * log — those cases must write the inode item synchronously via
	 * btrfs_update_inode_item() below.
	 */
	if (!btrfs_is_free_space_inode(BTRFS_I(inode))
	    && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
	    && !test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
		btrfs_update_root_times(trans, root);

		ret = btrfs_delayed_update_inode(trans, root, inode);
		if (!ret)
			btrfs_set_inode_last_trans(trans, inode);
		return ret;
	}

	return btrfs_update_inode_item(trans, root, inode);
}
3905
3906noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
3907 struct btrfs_root *root,
3908 struct inode *inode)
3909{
3910 int ret;
3911
3912 ret = btrfs_update_inode(trans, root, inode);
3913 if (ret == -ENOSPC)
3914 return btrfs_update_inode_item(trans, root, inode);
3915 return ret;
3916}
3917
3918
3919
3920
3921
3922
/*
 * Remove the directory entry and inode ref that link @inode into @dir under
 * (@name, @name_len), delete the matching log tree entries, and update both
 * inodes' metadata.  Does NOT drop the inode's link count — that is done by
 * the btrfs_unlink_inode() wrapper.
 */
static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_inode *dir,
				struct btrfs_inode *inode,
				const char *name, int name_len)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_path *path;
	int ret = 0;
	struct extent_buffer *leaf;
	struct btrfs_dir_item *di;
	struct btrfs_key key;
	u64 index;
	u64 ino = btrfs_ino(inode);
	u64 dir_ino = btrfs_ino(dir);

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	path->leave_spinning = 1;
	di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
				    name, name_len, -1);
	if (IS_ERR_OR_NULL(di)) {
		ret = di ? PTR_ERR(di) : -ENOENT;
		goto err;
	}
	leaf = path->nodes[0];
	btrfs_dir_item_key_to_cpu(leaf, di, &key);
	ret = btrfs_delete_one_dir_name(trans, root, path, di);
	if (ret)
		goto err;
	btrfs_release_path(path);

	/*
	 * If we have a cached dir index, try the delayed path first: it
	 * removes the inode ref for us and gives us the index directly,
	 * avoiding the btrfs_del_inode_ref() tree search below.
	 */
	if (inode->dir_index) {
		ret = btrfs_delayed_delete_inode_ref(inode);
		if (!ret) {
			index = inode->dir_index;
			goto skip_backref;
		}
	}

	ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
				  dir_ino, &index);
	if (ret) {
		btrfs_info(fs_info,
			"failed to delete reference to %.*s, inode %llu parent %llu",
			name_len, name, ino, dir_ino);
		btrfs_abort_transaction(trans, ret);
		goto err;
	}
skip_backref:
	ret = btrfs_delete_delayed_dir_index(trans, dir, index);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto err;
	}

	/* -ENOENT from the log deletions just means nothing was logged yet */
	ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode,
			dir_ino);
	if (ret != 0 && ret != -ENOENT) {
		btrfs_abort_transaction(trans, ret);
		goto err;
	}

	ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, dir,
			index);
	if (ret == -ENOENT)
		ret = 0;
	else if (ret)
		btrfs_abort_transaction(trans, ret);
err:
	btrfs_free_path(path);
	if (ret)
		goto out;

	/* Each name is accounted twice in the dir's i_size (note the * 2) */
	btrfs_i_size_write(dir, dir->vfs_inode.i_size - name_len * 2);
	inode_inc_iversion(&inode->vfs_inode);
	inode_inc_iversion(&dir->vfs_inode);
	inode->vfs_inode.i_ctime = dir->vfs_inode.i_mtime =
		dir->vfs_inode.i_ctime = current_time(&inode->vfs_inode);
	ret = btrfs_update_inode(trans, root, &dir->vfs_inode);
out:
	return ret;
}
4020
4021int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
4022 struct btrfs_root *root,
4023 struct btrfs_inode *dir, struct btrfs_inode *inode,
4024 const char *name, int name_len)
4025{
4026 int ret;
4027 ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
4028 if (!ret) {
4029 drop_nlink(&inode->vfs_inode);
4030 ret = btrfs_update_inode(trans, root, &inode->vfs_inode);
4031 }
4032 return ret;
4033}
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir)
4044{
4045 struct btrfs_root *root = BTRFS_I(dir)->root;
4046
4047
4048
4049
4050
4051
4052
4053
4054 return btrfs_start_transaction_fallback_global_rsv(root, 5, 5);
4055}
4056
/* VFS ->unlink: remove one directory entry pointing at @dentry's inode. */
static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
{
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct btrfs_trans_handle *trans;
	struct inode *inode = d_inode(dentry);
	int ret;

	trans = __unlink_start_trans(dir);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	/* NOTE(review): presumably records the unlink for log replay — see tree-log */
	btrfs_record_unlink_dir(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
			0);

	ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
			BTRFS_I(d_inode(dentry)), dentry->d_name.name,
			dentry->d_name.len);
	if (ret)
		goto out;

	/* Last link gone: track the inode as an orphan until final iput */
	if (inode->i_nlink == 0) {
		ret = btrfs_orphan_add(trans, BTRFS_I(inode));
		if (ret)
			goto out;
	}

out:
	btrfs_end_transaction(trans);
	btrfs_btree_balance_dirty(root->fs_info);
	return ret;
}
4088
/*
 * Remove the directory entry for a subvolume/snapshot from @dir.
 *
 * Unlike regular files, subvolume entries are keyed by a ROOT_ITEM key and
 * are linked via root refs in the tree of tree roots rather than inode refs,
 * so this deletes the dir item, the root ref and the dir index, then updates
 * the parent directory's size and times.
 */
static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
			       struct inode *dir, u64 objectid,
			       const char *name, int name_len)
{
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_dir_item *di;
	struct btrfs_key key;
	u64 index;
	int ret;
	u64 dir_ino = btrfs_ino(BTRFS_I(dir));

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
				   name, name_len, -1);
	if (IS_ERR_OR_NULL(di)) {
		ret = di ? PTR_ERR(di) : -ENOENT;
		goto out;
	}

	leaf = path->nodes[0];
	btrfs_dir_item_key_to_cpu(leaf, di, &key);
	/* A subvolume dir entry must point at a ROOT_ITEM with @objectid */
	WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
	ret = btrfs_delete_one_dir_name(trans, root, path, di);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}
	btrfs_release_path(path);

	ret = btrfs_del_root_ref(trans, objectid, root->root_key.objectid,
				 dir_ino, &index, name, name_len);
	if (ret < 0) {
		if (ret != -ENOENT) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
		/*
		 * No root ref was found; recover the dir index number by
		 * searching for the dir index item by name instead.
		 */
		di = btrfs_search_dir_index_item(root, path, dir_ino,
						 name, name_len);
		if (IS_ERR_OR_NULL(di)) {
			if (!di)
				ret = -ENOENT;
			else
				ret = PTR_ERR(di);
			btrfs_abort_transaction(trans, ret);
			goto out;
		}

		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		index = key.offset;
	}
	btrfs_release_path(path);

	ret = btrfs_delete_delayed_dir_index(trans, BTRFS_I(dir), index);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	/* The name is accounted twice in the dir's i_size (note the * 2) */
	btrfs_i_size_write(BTRFS_I(dir), dir->i_size - name_len * 2);
	inode_inc_iversion(dir);
	dir->i_mtime = dir->i_ctime = current_time(dir);
	ret = btrfs_update_inode_fallback(trans, root, dir);
	if (ret)
		btrfs_abort_transaction(trans, ret);
out:
	btrfs_free_path(path);
	return ret;
}
4163
4164
4165
4166
4167
4168static noinline int may_destroy_subvol(struct btrfs_root *root)
4169{
4170 struct btrfs_fs_info *fs_info = root->fs_info;
4171 struct btrfs_path *path;
4172 struct btrfs_dir_item *di;
4173 struct btrfs_key key;
4174 u64 dir_id;
4175 int ret;
4176
4177 path = btrfs_alloc_path();
4178 if (!path)
4179 return -ENOMEM;
4180
4181
4182 dir_id = btrfs_super_root_dir(fs_info->super_copy);
4183 di = btrfs_lookup_dir_item(NULL, fs_info->tree_root, path,
4184 dir_id, "default", 7, 0);
4185 if (di && !IS_ERR(di)) {
4186 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
4187 if (key.objectid == root->root_key.objectid) {
4188 ret = -EPERM;
4189 btrfs_err(fs_info,
4190 "deleting default subvolume %llu is not allowed",
4191 key.objectid);
4192 goto out;
4193 }
4194 btrfs_release_path(path);
4195 }
4196
4197 key.objectid = root->root_key.objectid;
4198 key.type = BTRFS_ROOT_REF_KEY;
4199 key.offset = (u64)-1;
4200
4201 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
4202 if (ret < 0)
4203 goto out;
4204 BUG_ON(ret == 0);
4205
4206 ret = 0;
4207 if (path->slots[0] > 0) {
4208 path->slots[0]--;
4209 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
4210 if (key.objectid == root->root_key.objectid &&
4211 key.type == BTRFS_ROOT_REF_KEY)
4212 ret = -ENOTEMPTY;
4213 }
4214out:
4215 btrfs_free_path(path);
4216 return ret;
4217}
4218
4219
/*
 * Walk every inode currently cached for @root (the per-root rb-tree ordered
 * by inode number) and prune its dentry aliases.  Used after subvolume
 * deletion so stale dentries don't keep the root pinned.
 */
static void btrfs_prune_dentries(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct rb_node *node;
	struct rb_node *prev;
	struct btrfs_inode *entry;
	struct inode *inode;
	u64 objectid = 0;

	/* Unless the fs is in error state the root should have no refs left */
	if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
		WARN_ON(btrfs_root_refs(&root->root_item) != 0);

	spin_lock(&root->inode_lock);
again:
	/*
	 * Find the first cached inode with ino >= objectid.  We restart from
	 * "objectid" every time we had to drop inode_lock, since the tree
	 * may have changed underneath us.
	 */
	node = root->inode_tree.rb_node;
	prev = NULL;
	while (node) {
		prev = node;
		entry = rb_entry(node, struct btrfs_inode, rb_node);

		if (objectid < btrfs_ino(entry))
			node = node->rb_left;
		else if (objectid > btrfs_ino(entry))
			node = node->rb_right;
		else
			break;
	}
	if (!node) {
		/* No exact match: continue from the successor, if any */
		while (prev) {
			entry = rb_entry(prev, struct btrfs_inode, rb_node);
			if (objectid <= btrfs_ino(entry)) {
				node = prev;
				break;
			}
			prev = rb_next(prev);
		}
	}
	while (node) {
		entry = rb_entry(node, struct btrfs_inode, rb_node);
		/* Remember where to resume if we drop the lock below */
		objectid = btrfs_ino(entry) + 1;
		inode = igrab(&entry->vfs_inode);
		if (inode) {
			spin_unlock(&root->inode_lock);
			if (atomic_read(&inode->i_count) > 1)
				d_prune_aliases(inode);
			/*
			 * Drop our reference; with the lock released we must
			 * re-walk the tree from "objectid" afterwards.
			 */
			iput(inode);
			cond_resched();
			spin_lock(&root->inode_lock);
			goto again;
		}

		if (cond_resched_lock(&root->inode_lock))
			goto again;

		node = rb_next(node);
	}
	spin_unlock(&root->inode_lock);
}
4282
/*
 * Delete the subvolume rooted at @dentry: unlink its directory entry from
 * @dir, mark the root item dead (refs = 0), insert an orphan item so the
 * cleaner can finish dropping the tree later, and remove its UUID tree
 * entries.  On any failure the SUBVOL_DEAD flag is cleared again.
 */
int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct inode *inode = d_inode(dentry);
	struct btrfs_root *dest = BTRFS_I(inode)->root;
	struct btrfs_trans_handle *trans;
	struct btrfs_block_rsv block_rsv;
	u64 root_flags;
	int ret;
	int err;

	/*
	 * Don't allow deleting a subvolume with an active send; set
	 * SUBVOL_DEAD under root_item_lock so new sends see it.
	 */
	spin_lock(&dest->root_item_lock);
	if (dest->send_in_progress) {
		spin_unlock(&dest->root_item_lock);
		btrfs_warn(fs_info,
			   "attempt to delete subvolume %llu during send",
			   dest->root_key.objectid);
		return -EPERM;
	}
	root_flags = btrfs_root_flags(&dest->root_item);
	btrfs_set_root_flags(&dest->root_item,
			     root_flags | BTRFS_ROOT_SUBVOL_DEAD);
	spin_unlock(&dest->root_item_lock);

	down_write(&fs_info->subvol_sem);

	err = may_destroy_subvol(dest);
	if (err)
		goto out_up_write;

	btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
	/*
	 * One for dir inode,
	 * two for dir entries,
	 * two for root ref/backref.
	 */
	err = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
	if (err)
		goto out_up_write;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		goto out_release;
	}
	trans->block_rsv = &block_rsv;
	trans->bytes_reserved = block_rsv.size;

	btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));

	ret = btrfs_unlink_subvol(trans, dir, dest->root_key.objectid,
				  dentry->d_name.name, dentry->d_name.len);
	if (ret) {
		err = ret;
		btrfs_abort_transaction(trans, ret);
		goto out_end_trans;
	}

	btrfs_record_root_in_trans(trans, dest);

	/* Reset drop progress and drop the last root reference */
	memset(&dest->root_item.drop_progress, 0,
		sizeof(dest->root_item.drop_progress));
	dest->root_item.drop_level = 0;
	btrfs_set_root_refs(&dest->root_item, 0);

	/* Orphan item lets the cleaner resume the drop after a crash */
	if (!test_and_set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &dest->state)) {
		ret = btrfs_insert_orphan_item(trans,
					fs_info->tree_root,
					dest->root_key.objectid);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			err = ret;
			goto out_end_trans;
		}
	}

	/* -ENOENT is fine: the UUID entries may simply not exist */
	ret = btrfs_uuid_tree_remove(trans, dest->root_item.uuid,
				  BTRFS_UUID_KEY_SUBVOL,
				  dest->root_key.objectid);
	if (ret && ret != -ENOENT) {
		btrfs_abort_transaction(trans, ret);
		err = ret;
		goto out_end_trans;
	}
	if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) {
		ret = btrfs_uuid_tree_remove(trans,
					  dest->root_item.received_uuid,
					  BTRFS_UUID_KEY_RECEIVED_SUBVOL,
					  dest->root_key.objectid);
		if (ret && ret != -ENOENT) {
			btrfs_abort_transaction(trans, ret);
			err = ret;
			goto out_end_trans;
		}
	}

out_end_trans:
	trans->block_rsv = NULL;
	trans->bytes_reserved = 0;
	ret = btrfs_end_transaction(trans);
	if (ret && !err)
		err = ret;
	inode->i_flags |= S_DEAD;
out_release:
	btrfs_subvolume_release_metadata(fs_info, &block_rsv);
out_up_write:
	up_write(&fs_info->subvol_sem);
	if (err) {
		/* Deletion failed: make the subvolume usable again */
		spin_lock(&dest->root_item_lock);
		root_flags = btrfs_root_flags(&dest->root_item);
		btrfs_set_root_flags(&dest->root_item,
				root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
		spin_unlock(&dest->root_item_lock);
	} else {
		d_invalidate(dentry);
		btrfs_prune_dentries(dest);
		ASSERT(dest->send_in_progress == 0);

		/* Drop the last reference on the free-ino cache inode */
		if (dest->ino_cache_inode) {
			iput(dest->ino_cache_inode);
			dest->ino_cache_inode = NULL;
		}
	}

	return err;
}
4416
/* VFS ->rmdir: remove an empty directory (or delete a whole subvolume). */
static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
{
	struct inode *inode = d_inode(dentry);
	int err = 0;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct btrfs_trans_handle *trans;
	u64 last_unlink_trans;

	if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
		return -ENOTEMPTY;
	/* Removing a subvolume root is handled by subvolume deletion */
	if (btrfs_ino(BTRFS_I(inode)) == BTRFS_FIRST_FREE_OBJECTID)
		return btrfs_delete_subvolume(dir, dentry);

	trans = __unlink_start_trans(dir);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	/* The placeholder ".." entry of a deleted subvolume: unlink by root */
	if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
		err = btrfs_unlink_subvol(trans, dir,
					  BTRFS_I(inode)->location.objectid,
					  dentry->d_name.name,
					  dentry->d_name.len);
		goto out;
	}

	err = btrfs_orphan_add(trans, BTRFS_I(inode));
	if (err)
		goto out;

	last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;

	/* now the directory is empty */
	err = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
			BTRFS_I(d_inode(dentry)), dentry->d_name.name,
			dentry->d_name.len);
	if (!err) {
		btrfs_i_size_write(BTRFS_I(inode), 0);
		/*
		 * Propagate the last_unlink_trans value of the deleted dir
		 * to its parent directory so that a later fsync of the
		 * parent knows it cannot rely on the log tree alone.
		 * NOTE(review): see tree-log for the full rationale.
		 */
		if (last_unlink_trans >= trans->transid)
			BTRFS_I(dir)->last_unlink_trans = last_unlink_trans;
	}
out:
	btrfs_end_transaction(trans);
	btrfs_btree_balance_dirty(root->fs_info);

	return err;
}
4474
4475
4476
4477
4478
4479#define NEED_TRUNCATE_BLOCK 1
4480
4481
4482
4483
4484
4485
4486
4487
4488
4489
4490
4491
/*
 * Remove inode items (extents, csums, dir items ...) beyond @new_size.
 *
 * Starts at the highest possible key for @inode and deletes keys backwards
 * until nothing above @new_size remains.  @min_type is the minimum key type
 * to truncate down to; with min_type == 0 every item of the inode is
 * removed, including the inode item itself.
 *
 * Returns 0 on success, NEED_TRUNCATE_BLOCK when the caller must zero a
 * partial block that could not be truncated in place, -EAGAIN when the
 * transaction should be ended and restarted, or a negative errno.
 */
int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct inode *inode,
			       u64 new_size, u32 min_type)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u64 extent_start = 0;
	u64 extent_num_bytes = 0;
	u64 extent_offset = 0;
	u64 item_end = 0;
	u64 last_size = new_size;
	u32 found_type = (u8)-1;
	int found_extent;
	int del_item;
	int pending_del_nr = 0;		/* contiguous run of slots to delete */
	int pending_del_slot = 0;	/* lowest slot of that run */
	int extent_type = -1;
	int ret;
	u64 ino = btrfs_ino(BTRFS_I(inode));
	u64 bytes_deleted = 0;
	bool be_nice = false;
	bool should_throttle = false;

	BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);

	/*
	 * For non-free-space inodes on reference-counted (COW) roots we
	 * want to back off from time to time (be_nice).
	 */
	if (!btrfs_is_free_space_inode(BTRFS_I(inode)) &&
	    test_bit(BTRFS_ROOT_REF_COWS, &root->state))
		be_nice = true;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->reada = READA_BACK;

	/*
	 * Drop cached extent maps from the (aligned) new size onwards so
	 * stale mappings aren't used after the items are gone.
	 */
	if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
	    root == fs_info->tree_root)
		btrfs_drop_extent_cache(BTRFS_I(inode), ALIGN(new_size,
					fs_info->sectorsize),
					(u64)-1, 0);

	/*
	 * This function is also used to drop items in a log tree before
	 * relogging the inode; only kill delayed items when operating on
	 * the inode's own root.
	 */
	if (min_type == 0 && root == BTRFS_I(inode)->root)
		btrfs_kill_delayed_inode_items(BTRFS_I(inode));

	key.objectid = ino;
	key.offset = (u64)-1;
	key.type = (u8)-1;

search_again:
	/*
	 * With be_nice, give up with -EAGAIN once we've freed a lot and the
	 * transaction wants to end, so the caller can commit and restart.
	 */
	if (be_nice && bytes_deleted > SZ_32M &&
	    btrfs_should_end_transaction(trans)) {
		ret = -EAGAIN;
		goto out;
	}

	path->leave_spinning = 1;
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0)
		goto out;

	if (ret > 0) {
		ret = 0;
		/* There are no items in the tree for us to truncate */
		if (path->slots[0] == 0)
			goto out;
		path->slots[0]--;
	}

	while (1) {
		fi = NULL;
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		found_type = found_key.type;

		if (found_key.objectid != ino)
			break;

		if (found_type < min_type)
			break;

		/* Compute the last byte covered by this item (inclusive) */
		item_end = found_key.offset;
		if (found_type == BTRFS_EXTENT_DATA_KEY) {
			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);
			extent_type = btrfs_file_extent_type(leaf, fi);
			if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
				item_end +=
				    btrfs_file_extent_num_bytes(leaf, fi);

				trace_btrfs_truncate_show_fi_regular(
					BTRFS_I(inode), leaf, fi,
					found_key.offset);
			} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
				item_end += btrfs_file_extent_ram_bytes(leaf,
									fi);

				trace_btrfs_truncate_show_fi_inline(
					BTRFS_I(inode), leaf, fi, path->slots[0],
					found_key.offset);
			}
			item_end--;
		}
		if (found_type > min_type) {
			del_item = 1;
		} else {
			/* Entirely below new_size: nothing left to remove */
			if (item_end < new_size)
				break;
			if (found_key.offset >= new_size)
				del_item = 1;
			else
				del_item = 0;
		}
		found_extent = 0;
		/* FIXME, shrink the extent if the ref count is only 1 */
		if (found_type != BTRFS_EXTENT_DATA_KEY)
			goto delete;

		if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
			u64 num_dec;
			extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
			if (!del_item) {
				/* Extent straddles new_size: shrink in place */
				u64 orig_num_bytes =
					btrfs_file_extent_num_bytes(leaf, fi);
				extent_num_bytes = ALIGN(new_size -
						found_key.offset,
						fs_info->sectorsize);
				btrfs_set_file_extent_num_bytes(leaf, fi,
							 extent_num_bytes);
				num_dec = (orig_num_bytes -
					   extent_num_bytes);
				if (test_bit(BTRFS_ROOT_REF_COWS,
					     &root->state) &&
				    extent_start != 0)
					inode_sub_bytes(inode, num_dec);
				btrfs_mark_buffer_dirty(leaf);
			} else {
				extent_num_bytes =
					btrfs_file_extent_disk_num_bytes(leaf,
									 fi);
				extent_offset = found_key.offset -
					btrfs_file_extent_offset(leaf, fi);

				/* FIXME blocksize != 4096 */
				num_dec = btrfs_file_extent_num_bytes(leaf, fi);
				if (extent_start != 0) {
					/* Not a hole: drop the extent ref too */
					found_extent = 1;
					if (test_bit(BTRFS_ROOT_REF_COWS,
						     &root->state))
						inode_sub_bytes(inode, num_dec);
				}
			}
		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
			/*
			 * Inline extents can only be truncated in place when
			 * they use no special encoding or compression.
			 */
			if (!del_item &&
			    btrfs_file_extent_encryption(leaf, fi) == 0 &&
			    btrfs_file_extent_other_encoding(leaf, fi) == 0 &&
			    btrfs_file_extent_compression(leaf, fi) == 0) {
				u32 size = (u32)(new_size - found_key.offset);

				btrfs_set_file_extent_ram_bytes(leaf, fi, size);
				size = btrfs_file_extent_calc_inline_size(size);
				btrfs_truncate_item(root->fs_info, path, size, 1);
			} else if (!del_item) {
				/*
				 * Can't truncate this inline extent here; the
				 * caller must zero the partial block instead.
				 */
				ret = NEED_TRUNCATE_BLOCK;
				break;
			}

			if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
				inode_sub_bytes(inode, item_end + 1 - new_size);
		}
delete:
		if (del_item)
			last_size = found_key.offset;
		else
			last_size = new_size;
		if (del_item) {
			if (!pending_del_nr) {
				/* No pending deletions yet, start a new run */
				pending_del_slot = path->slots[0];
				pending_del_nr = 1;
			} else if (pending_del_nr &&
				   path->slots[0] + 1 == pending_del_slot) {
				/* Extend the contiguous pending run downwards */
				pending_del_nr++;
				pending_del_slot = path->slots[0];
			} else {
				BUG();
			}
		} else {
			break;
		}
		should_throttle = false;

		if (found_extent &&
		    (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
		     root == fs_info->tree_root)) {
			btrfs_set_path_blocking(path);
			bytes_deleted += extent_num_bytes;
			ret = btrfs_free_extent(trans, root, extent_start,
						extent_num_bytes, 0,
						btrfs_header_owner(leaf),
						ino, extent_offset);
			if (ret) {
				btrfs_abort_transaction(trans, ret);
				break;
			}
			if (be_nice) {
				if (btrfs_should_throttle_delayed_refs(trans))
					should_throttle = true;
			}
		}

		if (found_type == BTRFS_INODE_ITEM_KEY)
			break;

		if (path->slots[0] == 0 ||
		    path->slots[0] != pending_del_slot ||
		    should_throttle) {
			/* Flush the pending run before releasing the path */
			if (pending_del_nr) {
				ret = btrfs_del_items(trans, root, path,
						pending_del_slot,
						pending_del_nr);
				if (ret) {
					btrfs_abort_transaction(trans, ret);
					break;
				}
				pending_del_nr = 0;
			}
			btrfs_release_path(path);

			/*
			 * When throttling, top up the delayed refs reserve
			 * without flushing; if that fails return -EAGAIN so
			 * the caller commits and retries the truncate.
			 */
			if (should_throttle) {
				ret = btrfs_delayed_refs_rsv_refill(fs_info,
					BTRFS_RESERVE_NO_FLUSH);
				if (ret) {
					ret = -EAGAIN;
					break;
				}
			}
			goto search_again;
		} else {
			path->slots[0]--;
		}
	}
out:
	/* Flush any remaining pending deletions */
	if (ret >= 0 && pending_del_nr) {
		int err;

		err = btrfs_del_items(trans, root, path, pending_del_slot,
				      pending_del_nr);
		if (err) {
			btrfs_abort_transaction(trans, err);
			ret = err;
		}
	}
	if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
		ASSERT(last_size >= new_size);
		if (!ret && last_size > new_size)
			last_size = new_size;
		btrfs_ordered_update_i_size(inode, last_size, NULL);
	}

	btrfs_free_path(path);
	return ret;
}
4799
4800
4801
4802
4803
4804
4805
4806
4807
4808
4809
4810
/*
 * btrfs_truncate_block - zero out part of one block
 * @inode: inode we're zeroing in
 * @from:  file offset to start zeroing at
 * @len:   number of bytes to zero, 0 means "to the end of the block"
 * @front: if non-zero, zero from the block start up to @from instead
 *
 * Reads (if necessary), COWs and partially zeroes the block containing
 * @from.  Used by truncate and hole punching.  Returns 0 or negative errno.
 */
int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
			int front)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct address_space *mapping = inode->i_mapping;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	struct extent_changeset *data_reserved = NULL;
	char *kaddr;
	u32 blocksize = fs_info->sectorsize;
	pgoff_t index = from >> PAGE_SHIFT;
	unsigned offset = from & (blocksize - 1);
	struct page *page;
	gfp_t mask = btrfs_alloc_write_mask(mapping);
	int ret = 0;
	u64 block_start;
	u64 block_end;

	/* Fully block-aligned range: nothing to zero */
	if (IS_ALIGNED(offset, blocksize) &&
	    (!len || IS_ALIGNED(len, blocksize)))
		goto out;

	block_start = round_down(from, blocksize);
	block_end = block_start + blocksize - 1;

	ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
					   block_start, blocksize);
	if (ret)
		goto out;

again:
	page = find_or_create_page(mapping, index, mask);
	if (!page) {
		btrfs_delalloc_release_space(inode, data_reserved,
					     block_start, blocksize, true);
		btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, true);
		ret = -ENOMEM;
		goto out;
	}

	/* Bring the page up to date before modifying part of it */
	if (!PageUptodate(page)) {
		ret = btrfs_readpage(NULL, page);
		lock_page(page);
		if (page->mapping != mapping) {
			/* Page was truncated/invalidated under us: retry */
			unlock_page(page);
			put_page(page);
			goto again;
		}
		if (!PageUptodate(page)) {
			ret = -EIO;
			goto out_unlock;
		}
	}
	wait_on_page_writeback(page);

	lock_extent_bits(io_tree, block_start, block_end, &cached_state);
	set_page_extent_mapped(page);

	/* Wait out any ordered extent covering this block, then retry */
	ordered = btrfs_lookup_ordered_extent(inode, block_start);
	if (ordered) {
		unlock_extent_cached(io_tree, block_start, block_end,
				     &cached_state);
		unlock_page(page);
		put_page(page);
		btrfs_start_ordered_extent(inode, ordered, 1);
		btrfs_put_ordered_extent(ordered);
		goto again;
	}

	clear_extent_bit(&BTRFS_I(inode)->io_tree, block_start, block_end,
			  EXTENT_DIRTY | EXTENT_DELALLOC |
			  EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
			  0, 0, &cached_state);

	ret = btrfs_set_extent_delalloc(inode, block_start, block_end, 0,
					&cached_state, 0);
	if (ret) {
		unlock_extent_cached(io_tree, block_start, block_end,
				     &cached_state);
		goto out_unlock;
	}

	if (offset != blocksize) {
		if (!len)
			len = blocksize - offset;
		kaddr = kmap(page);
		if (front)
			/* Zero [block_start, from) within the page */
			memset(kaddr + (block_start - page_offset(page)),
				0, offset);
		else
			/* Zero [from, from + len) within the page */
			memset(kaddr + (block_start - page_offset(page)) + offset,
				0, len);
		flush_dcache_page(page);
		kunmap(page);
	}
	ClearPageChecked(page);
	set_page_dirty(page);
	unlock_extent_cached(io_tree, block_start, block_end, &cached_state);

out_unlock:
	if (ret)
		btrfs_delalloc_release_space(inode, data_reserved, block_start,
					     blocksize, true);
	btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, (ret != 0));
	unlock_page(page);
	put_page(page);
out:
	extent_changeset_free(data_reserved);
	return ret;
}
4922
/*
 * Insert an explicit hole file extent for [offset, offset + len) unless
 * the filesystem has the NO_HOLES feature, in which case holes are
 * implicit and only the inode's logging state needs refreshing.
 */
static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode,
			     u64 offset, u64 len)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_trans_handle *trans;
	int ret;

	/*
	 * With NO_HOLES nothing is inserted; still make the inode look
	 * updated so a later fsync handles the hole correctly.
	 */
	if (btrfs_fs_incompat(fs_info, NO_HOLES)) {
		BTRFS_I(inode)->last_trans = fs_info->generation;
		BTRFS_I(inode)->last_sub_trans = root->log_transid;
		BTRFS_I(inode)->last_log_commit = root->last_log_commit;
		return 0;
	}

	/*
	 * 1 - for the one we're dropping
	 * 1 - for the one we're adding
	 * 1 - for updating the inode.
	 */
	trans = btrfs_start_transaction(root, 3);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	ret = btrfs_drop_extents(trans, root, inode, offset, offset + len, 1);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		return ret;
	}

	ret = btrfs_insert_file_extent(trans, root, btrfs_ino(BTRFS_I(inode)),
			offset, 0, 0, len, 0, len, 0, 0, 0);
	if (ret)
		btrfs_abort_transaction(trans, ret);
	else
		btrfs_update_inode(trans, root, inode);
	btrfs_end_transaction(trans);
	return ret;
}
4966
4967
4968
4969
4970
4971
4972
/*
 * Expanding truncate: fill the range between @oldsize and @size with hole
 * file extents (and matching extent maps), so lookups in the new range see
 * a hole rather than stale or missing mappings.
 */
int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_map *em = NULL;
	struct extent_state *cached_state = NULL;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	u64 hole_start = ALIGN(oldsize, fs_info->sectorsize);
	u64 block_end = ALIGN(size, fs_info->sectorsize);
	u64 last_byte;
	u64 cur_offset;
	u64 hole_size;
	int err = 0;

	/*
	 * If the old size ends mid-block, zero the tail of that block first
	 * so expanding i_size doesn't expose stale data.
	 */
	err = btrfs_truncate_block(inode, oldsize, 0, 0);
	if (err)
		return err;

	if (size <= hole_start)
		return 0;

	/* Lock the range, waiting out any ordered extents that overlap it */
	while (1) {
		struct btrfs_ordered_extent *ordered;

		lock_extent_bits(io_tree, hole_start, block_end - 1,
				 &cached_state);
		ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), hole_start,
						     block_end - hole_start);
		if (!ordered)
			break;
		unlock_extent_cached(io_tree, hole_start, block_end - 1,
				     &cached_state);
		btrfs_start_ordered_extent(inode, ordered, 1);
		btrfs_put_ordered_extent(ordered);
	}

	cur_offset = hole_start;
	while (1) {
		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
				block_end - cur_offset, 0);
		if (IS_ERR(em)) {
			err = PTR_ERR(em);
			em = NULL;
			break;
		}
		last_byte = min(extent_map_end(em), block_end);
		last_byte = ALIGN(last_byte, fs_info->sectorsize);
		/* Preallocated ranges already have extents; skip them */
		if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
			struct extent_map *hole_em;
			hole_size = last_byte - cur_offset;

			err = maybe_insert_hole(root, inode, cur_offset,
						hole_size);
			if (err)
				break;
			btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset,
						cur_offset + hole_size - 1, 0);
			hole_em = alloc_extent_map();
			if (!hole_em) {
				/*
				 * No cached hole map; force a full sync so
				 * the log never misses this range.
				 */
				set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
					&BTRFS_I(inode)->runtime_flags);
				goto next;
			}
			hole_em->start = cur_offset;
			hole_em->len = hole_size;
			hole_em->orig_start = cur_offset;

			hole_em->block_start = EXTENT_MAP_HOLE;
			hole_em->block_len = 0;
			hole_em->orig_block_len = 0;
			hole_em->ram_bytes = hole_size;
			hole_em->bdev = fs_info->fs_devices->latest_bdev;
			hole_em->compress_type = BTRFS_COMPRESS_NONE;
			hole_em->generation = fs_info->generation;

			/* Replace any racing mapping until insertion succeeds */
			while (1) {
				write_lock(&em_tree->lock);
				err = add_extent_mapping(em_tree, hole_em, 1);
				write_unlock(&em_tree->lock);
				if (err != -EEXIST)
					break;
				btrfs_drop_extent_cache(BTRFS_I(inode),
							cur_offset,
							cur_offset +
							hole_size - 1, 0);
			}
			free_extent_map(hole_em);
		}
next:
		free_extent_map(em);
		em = NULL;
		cur_offset = last_byte;
		if (cur_offset >= block_end)
			break;
	}
	free_extent_map(em);
	unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state);
	return err;
}
5078
/*
 * Handle an i_size change from setattr: grow via btrfs_cont_expand() or
 * shrink via btrfs_truncate().  Returns 0 or negative errno.
 */
static int btrfs_setsize(struct inode *inode, struct iattr *attr)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_trans_handle *trans;
	loff_t oldsize = i_size_read(inode);
	loff_t newsize = attr->ia_size;
	int mask = attr->ia_valid;
	int ret;

	/*
	 * A plain truncate() arrives without ATTR_CTIME | ATTR_MTIME set,
	 * yet still needs the timestamps bumped; other callers set these
	 * flags explicitly when they want an update.
	 */
	if (newsize != oldsize) {
		inode_inc_iversion(inode);
		if (!(mask & (ATTR_CTIME | ATTR_MTIME)))
			inode->i_ctime = inode->i_mtime =
				current_time(inode);
	}

	if (newsize > oldsize) {
		/*
		 * Don't do an expanding truncate while a snapshot is being
		 * created — wait for it so the snapshot sees a consistent
		 * file state.  btrfs_end_write_no_snapshotting() releases
		 * the no-snapshotting hold taken by this wait.
		 */
		btrfs_wait_for_snapshot_creation(root);
		ret = btrfs_cont_expand(inode, oldsize, newsize);
		if (ret) {
			btrfs_end_write_no_snapshotting(root);
			return ret;
		}

		trans = btrfs_start_transaction(root, 1);
		if (IS_ERR(trans)) {
			btrfs_end_write_no_snapshotting(root);
			return PTR_ERR(trans);
		}

		i_size_write(inode, newsize);
		btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
		pagecache_isize_extended(inode, oldsize, newsize);
		ret = btrfs_update_inode(trans, root, inode);
		btrfs_end_write_no_snapshotting(root);
		btrfs_end_transaction(trans);
	} else {
		/*
		 * Truncating to zero: flag the inode so its remaining data
		 * is flushed promptly (ordered-data-close handling).
		 */
		if (newsize == 0)
			set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
				&BTRFS_I(inode)->runtime_flags);

		truncate_setsize(inode, newsize);

		/* Block unlocked DIO reads while the truncate runs */
		btrfs_inode_block_unlocked_dio(BTRFS_I(inode));
		inode_dio_wait(inode);
		btrfs_inode_resume_unlocked_dio(BTRFS_I(inode));

		ret = btrfs_truncate(inode, newsize == oldsize);
		if (ret && inode->i_nlink) {
			int err;

			/*
			 * Truncate failed on a still-linked inode: wait for
			 * ordered extents, then restore i_size from the
			 * on-disk size so it reflects what's really there.
			 */
			err = btrfs_wait_ordered_range(inode, 0, (u64)-1);
			if (err)
				return err;
			i_size_write(inode, BTRFS_I(inode)->disk_i_size);
		}
	}

	return ret;
}
5165
5166static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
5167{
5168 struct inode *inode = d_inode(dentry);
5169 struct btrfs_root *root = BTRFS_I(inode)->root;
5170 int err;
5171
5172 if (btrfs_root_readonly(root))
5173 return -EROFS;
5174
5175 err = setattr_prepare(dentry, attr);
5176 if (err)
5177 return err;
5178
5179 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
5180 err = btrfs_setsize(inode, attr);
5181 if (err)
5182 return err;
5183 }
5184
5185 if (attr->ia_valid) {
5186 setattr_copy(inode, attr);
5187 inode_inc_iversion(inode);
5188 err = btrfs_dirty_inode(inode);
5189
5190 if (!err && attr->ia_valid & ATTR_MODE)
5191 err = posix_acl_chmod(inode, inode->i_mode);
5192 }
5193
5194 return err;
5195}
5196
5197
5198
5199
5200
5201
5202
5203
5204
5205
5206
5207
5208
/*
 * Called while evicting an inode: drop every page, extent map and extent
 * state attached to it so nothing keeps stale references once the inode
 * is freed.
 */
static void evict_inode_truncate_pages(struct inode *inode)
{
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_map_tree *map_tree = &BTRFS_I(inode)->extent_tree;
	struct rb_node *node;

	ASSERT(inode->i_state & I_FREEING);
	truncate_inode_pages_final(&inode->i_data);

	/* Tear down the extent map tree, yielding the lock periodically. */
	write_lock(&map_tree->lock);
	while (!RB_EMPTY_ROOT(&map_tree->map.rb_root)) {
		struct extent_map *em;

		node = rb_first_cached(&map_tree->map);
		em = rb_entry(node, struct extent_map, rb_node);
		/* Clear pinned/logging so removal cannot be refused. */
		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
		clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
		remove_extent_mapping(map_tree, em);
		free_extent_map(em);
		if (need_resched()) {
			write_unlock(&map_tree->lock);
			cond_resched();
			write_lock(&map_tree->lock);
		}
	}
	write_unlock(&map_tree->lock);

	/*
	 * Clear all remaining extent state.  Each iteration drops the tree
	 * lock, takes the extent range lock (which can merge or split
	 * states), clears the bits, then re-reads the tree from its first
	 * node -- so we never hold a state pointer across the unlock.
	 *
	 * Delalloc ranges never reached disk, so the qgroup data
	 * reservation taken for them must be released here (NULL
	 * "reserved" argument -- presumably covered by per-inode
	 * reservation tracking; see btrfs_qgroup_free_data()).
	 */
	spin_lock(&io_tree->lock);
	while (!RB_EMPTY_ROOT(&io_tree->state)) {
		struct extent_state *state;
		struct extent_state *cached_state = NULL;
		u64 start;
		u64 end;
		unsigned state_flags;

		node = rb_first(&io_tree->state);
		state = rb_entry(node, struct extent_state, rb_node);
		start = state->start;
		end = state->end;
		state_flags = state->state;
		spin_unlock(&io_tree->lock);

		lock_extent_bits(io_tree, start, end, &cached_state);

		if (state_flags & EXTENT_DELALLOC)
			btrfs_qgroup_free_data(inode, NULL, start, end - start + 1);

		clear_extent_bit(io_tree, start, end,
				 EXTENT_LOCKED | EXTENT_DIRTY |
				 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
				 EXTENT_DEFRAG, 1, 1, &cached_state);

		cond_resched();
		spin_lock(&io_tree->lock);
	}
	spin_unlock(&io_tree->lock);
}
5290
/*
 * Get a transaction handle for deleting (truncating) an evicted inode.
 *
 * Tries to refill @rsv to rsv->size plus one extra metadata unit for the
 * delayed refs the truncate will generate.  After a few failed refill
 * attempts we give up with -ENOSPC; the orphan item then leaves the
 * truncate to orphan cleanup on the next mount.
 */
static struct btrfs_trans_handle *evict_refill_and_join(struct btrfs_root *root,
							struct btrfs_block_rsv *rsv)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
	u64 delayed_refs_extra = btrfs_calc_trans_metadata_size(fs_info, 1);
	int failures = 0;

	for (;;) {
		struct btrfs_trans_handle *trans;
		int ret;

		ret = btrfs_block_rsv_refill(root, rsv,
					     rsv->size + delayed_refs_extra,
					     BTRFS_RESERVE_FLUSH_LIMIT);

		if (ret && ++failures > 2) {
			btrfs_warn(fs_info,
				   "could not allocate space for a delete; will truncate on mount");
			return ERR_PTR(-ENOSPC);
		}

		/*
		 * On a successful refill, join a transaction and hand the
		 * extra delayed-refs reservation over to it; the truncate
		 * itself is paid out of @rsv by the caller.  A failed join
		 * is returned as-is (ERR_PTR) for the caller to handle.
		 */
		trans = btrfs_join_transaction(root);
		if (IS_ERR(trans) || !ret) {
			if (!IS_ERR(trans)) {
				trans->block_rsv = &fs_info->trans_block_rsv;
				trans->bytes_reserved = delayed_refs_extra;
				btrfs_block_rsv_migrate(rsv, trans->block_rsv,
							delayed_refs_extra, 1);
			}
			return trans;
		}

		/*
		 * Refill failed but we may still steal from the global
		 * reserve, provided delayed refs are not already starved
		 * for space.
		 */
		if (!btrfs_check_space_for_delayed_refs(fs_info) &&
		    !btrfs_block_rsv_migrate(global_rsv, rsv, rsv->size, 0))
			return trans;

		/* Otherwise commit to reclaim pinned space and retry. */
		ret = btrfs_commit_transaction(trans);
		if (ret)
			return ERR_PTR(ret);
	}
}
5350
/*
 * VFS ->evict_inode: release everything attached to the inode and, when
 * the inode has been unlinked, delete its items and orphan item from the
 * tree.
 */
void btrfs_evict_inode(struct inode *inode)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_block_rsv *rsv;
	int ret;

	trace_btrfs_inode_evict(inode);

	if (!root) {
		clear_inode(inode);
		return;
	}

	evict_inode_truncate_pages(inode);

	/*
	 * Still-linked inodes (and free space cache inodes) only need their
	 * in-memory state torn down -- unless the root itself is going away
	 * (zero refs) or this is a root-tree inode.
	 */
	if (inode->i_nlink &&
	    ((btrfs_root_refs(&root->root_item) != 0 &&
	      root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID) ||
	     btrfs_is_free_space_inode(BTRFS_I(inode))))
		goto no_delete;

	if (is_bad_inode(inode))
		goto no_delete;

	btrfs_free_io_failure_record(BTRFS_I(inode), 0, (u64)-1);

	/* During log replay, deletions are driven by the replay code. */
	if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
		goto no_delete;

	if (inode->i_nlink > 0) {
		BUG_ON(btrfs_root_refs(&root->root_item) != 0 &&
		       root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID);
		goto no_delete;
	}

	ret = btrfs_commit_inode_delayed_inode(BTRFS_I(inode));
	if (ret)
		goto no_delete;

	rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
	if (!rsv)
		goto no_delete;
	rsv->size = btrfs_calc_trunc_metadata_size(fs_info, 1);
	rsv->failfast = 1;

	btrfs_i_size_write(BTRFS_I(inode), 0);

	/*
	 * Drop the inode items one transaction at a time.  -ENOSPC/-EAGAIN
	 * from btrfs_truncate_inode_items() means "partial progress, more
	 * to do", so loop with a fresh reservation each round.
	 */
	while (1) {
		trans = evict_refill_and_join(root, rsv);
		if (IS_ERR(trans))
			goto free_rsv;

		trans->block_rsv = rsv;

		ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
		trans->block_rsv = &fs_info->trans_block_rsv;
		btrfs_end_transaction(trans);
		btrfs_btree_balance_dirty(fs_info);
		if (ret && ret != -ENOSPC && ret != -EAGAIN)
			goto free_rsv;
		else if (!ret)
			break;
	}

	/*
	 * All items are gone; delete the orphan item in its own
	 * transaction.  Errors are deliberately ignored -- a leftover
	 * orphan item is simply picked up by orphan cleanup on the next
	 * mount.
	 */
	trans = evict_refill_and_join(root, rsv);
	if (!IS_ERR(trans)) {
		trans->block_rsv = rsv;
		btrfs_orphan_del(trans, BTRFS_I(inode));
		trans->block_rsv = &fs_info->trans_block_rsv;
		btrfs_end_transaction(trans);
	}

	/* Hand the inode number back to the free-ino cache where it applies. */
	if (!(root == fs_info->tree_root ||
	      root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID))
		btrfs_return_ino(root, btrfs_ino(BTRFS_I(inode)));

free_rsv:
	btrfs_free_block_rsv(fs_info, rsv);
no_delete:
	/*
	 * On every path the delayed node must be dropped and the VFS inode
	 * cleared before the inode memory goes away.
	 */
	btrfs_remove_delayed_node(BTRFS_I(inode));
	clear_inode(inode);
}
5449
5450
5451
5452
5453
5454
/*
 * Look up @dentry's name in directory @dir and return, in @location, the
 * key its dir item points at: an INODE_ITEM key for regular entries or a
 * ROOT_ITEM key for subvolumes.  Any other key type in a dir item is
 * corruption and yields -EUCLEAN.  Returns 0 on success, -ENOENT when the
 * name does not exist, or another negative errno.
 */
static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
			       struct btrfs_key *location)
{
	const char *name = dentry->d_name.name;
	int namelen = dentry->d_name.len;
	struct btrfs_dir_item *di;
	struct btrfs_path *path;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	int ret = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(BTRFS_I(dir)),
			name, namelen, 0);
	if (IS_ERR_OR_NULL(di)) {
		/* NULL means "not found", an ERR_PTR is a real error. */
		ret = di ? PTR_ERR(di) : -ENOENT;
		goto out;
	}

	btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
	if (location->type != BTRFS_INODE_ITEM_KEY &&
	    location->type != BTRFS_ROOT_ITEM_KEY) {
		ret = -EUCLEAN;
		btrfs_warn(root->fs_info,
"%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))",
			   __func__, name, btrfs_ino(BTRFS_I(dir)),
			   location->objectid, location->type, location->offset);
	}
out:
	btrfs_free_path(path);
	return ret;
}
5489
5490
5491
5492
5493
5494
/*
 * @location, as read from a dir item, names a subvolume root (ROOT_ITEM
 * key).  Verify there is a matching ROOT_REF for (@dir, @dentry) in the
 * tree of tree roots, look up the referenced fs root, and rewrite
 * @location to point at that root's top-level directory inode.
 *
 * Returns 0 on success, -ENOENT when no matching ref exists, or another
 * negative errno.
 */
static int fixup_tree_root_location(struct btrfs_fs_info *fs_info,
				    struct inode *dir,
				    struct dentry *dentry,
				    struct btrfs_key *location,
				    struct btrfs_root **sub_root)
{
	struct btrfs_path *path;
	struct btrfs_root *new_root;
	struct btrfs_root_ref *ref;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	int ret;
	int err = 0;

	path = btrfs_alloc_path();
	if (!path) {
		err = -ENOMEM;
		goto out;
	}

	/* Default outcome unless every check below passes. */
	err = -ENOENT;
	key.objectid = BTRFS_I(dir)->root->root_key.objectid;
	key.type = BTRFS_ROOT_REF_KEY;
	key.offset = location->objectid;

	ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
	if (ret) {
		if (ret < 0)
			err = ret;
		goto out;
	}

	/* The ref must belong to @dir and carry exactly @dentry's name. */
	leaf = path->nodes[0];
	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
	if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(BTRFS_I(dir)) ||
	    btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len)
		goto out;

	ret = memcmp_extent_buffer(leaf, dentry->d_name.name,
				   (unsigned long)(ref + 1),
				   dentry->d_name.len);
	if (ret)
		goto out;

	btrfs_release_path(path);

	new_root = btrfs_read_fs_root_no_name(fs_info, location);
	if (IS_ERR(new_root)) {
		err = PTR_ERR(new_root);
		goto out;
	}

	*sub_root = new_root;
	location->objectid = btrfs_root_dirid(&new_root->root_item);
	location->type = BTRFS_INODE_ITEM_KEY;
	location->offset = 0;
	err = 0;
out:
	btrfs_free_path(path);
	return err;
}
5556
/*
 * Insert the inode into its root's rbtree of in-memory inodes, keyed by
 * inode number.  An existing node with the same ino must belong to a dying
 * inode (I_WILL_FREE/I_FREEING) and is replaced in place.
 */
static void inode_tree_add(struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_inode *entry;
	struct rb_node **p;
	struct rb_node *parent;
	struct rb_node *new = &BTRFS_I(inode)->rb_node;
	u64 ino = btrfs_ino(BTRFS_I(inode));

	/* Unhashed inodes are not tracked in the tree. */
	if (inode_unhashed(inode))
		return;
	parent = NULL;
	spin_lock(&root->inode_lock);
	p = &root->inode_tree.rb_node;
	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct btrfs_inode, rb_node);

		if (ino < btrfs_ino(entry))
			p = &parent->rb_left;
		else if (ino > btrfs_ino(entry))
			p = &parent->rb_right;
		else {
			/* Same ino: the old inode must be on its way out. */
			WARN_ON(!(entry->vfs_inode.i_state &
				  (I_WILL_FREE | I_FREEING)));
			rb_replace_node(parent, new, &root->inode_tree);
			RB_CLEAR_NODE(parent);
			spin_unlock(&root->inode_lock);
			return;
		}
	}
	rb_link_node(new, parent, p);
	rb_insert_color(new, &root->inode_tree);
	spin_unlock(&root->inode_lock);
}
5592
/*
 * Remove the inode from its root's inode rbtree.  If that empties the
 * tree and the root has no remaining references, queue the root as dead
 * for cleanup -- after a subvol_srcu grace period so concurrent lookups
 * have finished with it.
 */
static void inode_tree_del(struct inode *inode)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int empty = 0;

	spin_lock(&root->inode_lock);
	if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) {
		rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree);
		RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
		empty = RB_EMPTY_ROOT(&root->inode_tree);
	}
	spin_unlock(&root->inode_lock);

	if (empty && btrfs_root_refs(&root->root_item) == 0) {
		synchronize_srcu(&fs_info->subvol_srcu);
		/* Re-check: the tree may have gained inodes meanwhile. */
		spin_lock(&root->inode_lock);
		empty = RB_EMPTY_ROOT(&root->inode_tree);
		spin_unlock(&root->inode_lock);
		if (empty)
			btrfs_add_dead_root(root);
	}
}
5616
5617
5618static int btrfs_init_locked_inode(struct inode *inode, void *p)
5619{
5620 struct btrfs_iget_args *args = p;
5621 inode->i_ino = args->location->objectid;
5622 memcpy(&BTRFS_I(inode)->location, args->location,
5623 sizeof(*args->location));
5624 BTRFS_I(inode)->root = args->root;
5625 return 0;
5626}
5627
5628static int btrfs_find_actor(struct inode *inode, void *opaque)
5629{
5630 struct btrfs_iget_args *args = opaque;
5631 return args->location->objectid == BTRFS_I(inode)->location.objectid &&
5632 args->root == BTRFS_I(inode)->root;
5633}
5634
5635static struct inode *btrfs_iget_locked(struct super_block *s,
5636 struct btrfs_key *location,
5637 struct btrfs_root *root)
5638{
5639 struct inode *inode;
5640 struct btrfs_iget_args args;
5641 unsigned long hashval = btrfs_inode_hash(location->objectid, root);
5642
5643 args.location = location;
5644 args.root = root;
5645
5646 inode = iget5_locked(s, hashval, btrfs_find_actor,
5647 btrfs_init_locked_inode,
5648 (void *)&args);
5649 return inode;
5650}
5651
5652
5653
5654
/*
 * Get the in-core inode for @location in @root.  A cache miss reads the
 * inode item from disk (reusing @path when provided) and links the inode
 * into the root's inode tree; *new, if non-NULL, is set to 1 in that
 * case.  Returns the inode or an ERR_PTR.
 */
struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location,
			      struct btrfs_root *root, int *new,
			      struct btrfs_path *path)
{
	struct inode *inode;

	inode = btrfs_iget_locked(s, location, root);
	if (!inode)
		return ERR_PTR(-ENOMEM);

	if (inode->i_state & I_NEW) {
		int ret;

		ret = btrfs_read_locked_inode(inode, path);
		if (!ret) {
			inode_tree_add(inode);
			unlock_new_inode(inode);
			if (new)
				*new = 1;
		} else {
			iget_failed(inode);
			/*
			 * A positive return here comes from the tree search
			 * (item not found); map it to -ENOENT for callers.
			 */
			if (ret > 0)
				ret = -ENOENT;
			inode = ERR_PTR(ret);
		}
	}

	return inode;
}
5689
/* btrfs_iget_path() without a caller-supplied path. */
struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
			 struct btrfs_root *root, int *new)
{
	return btrfs_iget_path(s, location, root, new, NULL);
}
5695
5696static struct inode *new_simple_dir(struct super_block *s,
5697 struct btrfs_key *key,
5698 struct btrfs_root *root)
5699{
5700 struct inode *inode = new_inode(s);
5701
5702 if (!inode)
5703 return ERR_PTR(-ENOMEM);
5704
5705 BTRFS_I(inode)->root = root;
5706 memcpy(&BTRFS_I(inode)->location, key, sizeof(*key));
5707 set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags);
5708
5709 inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID;
5710 inode->i_op = &btrfs_dir_ro_inode_operations;
5711 inode->i_opflags &= ~IOP_XATTR;
5712 inode->i_fop = &simple_dir_operations;
5713 inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
5714 inode->i_mtime = current_time(inode);
5715 inode->i_atime = inode->i_mtime;
5716 inode->i_ctime = inode->i_mtime;
5717 BTRFS_I(inode)->i_otime = inode->i_mtime;
5718
5719 return inode;
5720}
5721
/*
 * Resolve @dentry in @dir to an inode.  Regular entries are fetched from
 * the current root; subvolume references (ROOT_ITEM keys) are redirected
 * into the referenced root, with a dummy read-only directory substituted
 * when the root ref cannot be found.  Crossing into another subvolume on
 * a read-write mount also runs orphan cleanup there.  Returns an inode
 * or an ERR_PTR, never NULL.
 */
struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
	struct inode *inode;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct btrfs_root *sub_root = root;
	struct btrfs_key location;
	int index;
	int ret = 0;

	if (dentry->d_name.len > BTRFS_NAME_LEN)
		return ERR_PTR(-ENAMETOOLONG);

	ret = btrfs_inode_by_name(dir, dentry, &location);
	if (ret < 0)
		return ERR_PTR(ret);

	/* Plain entry in the same root: done. */
	if (location.type == BTRFS_INODE_ITEM_KEY) {
		inode = btrfs_iget(dir->i_sb, &location, root, NULL);
		return inode;
	}

	/* Subvolume reference: hop to the target root under subvol_srcu. */
	index = srcu_read_lock(&fs_info->subvol_srcu);
	ret = fixup_tree_root_location(fs_info, dir, dentry,
				       &location, &sub_root);
	if (ret < 0) {
		if (ret != -ENOENT)
			inode = ERR_PTR(ret);
		else
			inode = new_simple_dir(dir->i_sb, &location, sub_root);
	} else {
		inode = btrfs_iget(dir->i_sb, &location, sub_root, NULL);
	}
	srcu_read_unlock(&fs_info->subvol_srcu, index);

	if (!IS_ERR(inode) && root != sub_root) {
		down_read(&fs_info->cleanup_work_sem);
		if (!sb_rdonly(inode->i_sb))
			ret = btrfs_orphan_cleanup(sub_root);
		up_read(&fs_info->cleanup_work_sem);
		if (ret) {
			iput(inode);
			inode = ERR_PTR(ret);
		}
	}

	return inode;
}
5770
5771static int btrfs_dentry_delete(const struct dentry *dentry)
5772{
5773 struct btrfs_root *root;
5774 struct inode *inode = d_inode(dentry);
5775
5776 if (!inode && !IS_ROOT(dentry))
5777 inode = d_inode(dentry->d_parent);
5778
5779 if (inode) {
5780 root = BTRFS_I(inode)->root;
5781 if (btrfs_root_refs(&root->root_item) == 0)
5782 return 1;
5783
5784 if (btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
5785 return 1;
5786 }
5787 return 0;
5788}
5789
5790static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
5791 unsigned int flags)
5792{
5793 struct inode *inode = btrfs_lookup_dentry(dir, dentry);
5794
5795 if (inode == ERR_PTR(-ENOENT))
5796 inode = NULL;
5797 return d_splice_alias(inode, dentry);
5798}
5799
/*
 * Map on-disk dir-entry file types to VFS DT_* values; indexed by the
 * value btrfs_dir_type() returns (see btrfs_real_readdir()).
 */
unsigned char btrfs_filetype_table[] = {
	DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
};
5803
5804
5805
5806
5807
5808
5809
5810
5811
5812
5813static int btrfs_opendir(struct inode *inode, struct file *file)
5814{
5815 struct btrfs_file_private *private;
5816
5817 private = kzalloc(sizeof(struct btrfs_file_private), GFP_KERNEL);
5818 if (!private)
5819 return -ENOMEM;
5820 private->filldir_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
5821 if (!private->filldir_buf) {
5822 kfree(private);
5823 return -ENOMEM;
5824 }
5825 file->private_data = private;
5826 return 0;
5827}
5828
/*
 * One readdir entry staged in the filldir_buf allocated by
 * btrfs_opendir(); the name bytes are stored immediately after the
 * struct (see btrfs_filldir()).
 */
struct dir_entry {
	u64 ino;
	u64 offset;	/* dir index key offset; becomes ctx->pos */
	unsigned type;	/* DT_* value for dir_emit() */
	int name_len;
};
5835
5836static int btrfs_filldir(void *addr, int entries, struct dir_context *ctx)
5837{
5838 while (entries--) {
5839 struct dir_entry *entry = addr;
5840 char *name = (char *)(entry + 1);
5841
5842 ctx->pos = get_unaligned(&entry->offset);
5843 if (!dir_emit(ctx, name, get_unaligned(&entry->name_len),
5844 get_unaligned(&entry->ino),
5845 get_unaligned(&entry->type)))
5846 return 1;
5847 addr += sizeof(struct dir_entry) +
5848 get_unaligned(&entry->name_len);
5849 ctx->pos++;
5850 }
5851 return 0;
5852}
5853
/*
 * Directory ->iterate_shared: walk DIR_INDEX items from ctx->pos onward,
 * batching entries into the per-file staging buffer and emitting them via
 * btrfs_filldir().  Delayed (not yet committed) insertions and deletions
 * are merged in through the ins_list/del_list machinery.
 */
static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
{
	struct inode *inode = file_inode(file);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_file_private *private = file->private_data;
	struct btrfs_dir_item *di;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_path *path;
	void *addr;
	struct list_head ins_list;
	struct list_head del_list;
	int ret;
	struct extent_buffer *leaf;
	int slot;
	char *name_ptr;
	int name_len;
	int entries = 0;
	int total_len = 0;
	bool put = false;
	struct btrfs_key location;

	if (!dir_emit_dots(file, ctx))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	addr = private->filldir_buf;
	path->reada = READA_FORWARD;

	INIT_LIST_HEAD(&ins_list);
	INIT_LIST_HEAD(&del_list);
	put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list);

again:
	key.type = BTRFS_DIR_INDEX_KEY;
	key.offset = ctx->pos;
	key.objectid = btrfs_ino(BTRFS_I(inode));

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto err;

	while (1) {
		struct dir_entry *entry;

		leaf = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto err;
			else if (ret > 0)
				break;
			continue;
		}

		btrfs_item_key_to_cpu(leaf, &found_key, slot);

		/* Stop at the first key past this directory's DIR_INDEXes. */
		if (found_key.objectid != key.objectid)
			break;
		if (found_key.type != BTRFS_DIR_INDEX_KEY)
			break;
		if (found_key.offset < ctx->pos)
			goto next;
		/* Skip entries deleted in the uncommitted delayed items. */
		if (btrfs_should_delete_dir_index(&del_list, found_key.offset))
			goto next;
		di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
		name_len = btrfs_dir_name_len(leaf, di);
		/*
		 * Staging buffer full: drop the path (dir_emit may sleep),
		 * flush the batch, then re-search from the current pos.
		 */
		if ((total_len + sizeof(struct dir_entry) + name_len) >=
		    PAGE_SIZE) {
			btrfs_release_path(path);
			ret = btrfs_filldir(private->filldir_buf, entries, ctx);
			if (ret)
				goto nopos;
			addr = private->filldir_buf;
			entries = 0;
			total_len = 0;
			goto again;
		}

		/* Append one packed dir_entry + name to the staging buffer. */
		entry = addr;
		put_unaligned(name_len, &entry->name_len);
		name_ptr = (char *)(entry + 1);
		read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1),
				   name_len);
		put_unaligned(btrfs_filetype_table[btrfs_dir_type(leaf, di)],
				&entry->type);
		btrfs_dir_item_key_to_cpu(leaf, di, &location);
		put_unaligned(location.objectid, &entry->ino);
		put_unaligned(found_key.offset, &entry->offset);
		entries++;
		addr += sizeof(struct dir_entry) + name_len;
		total_len += sizeof(struct dir_entry) + name_len;
next:
		path->slots[0]++;
	}
	btrfs_release_path(path);

	ret = btrfs_filldir(private->filldir_buf, entries, ctx);
	if (ret)
		goto nopos;

	/* Merge in entries added by uncommitted delayed insertions. */
	ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list);
	if (ret)
		goto nopos;

	/*
	 * Push ctx->pos past any real index so the next getdents call
	 * returns nothing.  Two sentinels (INT_MAX then LLONG_MAX) are
	 * used -- presumably to keep callers that truncate the offset to
	 * 32 bits making forward progress; confirm against the original
	 * commit history before changing this.
	 */
	if (ctx->pos >= INT_MAX)
		ctx->pos = LLONG_MAX;
	else
		ctx->pos = INT_MAX;
nopos:
	ret = 0;
err:
	if (put)
		btrfs_readdir_put_delayed_items(inode, &ins_list, &del_list);
	btrfs_free_path(path);
	return ret;
}
5992
5993
5994
5995
5996
5997
5998
5999static int btrfs_dirty_inode(struct inode *inode)
6000{
6001 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
6002 struct btrfs_root *root = BTRFS_I(inode)->root;
6003 struct btrfs_trans_handle *trans;
6004 int ret;
6005
6006 if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
6007 return 0;
6008
6009 trans = btrfs_join_transaction(root);
6010 if (IS_ERR(trans))
6011 return PTR_ERR(trans);
6012
6013 ret = btrfs_update_inode(trans, root, inode);
6014 if (ret && ret == -ENOSPC) {
6015
6016 btrfs_end_transaction(trans);
6017 trans = btrfs_start_transaction(root, 1);
6018 if (IS_ERR(trans))
6019 return PTR_ERR(trans);
6020
6021 ret = btrfs_update_inode(trans, root, inode);
6022 }
6023 btrfs_end_transaction(trans);
6024 if (BTRFS_I(inode)->delayed_node)
6025 btrfs_balance_delayed_items(fs_info);
6026
6027 return ret;
6028}
6029
6030
6031
6032
6033
6034static int btrfs_update_time(struct inode *inode, struct timespec64 *now,
6035 int flags)
6036{
6037 struct btrfs_root *root = BTRFS_I(inode)->root;
6038 bool dirty = flags & ~S_VERSION;
6039
6040 if (btrfs_root_readonly(root))
6041 return -EROFS;
6042
6043 if (flags & S_VERSION)
6044 dirty |= inode_maybe_inc_iversion(inode, dirty);
6045 if (flags & S_CTIME)
6046 inode->i_ctime = *now;
6047 if (flags & S_MTIME)
6048 inode->i_mtime = *now;
6049 if (flags & S_ATIME)
6050 inode->i_atime = *now;
6051 return dirty ? btrfs_dirty_inode(inode) : 0;
6052}
6053
6054
6055
6056
6057
6058
/*
 * Initialise inode->index_cnt -- the next directory index to hand out --
 * from the highest DIR_INDEX key currently stored for this directory.
 * An empty directory starts at index 2 (indexes below that are reserved;
 * presumably for "." and ".." -- confirm against the dir item layout).
 */
static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
{
	struct btrfs_root *root = inode->root;
	struct btrfs_key key, found_key;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	int ret;

	/* Search for the largest possible DIR_INDEX key of this inode. */
	key.objectid = btrfs_ino(inode);
	key.type = BTRFS_DIR_INDEX_KEY;
	key.offset = (u64)-1;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	/* (u64)-1 can never exist, so an exact hit would be corruption. */
	if (ret == 0)
		goto out;
	ret = 0;

	/*
	 * The search landed one past the last key <= ours; the previous
	 * slot, if any, holds the directory's highest DIR_INDEX.
	 */
	if (path->slots[0] == 0) {
		inode->index_cnt = 2;
		goto out;
	}

	path->slots[0]--;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

	if (found_key.objectid != btrfs_ino(inode) ||
	    found_key.type != BTRFS_DIR_INDEX_KEY) {
		inode->index_cnt = 2;
		goto out;
	}

	inode->index_cnt = found_key.offset + 1;
out:
	btrfs_free_path(path);
	return ret;
}
6110
6111
6112
6113
6114
6115int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index)
6116{
6117 int ret = 0;
6118
6119 if (dir->index_cnt == (u64)-1) {
6120 ret = btrfs_inode_delayed_dir_index_count(dir);
6121 if (ret) {
6122 ret = btrfs_set_inode_index_count(dir);
6123 if (ret)
6124 return ret;
6125 }
6126 }
6127
6128 *index = dir->index_cnt;
6129 dir->index_cnt++;
6130
6131 return ret;
6132}
6133
6134static int btrfs_insert_inode_locked(struct inode *inode)
6135{
6136 struct btrfs_iget_args args;
6137 args.location = &BTRFS_I(inode)->location;
6138 args.root = BTRFS_I(inode)->root;
6139
6140 return insert_inode_locked4(inode,
6141 btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root),
6142 btrfs_find_actor, &args);
6143}
6144
6145
6146
6147
6148
6149
6150static void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
6151{
6152 unsigned int flags;
6153
6154 if (!dir)
6155 return;
6156
6157 flags = BTRFS_I(dir)->flags;
6158
6159 if (flags & BTRFS_INODE_NOCOMPRESS) {
6160 BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
6161 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
6162 } else if (flags & BTRFS_INODE_COMPRESS) {
6163 BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
6164 BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
6165 }
6166
6167 if (flags & BTRFS_INODE_NODATACOW) {
6168 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
6169 if (S_ISREG(inode->i_mode))
6170 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
6171 }
6172
6173 btrfs_sync_inode_flags_to_i_flags(inode);
6174}
6175
/*
 * Create a new inode: allocate the VFS inode, insert its INODE_ITEM (and,
 * when @name is given, an INODE_REF back to @ref_objectid) in a single
 * btree operation, and initialise the in-core state.  *@index receives
 * the dir index reserved in @dir for the new entry.
 *
 * Returns the locked new inode or an ERR_PTR.
 */
static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     struct inode *dir,
				     const char *name, int name_len,
				     u64 ref_objectid, u64 objectid,
				     umode_t mode, u64 *index)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct inode *inode;
	struct btrfs_inode_item *inode_item;
	struct btrfs_key *location;
	struct btrfs_path *path;
	struct btrfs_inode_ref *ref;
	struct btrfs_key key[2];
	u32 sizes[2];
	int nitems = name ? 2 : 1;
	unsigned long ptr;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return ERR_PTR(-ENOMEM);

	inode = new_inode(fs_info->sb);
	if (!inode) {
		btrfs_free_path(path);
		return ERR_PTR(-ENOMEM);
	}

	/*
	 * A nameless inode has no directory entry yet (O_TMPFILE-style
	 * creation), so it starts with zero links.
	 */
	if (!name)
		set_nlink(inode, 0);

	/*
	 * Set i_ino first: the helpers below (index reservation, hash
	 * insertion) rely on it being valid.
	 */
	inode->i_ino = objectid;

	if (dir && name) {
		trace_btrfs_inode_request(dir);

		/* Reserve the dir index the caller will link us at. */
		ret = btrfs_set_inode_index(BTRFS_I(dir), index);
		if (ret) {
			btrfs_free_path(path);
			iput(inode);
			return ERR_PTR(ret);
		}
	} else if (dir) {
		*index = 0;
	}

	/*
	 * A new directory starts handing out indexes at 2; see
	 * btrfs_set_inode_index_count() for the reasoning.
	 */
	BTRFS_I(inode)->index_cnt = 2;
	BTRFS_I(inode)->dir_index = *index;
	BTRFS_I(inode)->root = root;
	BTRFS_I(inode)->generation = trans->transid;
	inode->i_generation = BTRFS_I(inode)->generation;

	/*
	 * Newly created inodes need a full sync on their first fsync --
	 * the log tree has no prior state for them.
	 */
	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);

	key[0].objectid = objectid;
	key[0].type = BTRFS_INODE_ITEM_KEY;
	key[0].offset = 0;

	sizes[0] = sizeof(struct btrfs_inode_item);

	if (name) {
		/*
		 * The INODE_REF is inserted in the same leaf operation as
		 * the INODE_ITEM, directly following it.
		 */
		key[1].objectid = objectid;
		key[1].type = BTRFS_INODE_REF_KEY;
		key[1].offset = ref_objectid;

		sizes[1] = name_len + sizeof(*ref);
	}

	location = &BTRFS_I(inode)->location;
	location->objectid = objectid;
	location->offset = 0;
	location->type = BTRFS_INODE_ITEM_KEY;

	ret = btrfs_insert_inode_locked(inode);
	if (ret < 0) {
		iput(inode);
		goto fail;
	}

	path->leave_spinning = 1;
	ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems);
	if (ret != 0)
		goto fail_unlock;

	inode_init_owner(inode, dir, mode);
	inode_set_bytes(inode, 0);

	inode->i_mtime = current_time(inode);
	inode->i_atime = inode->i_mtime;
	inode->i_ctime = inode->i_mtime;
	BTRFS_I(inode)->i_otime = inode->i_mtime;

	/* Fill the freshly inserted (uninitialised) inode item. */
	inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
				  struct btrfs_inode_item);
	memzero_extent_buffer(path->nodes[0], (unsigned long)inode_item,
			     sizeof(*inode_item));
	fill_inode_item(trans, path->nodes[0], inode_item, inode);

	if (name) {
		ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
				     struct btrfs_inode_ref);
		btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
		btrfs_set_inode_ref_index(path->nodes[0], ref, *index);
		ptr = (unsigned long)(ref + 1);
		write_extent_buffer(path->nodes[0], name, ptr, name_len);
	}

	btrfs_mark_buffer_dirty(path->nodes[0]);
	btrfs_free_path(path);

	btrfs_inherit_iflags(inode, dir);

	if (S_ISREG(mode)) {
		if (btrfs_test_opt(fs_info, NODATASUM))
			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
		if (btrfs_test_opt(fs_info, NODATACOW))
			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW |
				BTRFS_INODE_NODATASUM;
	}

	inode_tree_add(inode);

	trace_btrfs_inode_new(inode);
	btrfs_set_inode_last_trans(trans, inode);

	btrfs_update_root_times(trans, root);

	/* Property inheritance failure is logged but not fatal. */
	ret = btrfs_inode_inherit_props(trans, inode, dir);
	if (ret)
		btrfs_err(fs_info,
			  "error inheriting props for ino %llu (root %llu): %d",
			  btrfs_ino(BTRFS_I(inode)), root->root_key.objectid, ret);

	return inode;

fail_unlock:
	discard_new_inode(inode);
fail:
	/* Undo the dir index reservation taken above. */
	if (dir && name)
		BTRFS_I(dir)->index_cnt--;
	btrfs_free_path(path);
	return ERR_PTR(ret);
}
6344
/* Translate inode->i_mode's S_IFMT bits into the on-disk dir entry type. */
static inline u8 btrfs_inode_type(struct inode *inode)
{
	return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
}
6349
6350
6351
6352
6353
6354
6355
/*
 * Link @inode into directory @parent_inode under @name at dir index
 * @index: insert the back reference (INODE_REF, or ROOT_REF for a
 * subvolume root), then the DIR_ITEM/DIR_INDEX entries, and update the
 * parent's size and times.  On dir-item failure the back reference is
 * rolled back.  Returns 0 or a negative errno; most failures abort the
 * transaction.
 */
int btrfs_add_link(struct btrfs_trans_handle *trans,
		   struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
		   const char *name, int name_len, int add_backref, u64 index)
{
	int ret = 0;
	struct btrfs_key key;
	struct btrfs_root *root = parent_inode->root;
	u64 ino = btrfs_ino(inode);
	u64 parent_ino = btrfs_ino(parent_inode);

	/* Linking a subvolume root: the dir item points at the root key. */
	if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
		memcpy(&key, &inode->root->root_key, sizeof(key));
	} else {
		key.objectid = ino;
		key.type = BTRFS_INODE_ITEM_KEY;
		key.offset = 0;
	}

	if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
		ret = btrfs_add_root_ref(trans, key.objectid,
					 root->root_key.objectid, parent_ino,
					 index, name, name_len);
	} else if (add_backref) {
		ret = btrfs_insert_inode_ref(trans, root, name, name_len, ino,
					     parent_ino, index);
	}

	/* Nothing has been linked yet, so a plain return is safe here. */
	if (ret)
		return ret;

	ret = btrfs_insert_dir_item(trans, name, name_len, parent_inode, &key,
				    btrfs_inode_type(&inode->vfs_inode), index);
	if (ret == -EEXIST || ret == -EOVERFLOW)
		goto fail_dir_item;
	else if (ret) {
		btrfs_abort_transaction(trans, ret);
		return ret;
	}

	/* Directory size counts both the dir item and dir index names. */
	btrfs_i_size_write(parent_inode, parent_inode->vfs_inode.i_size +
			   name_len * 2);
	inode_inc_iversion(&parent_inode->vfs_inode);
	parent_inode->vfs_inode.i_mtime = parent_inode->vfs_inode.i_ctime =
		current_time(&parent_inode->vfs_inode);
	ret = btrfs_update_inode(trans, root, &parent_inode->vfs_inode);
	if (ret)
		btrfs_abort_transaction(trans, ret);
	return ret;

fail_dir_item:
	/* Roll back whichever back reference was inserted above. */
	if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
		u64 local_index;
		int err;
		err = btrfs_del_root_ref(trans, key.objectid,
					 root->root_key.objectid, parent_ino,
					 &local_index, name, name_len);
		if (err)
			btrfs_abort_transaction(trans, err);
	} else if (add_backref) {
		u64 local_index;
		int err;

		err = btrfs_del_inode_ref(trans, root, name, name_len,
					  ino, parent_ino, &local_index);
		if (err)
			btrfs_abort_transaction(trans, err);
	}

	/* Return the original dir-item error, not the rollback result. */
	return ret;
}
6428
6429static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
6430 struct btrfs_inode *dir, struct dentry *dentry,
6431 struct btrfs_inode *inode, int backref, u64 index)
6432{
6433 int err = btrfs_add_link(trans, dir, inode,
6434 dentry->d_name.name, dentry->d_name.len,
6435 backref, index);
6436 if (err > 0)
6437 err = -EEXIST;
6438 return err;
6439}
6440
/*
 * VFS ->mknod: create a special inode (device node, fifo, socket) in
 * @dir.  Returns 0 or a negative errno.
 */
static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
		       umode_t mode, dev_t rdev)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct inode *inode = NULL;
	int err;
	u64 objectid;
	u64 index = 0;

	/*
	 * 5 metadata units: inode item + inode ref, dir item + dir index,
	 * and one for a potential security xattr -- matches the items
	 * inserted below; confirm against btrfs_new_inode()/add_link().
	 */
	trans = btrfs_start_transaction(root, 5);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	err = btrfs_find_free_ino(root, &objectid);
	if (err)
		goto out_unlock;

	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
			dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), objectid,
			mode, &index);
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
		inode = NULL;
		goto out_unlock;
	}

	/*
	 * Set the ops before the security init so any LSM hook that looks
	 * at the inode sees it fully formed.
	 */
	inode->i_op = &btrfs_special_inode_operations;
	init_special_inode(inode, inode->i_mode, rdev);

	err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
	if (err)
		goto out_unlock;

	err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode),
			0, index);
	if (err)
		goto out_unlock;

	btrfs_update_inode(trans, root, inode);
	d_instantiate_new(dentry, inode);

out_unlock:
	btrfs_end_transaction(trans);
	btrfs_btree_balance_dirty(fs_info);
	/* On failure the half-created inode is unlinked and discarded. */
	if (err && inode) {
		inode_dec_link_count(inode);
		discard_new_inode(inode);
	}
	return err;
}
6504
/*
 * Create a regular file in @dir.  Returns 0 or a negative errno; any
 * partially constructed inode is discarded on failure.
 */
static int btrfs_create(struct inode *dir, struct dentry *dentry,
			umode_t mode, bool excl)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct inode *inode = NULL;
	int err;
	u64 objectid;
	u64 index = 0;

	/*
	 * 2 for inode item and ref
	 * 2 for dir items
	 * 1 for xattr if selinux is on
	 */
	trans = btrfs_start_transaction(root, 5);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	err = btrfs_find_free_ino(root, &objectid);
	if (err)
		goto out_unlock;

	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
			dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), objectid,
			mode, &index);
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
		inode = NULL;
		goto out_unlock;
	}

	/*
	 * Install the file ops before security init / d_instantiate so an
	 * LSM looking at the inode sees a fully formed file inode.
	 */
	inode->i_fop = &btrfs_file_operations;
	inode->i_op = &btrfs_file_inode_operations;
	inode->i_mapping->a_ops = &btrfs_aops;

	err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
	if (err)
		goto out_unlock;

	err = btrfs_update_inode(trans, root, inode);
	if (err)
		goto out_unlock;

	err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode),
			0, index);
	if (err)
		goto out_unlock;

	BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
	d_instantiate_new(dentry, inode);

out_unlock:
	btrfs_end_transaction(trans);
	if (err && inode) {
		/* Undo the link (if added) and let the new inode be freed. */
		inode_dec_link_count(inode);
		discard_new_inode(inode);
	}
	btrfs_btree_balance_dirty(fs_info);
	return err;
}
6572
/*
 * Create a hard link to @old_dentry's inode under @dir/@dentry.
 * Returns 0 or a negative errno.
 */
static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
		      struct dentry *dentry)
{
	struct btrfs_trans_handle *trans = NULL;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct inode *inode = d_inode(old_dentry);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	u64 index;
	int err;
	int drop_inode = 0;

	/* do not allow sys_link's with other subvols of the same device */
	if (root->root_key.objectid != BTRFS_I(inode)->root->root_key.objectid)
		return -EXDEV;

	if (inode->i_nlink >= BTRFS_LINK_MAX)
		return -EMLINK;

	err = btrfs_set_inode_index(BTRFS_I(dir), &index);
	if (err)
		goto fail;

	/*
	 * 2 items for inode and inode ref
	 * 2 items for dir items
	 * 1 item for parent inode
	 * 1 item for orphan item deletion if nlink == 0 (O_TMPFILE link-in)
	 */
	trans = btrfs_start_transaction(root, inode->i_nlink ? 5 : 6);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		trans = NULL;
		goto fail;
	}

	/* There are several dir indexes for this inode, clear the cache. */
	BTRFS_I(inode)->dir_index = 0ULL;
	inc_nlink(inode);
	inode_inc_iversion(inode);
	inode->i_ctime = current_time(inode);
	ihold(inode);
	set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);

	err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode),
			1, index);

	if (err) {
		drop_inode = 1;
	} else {
		struct dentry *parent = dentry->d_parent;
		int ret;

		err = btrfs_update_inode(trans, root, inode);
		if (err)
			goto fail;
		if (inode->i_nlink == 1) {
			/*
			 * If new hard link count is 1, it's a file created
			 * with open(2) O_TMPFILE flag: remove the orphan
			 * item now that it is linked in.
			 */
			err = btrfs_orphan_del(trans, BTRFS_I(inode));
			if (err)
				goto fail;
		}
		BTRFS_I(inode)->last_link_trans = trans->transid;
		d_instantiate(dentry, inode);
		ret = btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent,
					 true, NULL);
		if (ret == BTRFS_NEED_TRANS_COMMIT) {
			err = btrfs_commit_transaction(trans);
			trans = NULL;
		}
	}

fail:
	if (trans)
		btrfs_end_transaction(trans);
	if (drop_inode) {
		inode_dec_link_count(inode);
		iput(inode);
	}
	btrfs_btree_balance_dirty(fs_info);
	return err;
}
6657
/*
 * Create a directory in @dir.  Returns 0 or a negative errno; any
 * partially constructed inode is discarded on failure.
 */
static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
	struct inode *inode = NULL;
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	int err = 0;
	u64 objectid = 0;
	u64 index = 0;

	/*
	 * 2 items for inode and ref
	 * 2 items for dir items
	 * 1 for xattr if selinux is on
	 */
	trans = btrfs_start_transaction(root, 5);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	err = btrfs_find_free_ino(root, &objectid);
	if (err)
		goto out_fail;

	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
			dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), objectid,
			S_IFDIR | mode, &index);
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
		inode = NULL;
		goto out_fail;
	}

	/* Install dir ops before security init / instantiation. */
	inode->i_op = &btrfs_dir_inode_operations;
	inode->i_fop = &btrfs_dir_file_operations;

	err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
	if (err)
		goto out_fail;

	/* New directories start out empty. */
	btrfs_i_size_write(BTRFS_I(inode), 0);
	err = btrfs_update_inode(trans, root, inode);
	if (err)
		goto out_fail;

	err = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
			dentry->d_name.name,
			dentry->d_name.len, 0, index);
	if (err)
		goto out_fail;

	d_instantiate_new(dentry, inode);

out_fail:
	btrfs_end_transaction(trans);
	if (err && inode) {
		inode_dec_link_count(inode);
		discard_new_inode(inode);
	}
	btrfs_btree_balance_dirty(fs_info);
	return err;
}
6720
/*
 * Decompress an inline file extent into @page.
 *
 * @pg_offset must be zero (warned on otherwise); any tail of the page
 * past the decompressed data is zeroed so stale data never leaks.
 * Returns the btrfs_decompress() result, or -ENOMEM.
 */
static noinline int uncompress_inline(struct btrfs_path *path,
				      struct page *page,
				      size_t pg_offset, u64 extent_offset,
				      struct btrfs_file_extent_item *item)
{
	int ret;
	struct extent_buffer *leaf = path->nodes[0];
	char *tmp;
	size_t max_size;
	unsigned long inline_size;
	unsigned long ptr;
	int compress_type;

	WARN_ON(pg_offset != 0);
	compress_type = btrfs_file_extent_compression(leaf, item);
	max_size = btrfs_file_extent_ram_bytes(leaf, item);
	inline_size = btrfs_file_extent_inline_item_len(leaf,
					btrfs_item_nr(path->slots[0]));
	tmp = kmalloc(inline_size, GFP_NOFS);
	if (!tmp)
		return -ENOMEM;
	ptr = btrfs_file_extent_inline_start(item);

	/* Copy the compressed bytes out of the leaf into a linear buffer. */
	read_extent_buffer(leaf, tmp, ptr, inline_size);

	/* Inline data never spans more than one page. */
	max_size = min_t(unsigned long, PAGE_SIZE, max_size);
	ret = btrfs_decompress(compress_type, tmp, page,
			       extent_offset, inline_size, max_size);

	/*
	 * Decompression code covers the space between the end of the
	 * uncompressed data and max_size, but not the gap between the end
	 * of the inline extent and the end of the page, so zero that
	 * region here.
	 */
	if (max_size + pg_offset < PAGE_SIZE) {
		char *map = kmap(page);
		memset(map + pg_offset + max_size, 0, PAGE_SIZE - max_size - pg_offset);
		kunmap(page);
	}
	kfree(tmp);
	return ret;
}
6766
6767
6768
6769
6770
6771
6772
6773
6774
/*
 * btrfs_get_extent - map a file range to an extent_map
 *
 * Look up the extent covering @start in the cached extent map tree
 * and, on a miss, read the file extent item from the b-tree.  Gaps are
 * reported as EXTENT_MAP_HOLE mappings; inline extent data is copied
 * into @page when one is supplied and @create is zero.
 *
 * Returns a referenced extent_map (caller must free_extent_map()) or
 * an ERR_PTR() on failure.
 */
struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
				    struct page *page,
				    size_t pg_offset, u64 start, u64 len,
				    int create)
{
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	int ret;
	int err = 0;
	u64 extent_start = 0;
	u64 extent_end = 0;
	u64 objectid = btrfs_ino(inode);
	int extent_type = -1;
	struct btrfs_path *path = NULL;
	struct btrfs_root *root = inode->root;
	struct btrfs_file_extent_item *item;
	struct extent_buffer *leaf;
	struct btrfs_key found_key;
	struct extent_map *em = NULL;
	struct extent_map_tree *em_tree = &inode->extent_tree;
	struct extent_io_tree *io_tree = &inode->io_tree;
	const bool new_inline = !page || create;

	/* Fast path: the mapping may already be cached. */
	read_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, start, len);
	if (em)
		em->bdev = fs_info->fs_devices->latest_bdev;
	read_unlock(&em_tree->lock);

	if (em) {
		if (em->start > start || em->start + em->len <= start)
			free_extent_map(em);
		else if (em->block_start == EXTENT_MAP_INLINE && page)
			free_extent_map(em);
		else
			goto out;
	}
	em = alloc_extent_map();
	if (!em) {
		err = -ENOMEM;
		goto out;
	}
	em->bdev = fs_info->fs_devices->latest_bdev;
	em->start = EXTENT_MAP_HOLE;
	em->orig_start = EXTENT_MAP_HOLE;
	em->len = (u64)-1;
	em->block_len = (u64)-1;

	path = btrfs_alloc_path();
	if (!path) {
		err = -ENOMEM;
		goto out;
	}

	/* Chances are we'll be called again, so go ahead and do readahead */
	path->reada = READA_FORWARD;

	/*
	 * Unless we're going to uncompress the inline extent, no sleep
	 * would happen, so keep the path spinning.
	 */
	path->leave_spinning = 1;

	ret = btrfs_lookup_file_extent(NULL, root, path, objectid, start, 0);
	if (ret < 0) {
		err = ret;
		goto out;
	} else if (ret > 0) {
		if (path->slots[0] == 0)
			goto not_found;
		path->slots[0]--;
	}

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0],
			      struct btrfs_file_extent_item);
	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
	if (found_key.objectid != objectid ||
	    found_key.type != BTRFS_EXTENT_DATA_KEY) {
		/*
		 * If we backed up past the first extent we want to move
		 * forward and see if there is an extent in front of us,
		 * otherwise we'd report a hole for our whole search range,
		 * which can cause problems.
		 */
		extent_end = start;
		goto next;
	}

	extent_type = btrfs_file_extent_type(leaf, item);
	extent_start = found_key.offset;
	if (extent_type == BTRFS_FILE_EXTENT_REG ||
	    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
		extent_end = extent_start +
		       btrfs_file_extent_num_bytes(leaf, item);

		trace_btrfs_get_extent_show_fi_regular(inode, leaf, item,
						       extent_start);
	} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
		size_t size;

		size = btrfs_file_extent_ram_bytes(leaf, item);
		extent_end = ALIGN(extent_start + size,
				   fs_info->sectorsize);

		trace_btrfs_get_extent_show_fi_inline(inode, leaf, item,
						      path->slots[0],
						      extent_start);
	}
next:
	if (start >= extent_end) {
		/* Found extent ends before @start: step to the next item. */
		path->slots[0]++;
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0) {
				err = ret;
				goto out;
			} else if (ret > 0) {
				goto not_found;
			}
			leaf = path->nodes[0];
		}
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != objectid ||
		    found_key.type != BTRFS_EXTENT_DATA_KEY)
			goto not_found;
		if (start + len <= found_key.offset)
			goto not_found;
		if (start > found_key.offset)
			goto next;

		/* Hole between @start and the next extent. */
		em->start = start;
		em->orig_start = start;
		em->len = found_key.offset - start;
		em->block_start = EXTENT_MAP_HOLE;
		goto insert;
	}

	btrfs_extent_item_to_extent_map(inode, path, item,
			new_inline, em);

	if (extent_type == BTRFS_FILE_EXTENT_REG ||
	    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
		goto insert;
	} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
		unsigned long ptr;
		char *map;
		size_t size;
		size_t extent_offset;
		size_t copy_size;

		if (new_inline)
			goto out;

		/* Copy (and possibly decompress) the inline data into the page. */
		size = btrfs_file_extent_ram_bytes(leaf, item);
		extent_offset = page_offset(page) + pg_offset - extent_start;
		copy_size = min_t(u64, PAGE_SIZE - pg_offset,
				  size - extent_offset);
		em->start = extent_start + extent_offset;
		em->len = ALIGN(copy_size, fs_info->sectorsize);
		em->orig_block_len = em->len;
		em->orig_start = em->start;
		ptr = btrfs_file_extent_inline_start(item) + extent_offset;

		btrfs_set_path_blocking(path);
		if (!PageUptodate(page)) {
			if (btrfs_file_extent_compression(leaf, item) !=
			    BTRFS_COMPRESS_NONE) {
				ret = uncompress_inline(path, page, pg_offset,
							extent_offset, item);
				if (ret) {
					err = ret;
					goto out;
				}
			} else {
				map = kmap(page);
				read_extent_buffer(leaf, map + pg_offset, ptr,
						   copy_size);
				/* Zero the tail of the page past the copy. */
				if (pg_offset + copy_size < PAGE_SIZE) {
					memset(map + pg_offset + copy_size, 0,
					       PAGE_SIZE - pg_offset -
					       copy_size);
				}
				kunmap(page);
			}
			flush_dcache_page(page);
		}
		set_extent_uptodate(io_tree, em->start,
				    extent_map_end(em) - 1, NULL, GFP_NOFS);
		goto insert;
	}
not_found:
	/* Nothing on disk: the whole searched range is a hole. */
	em->start = start;
	em->orig_start = start;
	em->len = len;
	em->block_start = EXTENT_MAP_HOLE;
insert:
	btrfs_release_path(path);
	if (em->start > start || extent_map_end(em) <= start) {
		btrfs_err(fs_info,
			  "bad extent! em: [%llu %llu] passed [%llu %llu]",
			  em->start, em->len, start, len);
		err = -EIO;
		goto out;
	}

	err = 0;
	write_lock(&em_tree->lock);
	err = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
	write_unlock(&em_tree->lock);
out:
	btrfs_free_path(path);

	trace_btrfs_get_extent(root, inode, em);

	if (err) {
		free_extent_map(em);
		return ERR_PTR(err);
	}
	BUG_ON(!em);
	return em;
}
6997
/*
 * fiemap variant of btrfs_get_extent(): besides on-disk extents it
 * also reports delalloc ranges (dirty data not yet allocated) as
 * EXTENT_MAP_DELALLOC mappings.
 */
struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
					   u64 start, u64 len)
{
	struct extent_map *em;
	struct extent_map *hole_em = NULL;
	u64 delalloc_start = start;
	u64 end;
	u64 delalloc_len;
	u64 delalloc_end;
	int err = 0;

	em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
	if (IS_ERR(em))
		return em;

	/*
	 * If our em maps to a hole or a pre-alloc extent, there might
	 * actually be delalloc bytes behind it; otherwise return the real
	 * extent as-is.
	 */
	if (em->block_start != EXTENT_MAP_HOLE &&
	    !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
		return em;
	else
		hole_em = em;

	/* Clamp the end of the range to avoid u64 wraparound. */
	end = start + len;
	if (end < start)
		end = (u64)-1;
	else
		end -= 1;

	em = NULL;

	/* Look for delalloc bytes in the range. */
	delalloc_len = count_range_bits(&inode->io_tree, &delalloc_start,
				 end, len, EXTENT_DELALLOC, 1);
	delalloc_end = delalloc_start + delalloc_len;
	if (delalloc_end < delalloc_start)
		delalloc_end = (u64)-1;

	/*
	 * No delalloc overlapping our range: return what we found in terms
	 * of our hole extent.
	 */
	if (delalloc_start > end || delalloc_end <= start) {
		em = hole_em;
		hole_em = NULL;
		goto out;
	}

	/*
	 * Adjust the delalloc_start to make sure it doesn't go backwards
	 * from the start they passed in.
	 */
	delalloc_start = max(start, delalloc_start);
	delalloc_len = delalloc_end - delalloc_start;

	if (delalloc_len > 0) {
		u64 hole_start;
		u64 hole_len;
		const u64 hole_end = extent_map_end(hole_em);

		em = alloc_extent_map();
		if (!em) {
			err = -ENOMEM;
			goto out;
		}
		em->bdev = NULL;

		ASSERT(hole_em);
		/*
		 * When btrfs_get_extent can't find anything it returns one
		 * huge hole.
		 *
		 * Make sure what it found really fits our range, and adjust
		 * to make sure it is based on the start from the caller.
		 */
		if (hole_end <= start || hole_em->start > end) {
			free_extent_map(hole_em);
			hole_em = NULL;
		} else {
			hole_start = max(hole_em->start, start);
			hole_len = hole_end - hole_start;
		}

		if (hole_em && delalloc_start > hole_start) {
			/*
			 * Our hole starts before our delalloc, so we have to
			 * return just the parts of the hole that go until the
			 * delalloc starts.
			 */
			em->len = min(hole_len, delalloc_start - hole_start);
			em->start = hole_start;
			em->orig_start = hole_start;
			/*
			 * Don't adjust block start at all, it is fixed at
			 * EXTENT_MAP_HOLE.
			 */
			em->block_start = hole_em->block_start;
			em->block_len = hole_len;
			if (test_bit(EXTENT_FLAG_PREALLOC, &hole_em->flags))
				set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
		} else {
			/*
			 * Hole is out of the passed range or it starts after
			 * the delalloc range: report the delalloc itself.
			 */
			em->start = delalloc_start;
			em->len = delalloc_len;
			em->orig_start = delalloc_start;
			em->block_start = EXTENT_MAP_DELALLOC;
			em->block_len = delalloc_len;
		}
	} else {
		return hole_em;
	}
out:

	free_extent_map(hole_em);
	if (err) {
		free_extent_map(em);
		return ERR_PTR(err);
	}
	return em;
}
7125
7126static struct extent_map *btrfs_create_dio_extent(struct inode *inode,
7127 const u64 start,
7128 const u64 len,
7129 const u64 orig_start,
7130 const u64 block_start,
7131 const u64 block_len,
7132 const u64 orig_block_len,
7133 const u64 ram_bytes,
7134 const int type)
7135{
7136 struct extent_map *em = NULL;
7137 int ret;
7138
7139 if (type != BTRFS_ORDERED_NOCOW) {
7140 em = create_io_em(inode, start, len, orig_start,
7141 block_start, block_len, orig_block_len,
7142 ram_bytes,
7143 BTRFS_COMPRESS_NONE,
7144 type);
7145 if (IS_ERR(em))
7146 goto out;
7147 }
7148 ret = btrfs_add_ordered_extent_dio(inode, start, block_start,
7149 len, block_len, type);
7150 if (ret) {
7151 if (em) {
7152 free_extent_map(em);
7153 btrfs_drop_extent_cache(BTRFS_I(inode), start,
7154 start + len - 1, 0);
7155 }
7156 em = ERR_PTR(ret);
7157 }
7158 out:
7159
7160 return em;
7161}
7162
7163static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
7164 u64 start, u64 len)
7165{
7166 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
7167 struct btrfs_root *root = BTRFS_I(inode)->root;
7168 struct extent_map *em;
7169 struct btrfs_key ins;
7170 u64 alloc_hint;
7171 int ret;
7172
7173 alloc_hint = get_extent_allocation_hint(inode, start, len);
7174 ret = btrfs_reserve_extent(root, len, len, fs_info->sectorsize,
7175 0, alloc_hint, &ins, 1, 1);
7176 if (ret)
7177 return ERR_PTR(ret);
7178
7179 em = btrfs_create_dio_extent(inode, start, ins.offset, start,
7180 ins.objectid, ins.offset, ins.offset,
7181 ins.offset, BTRFS_ORDERED_REGULAR);
7182 btrfs_dec_block_group_reservations(fs_info, ins.objectid);
7183 if (IS_ERR(em))
7184 btrfs_free_reserved_extent(fs_info, ins.objectid,
7185 ins.offset, 1);
7186
7187 return em;
7188}
7189
7190
7191
7192
7193
/*
 * Check whether [offset, offset + *len) can be written without
 * copy-on-write.
 *
 * Returns 1 when NOCOW is possible (*len is trimmed to the usable
 * part of the extent and, when @orig_start is non-NULL, *orig_start /
 * *orig_block_len / *ram_bytes describe the backing extent), 0 when
 * the range must be COWed, or a negative errno.
 */
noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
			      u64 *orig_start, u64 *orig_block_len,
			      u64 *ram_bytes)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_path *path;
	int ret;
	struct extent_buffer *leaf;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 disk_bytenr;
	u64 backref_offset;
	u64 extent_end;
	u64 num_bytes;
	int slot;
	int found_type;
	bool nocow = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_lookup_file_extent(NULL, root, path,
			btrfs_ino(BTRFS_I(inode)), offset, 0);
	if (ret < 0)
		goto out;

	slot = path->slots[0];
	if (ret == 1) {
		if (slot == 0) {
			/* can't find the item, must cow */
			ret = 0;
			goto out;
		}
		slot--;
	}
	ret = 0;
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, slot);
	if (key.objectid != btrfs_ino(BTRFS_I(inode)) ||
	    key.type != BTRFS_EXTENT_DATA_KEY) {
		/* not our file or wrong item type, must cow */
		goto out;
	}

	if (key.offset > offset) {
		/* Wrong offset, must cow */
		goto out;
	}

	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
	found_type = btrfs_file_extent_type(leaf, fi);
	if (found_type != BTRFS_FILE_EXTENT_REG &&
	    found_type != BTRFS_FILE_EXTENT_PREALLOC) {
		/* not a regular extent, must cow */
		goto out;
	}

	if (!nocow && found_type == BTRFS_FILE_EXTENT_REG)
		goto out;

	extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
	if (extent_end <= offset)
		goto out;

	disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
	if (disk_bytenr == 0)
		goto out;

	/* Compressed/encrypted/encoded extents must always be COWed. */
	if (btrfs_file_extent_compression(leaf, fi) ||
	    btrfs_file_extent_encryption(leaf, fi) ||
	    btrfs_file_extent_other_encoding(leaf, fi))
		goto out;

	/*
	 * An extent created before the last snapshot may be shared with
	 * that snapshot, so it must be COWed.
	 */
	if (btrfs_file_extent_generation(leaf, fi) <=
	    btrfs_root_last_snapshot(&root->root_item))
		goto out;

	backref_offset = btrfs_file_extent_offset(leaf, fi);

	if (orig_start) {
		*orig_start = key.offset - backref_offset;
		*orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
		*ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
	}

	if (btrfs_extent_readonly(fs_info, disk_bytenr))
		goto out;

	num_bytes = min(offset + *len, extent_end) - offset;
	if (!nocow && found_type == BTRFS_FILE_EXTENT_PREALLOC) {
		u64 range_end;

		/* Delalloc over a prealloc extent means pending COW. */
		range_end = round_up(offset + num_bytes,
				     root->fs_info->sectorsize) - 1;
		ret = test_range_bit(io_tree, offset, range_end,
				     EXTENT_DELALLOC, 0, NULL);
		if (ret) {
			ret = -EAGAIN;
			goto out;
		}
	}

	btrfs_release_path(path);

	/*
	 * look for other files referencing this extent, if we
	 * find any we must cow
	 */
	ret = btrfs_cross_ref_exist(root, btrfs_ino(BTRFS_I(inode)),
				    key.offset - backref_offset, disk_bytenr);
	if (ret) {
		ret = 0;
		goto out;
	}

	/*
	 * adjust disk_bytenr and num_bytes to cover just the bytes
	 * in this extent we are about to write.  If there
	 * are any csums in that range we have to cow in order
	 * to keep the csums correct
	 */
	disk_bytenr += backref_offset;
	disk_bytenr += offset - key.offset;
	if (csum_exist_in_range(fs_info, disk_bytenr, num_bytes))
		goto out;
	/*
	 * all of the above have passed, it is safe to overwrite this extent
	 * without cow
	 */
	*len = num_bytes;
	ret = 1;
out:
	btrfs_free_path(path);
	return ret;
}
7337
/*
 * Lock [lockstart, lockend] in the io tree for a direct IO, waiting
 * out conflicting ordered extents.  Returns 0 with the range locked,
 * or -ENOTBLK when the DIO must fall back to buffered IO.
 */
static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
			      struct extent_state **cached_state, int writing)
{
	struct btrfs_ordered_extent *ordered;
	int ret = 0;

	while (1) {
		lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
				 cached_state);
		/*
		 * We're concerned with the entire range that we're going to
		 * be doing DIO to, so we need to make sure there's no
		 * ordered extents in this range.
		 */
		ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), lockstart,
						     lockend - lockstart + 1);

		/*
		 * We need to make sure there are no buffered pages in this
		 * range either: we could have raced between the invalidate
		 * in generic_file_direct_write and locking the extent.  The
		 * invalidate needs to happen so that reads after a write do
		 * not get stale data.
		 */
		if (!ordered &&
		    (!writing || !filemap_range_has_page(inode->i_mapping,
							 lockstart, lockend)))
			break;

		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
				     cached_state);

		if (ordered) {
			/*
			 * If we are doing a DIO read and the ordered extent
			 * we found is for a buffered write, we can not wait
			 * for it to complete and retry, because doing so can
			 * deadlock with concurrent buffered writes on page
			 * locks (our DIO read may have already created an
			 * ordered extent and locked part of the io tree,
			 * while buffered writes lock pages before attempting
			 * to lock the io tree range).  Waiting is only safe
			 * for writes or when the ordered extent is itself a
			 * direct one.
			 */
			if (writing ||
			    test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags))
				btrfs_start_ordered_extent(inode, ordered, 1);
			else
				ret = -ENOTBLK;
			btrfs_put_ordered_extent(ordered);
		} else {
			/*
			 * Dirty pages but no ordered extent.  We could force
			 * writeback and invalidate, but that can deadlock
			 * with a concurrent readpages() on a page lock due
			 * to an ordered dio extent created earlier whose bio
			 * has not been submitted yet, so just fall back to
			 * buffered IO.
			 */
			ret = -ENOTBLK;
		}

		if (ret)
			break;

		cond_resched();
	}

	return ret;
}
7417
7418
/*
 * Create and insert an extent map for IO in flight over
 * [start, start + len).  The map is pinned (EXTENT_FLAG_PINNED) until
 * the matching ordered extent completes; overlapping cached maps are
 * dropped before insertion.  Callers must hold the extent range locked.
 */
static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
				       u64 orig_start, u64 block_start,
				       u64 block_len, u64 orig_block_len,
				       u64 ram_bytes, int compress_type,
				       int type)
{
	struct extent_map_tree *em_tree;
	struct extent_map *em;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret;

	ASSERT(type == BTRFS_ORDERED_PREALLOC ||
	       type == BTRFS_ORDERED_COMPRESSED ||
	       type == BTRFS_ORDERED_NOCOW ||
	       type == BTRFS_ORDERED_REGULAR);

	em_tree = &BTRFS_I(inode)->extent_tree;
	em = alloc_extent_map();
	if (!em)
		return ERR_PTR(-ENOMEM);

	em->start = start;
	em->orig_start = orig_start;
	em->len = len;
	em->block_len = block_len;
	em->block_start = block_start;
	em->bdev = root->fs_info->fs_devices->latest_bdev;
	em->orig_block_len = orig_block_len;
	em->ram_bytes = ram_bytes;
	em->generation = -1;
	set_bit(EXTENT_FLAG_PINNED, &em->flags);
	if (type == BTRFS_ORDERED_PREALLOC) {
		set_bit(EXTENT_FLAG_FILLING, &em->flags);
	} else if (type == BTRFS_ORDERED_COMPRESSED) {
		set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
		em->compress_type = compress_type;
	}

	/* Drop any overlapping cached mappings, then retry the insert. */
	do {
		btrfs_drop_extent_cache(BTRFS_I(inode), em->start,
				em->start + em->len - 1, 0);
		write_lock(&em_tree->lock);
		ret = add_extent_mapping(em_tree, em, 1);
		write_unlock(&em_tree->lock);
		/*
		 * The caller has taken lock_extent(), who could race with
		 * us to add em?
		 */
	} while (ret == -EEXIST);

	if (ret) {
		free_extent_map(em);
		return ERR_PTR(ret);
	}

	/* em now has 2 refs; the caller must free_extent_map() once. */
	return em;
}
7477
7478
7479static int btrfs_get_blocks_direct_read(struct extent_map *em,
7480 struct buffer_head *bh_result,
7481 struct inode *inode,
7482 u64 start, u64 len)
7483{
7484 if (em->block_start == EXTENT_MAP_HOLE ||
7485 test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
7486 return -ENOENT;
7487
7488 len = min(len, em->len - (start - em->start));
7489
7490 bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
7491 inode->i_blkbits;
7492 bh_result->b_size = len;
7493 bh_result->b_bdev = em->bdev;
7494 set_buffer_mapped(bh_result);
7495
7496 return 0;
7497}
7498
/*
 * Map a direct IO write over [start, start + len): reuse the existing
 * extent when a NOCOW/prealloc write is possible, otherwise allocate a
 * new extent (COW).  Fills in @bh_result and accounts the range in
 * @dio_data.  Returns 0 or a negative errno.
 */
static int btrfs_get_blocks_direct_write(struct extent_map **map,
					 struct buffer_head *bh_result,
					 struct inode *inode,
					 struct btrfs_dio_data *dio_data,
					 u64 start, u64 len)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_map *em = *map;
	int ret = 0;

	/*
	 * We don't allocate a new extent in the following cases:
	 *
	 * 1) The inode is marked as NODATACOW. In this case we'll just use
	 *    the existing extent.
	 * 2) The extent is marked as PREALLOC. We're good to go here and
	 *    can just use the extent.
	 */
	if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
	    ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
	     em->block_start != EXTENT_MAP_HOLE)) {
		int type;
		u64 block_start, orig_start, orig_block_len, ram_bytes;

		if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
			type = BTRFS_ORDERED_PREALLOC;
		else
			type = BTRFS_ORDERED_NOCOW;
		len = min(len, em->len - (start - em->start));
		block_start = em->block_start + (start - em->start);

		if (can_nocow_extent(inode, start, &len, &orig_start,
				     &orig_block_len, &ram_bytes) == 1 &&
		    btrfs_inc_nocow_writers(fs_info, block_start)) {
			struct extent_map *em2;

			em2 = btrfs_create_dio_extent(inode, start, len,
						      orig_start, block_start,
						      len, orig_block_len,
						      ram_bytes, type);
			btrfs_dec_nocow_writers(fs_info, block_start);
			if (type == BTRFS_ORDERED_PREALLOC) {
				free_extent_map(em);
				*map = em = em2;
			}

			if (em2 && IS_ERR(em2)) {
				ret = PTR_ERR(em2);
				goto out;
			}

			/*
			 * For inode marked NODATACOW or extent marked
			 * PREALLOC, use the existing or preallocated extent,
			 * so there is no need to adjust btrfs_space_info's
			 * bytes_may_use.
			 */
			btrfs_free_reserved_data_space_noquota(inode, start,
							       len);
			goto skip_cow;
		}
	}

	/* this will cow the extent */
	len = bh_result->b_size;
	free_extent_map(em);
	*map = em = btrfs_new_extent_direct(inode, start, len);
	if (IS_ERR(em)) {
		ret = PTR_ERR(em);
		goto out;
	}

	len = min(len, em->len - (start - em->start));

skip_cow:
	bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
		inode->i_blkbits;
	bh_result->b_size = len;
	bh_result->b_bdev = em->bdev;
	set_buffer_mapped(bh_result);

	if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
		set_buffer_new(bh_result);

	/*
	 * Need to update the i_size under the extent lock so buffered
	 * readers will get the updated i_size when we unlock.
	 */
	if (!dio_data->overwrite && start + len > i_size_read(inode))
		i_size_write(inode, start + len);

	WARN_ON(dio_data->reserve < len);
	dio_data->reserve -= len;
	dio_data->unsubmitted_oe_range_end = start + len;
	current->journal_info = dio_data;
out:
	return ret;
}
7596
/*
 * get_block callback for btrfs direct IO: maps @iblock to disk blocks
 * in @bh_result, locking the range in the io tree and delegating to
 * the read/write helpers.  Returns 0 or a negative errno (-ENOTBLK
 * means "fall back to buffered IO").
 */
static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
				   struct buffer_head *bh_result, int create)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_map *em;
	struct extent_state *cached_state = NULL;
	struct btrfs_dio_data *dio_data = NULL;
	u64 start = iblock << inode->i_blkbits;
	u64 lockstart, lockend;
	u64 len = bh_result->b_size;
	int unlock_bits = EXTENT_LOCKED;
	int ret = 0;

	if (create)
		unlock_bits |= EXTENT_DIRTY;
	else
		len = min_t(u64, len, fs_info->sectorsize);

	lockstart = start;
	lockend = start + len - 1;

	if (current->journal_info) {
		/*
		 * We'd normally need to start a transaction here, but we
		 * stashed our dio_data in journal_info instead; pull it out
		 * and clear journal_info so anything checking for a
		 * transaction doesn't get confused.
		 */
		dio_data = current->journal_info;
		current->journal_info = NULL;
	}

	/*
	 * If this errors out it's because we couldn't invalidate pagecache
	 * for this range and we need to fallback to buffered.
	 */
	if (lock_extent_direct(inode, lockstart, lockend, &cached_state,
			       create)) {
		ret = -ENOTBLK;
		goto err;
	}

	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0);
	if (IS_ERR(em)) {
		ret = PTR_ERR(em);
		goto unlock_err;
	}

	/*
	 * Ok for INLINE and COMPRESSED extents we need to fallback on
	 * buffered io.  INLINE is special, and we could probably kludge it
	 * in here, but it's still buffered so for safety lets just fall
	 * back to the generic buffered path.
	 *
	 * For COMPRESSED we _have_ to read the entire extent in so we can
	 * decompress it, so there will be buffering required no matter what
	 * we do, so go ahead and fallback to buffered.
	 *
	 * We return -ENOTBLK because that's what makes DIO go ahead and go
	 * back to buffered IO.  Returning ERR_PTR here is not correct.
	 */
	if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
	    em->block_start == EXTENT_MAP_INLINE) {
		free_extent_map(em);
		ret = -ENOTBLK;
		goto unlock_err;
	}

	if (create) {
		ret = btrfs_get_blocks_direct_write(&em, bh_result, inode,
						    dio_data, start, len);
		if (ret < 0)
			goto unlock_err;

		/* clear and unlock the entire range */
		clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
				 unlock_bits, 1, 0, &cached_state);
	} else {
		ret = btrfs_get_blocks_direct_read(em, bh_result, inode,
						   start, len);
		/* Can be negative only if we read from a hole */
		if (ret < 0) {
			ret = 0;
			free_extent_map(em);
			goto unlock_err;
		}
		/*
		 * We need to unlock only the end area that we aren't using.
		 * The rest is going to be unlocked by the endio routine.
		 */
		lockstart = start + bh_result->b_size;
		if (lockstart < lockend) {
			clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
					 lockend, unlock_bits, 1, 0,
					 &cached_state);
		} else {
			free_extent_state(cached_state);
		}
	}

	free_extent_map(em);

	return 0;

unlock_err:
	clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			 unlock_bits, 1, 0, &cached_state);
err:
	/* Restore the stashed dio_data for later calls. */
	if (dio_data)
		current->journal_info = dio_data;
	return ret;
}
7709
7710static inline blk_status_t submit_dio_repair_bio(struct inode *inode,
7711 struct bio *bio,
7712 int mirror_num)
7713{
7714 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
7715 blk_status_t ret;
7716
7717 BUG_ON(bio_op(bio) == REQ_OP_WRITE);
7718
7719 ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DIO_REPAIR);
7720 if (ret)
7721 return ret;
7722
7723 ret = btrfs_map_bio(fs_info, bio, mirror_num, 0);
7724
7725 return ret;
7726}
7727
7728static int btrfs_check_dio_repairable(struct inode *inode,
7729 struct bio *failed_bio,
7730 struct io_failure_record *failrec,
7731 int failed_mirror)
7732{
7733 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
7734 int num_copies;
7735
7736 num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len);
7737 if (num_copies == 1) {
7738
7739
7740
7741
7742
7743 btrfs_debug(fs_info,
7744 "Check DIO Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d",
7745 num_copies, failrec->this_mirror, failed_mirror);
7746 return 0;
7747 }
7748
7749 failrec->failed_mirror = failed_mirror;
7750 failrec->this_mirror++;
7751 if (failrec->this_mirror == failed_mirror)
7752 failrec->this_mirror++;
7753
7754 if (failrec->this_mirror > num_copies) {
7755 btrfs_debug(fs_info,
7756 "Check DIO Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d",
7757 num_copies, failrec->this_mirror, failed_mirror);
7758 return 0;
7759 }
7760
7761 return 1;
7762}
7763
/*
 * Build and submit a repair read for a failed DIO range, targeting the
 * next mirror.  @repair_endio/@repair_arg become the new bio's
 * completion.  Returns BLK_STS_OK when the repair bio was submitted,
 * otherwise an error status with the failure record freed.
 */
static blk_status_t dio_read_error(struct inode *inode, struct bio *failed_bio,
				   struct page *page, unsigned int pgoff,
				   u64 start, u64 end, int failed_mirror,
				   bio_end_io_t *repair_endio, void *repair_arg)
{
	struct io_failure_record *failrec;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
	struct bio *bio;
	int isector;
	unsigned int read_mode = 0;
	int segs;
	int ret;
	blk_status_t status;
	struct bio_vec bvec;

	/* Only failed reads are repaired this way. */
	BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);

	ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
	if (ret)
		return errno_to_blk_status(ret);

	ret = btrfs_check_dio_repairable(inode, failed_bio, failrec,
					 failed_mirror);
	if (!ret) {
		free_io_failure(failure_tree, io_tree, failrec);
		return BLK_STS_IOERR;
	}

	/*
	 * If the failed bio covered more than one sector, fail fast on the
	 * retry (presumably so per-sector validation can take over —
	 * NOTE(review): confirm against the endio validation logic).
	 */
	segs = bio_segments(failed_bio);
	bio_get_first_bvec(failed_bio, &bvec);
	if (segs > 1 ||
	    (bvec.bv_len > btrfs_inode_sectorsize(inode)))
		read_mode |= REQ_FAILFAST_DEV;

	isector = start - btrfs_io_bio(failed_bio)->logical;
	isector >>= inode->i_sb->s_blocksize_bits;
	bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
				pgoff, isector, repair_endio, repair_arg);
	bio->bi_opf = REQ_OP_READ | read_mode;

	btrfs_debug(BTRFS_I(inode)->root->fs_info,
		    "repair DIO read error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d",
		    read_mode, failrec->this_mirror, failrec->in_validation);

	status = submit_dio_repair_bio(inode, bio, failrec->this_mirror);
	if (status) {
		free_io_failure(failure_tree, io_tree, failrec);
		bio_put(bio);
	}

	return status;
}
7817
/*
 * Completion context for a synchronous direct IO repair read: the
 * submitter waits on @done while the retry endio sets @uptodate.
 */
struct btrfs_retry_complete {
	struct completion done;	/* signalled by the retry endio */
	struct inode *inode;	/* inode the repair read belongs to */
	u64 start;		/* start offset of the sector being retried */
	int uptodate;		/* set to 1 by the endio on a good reread */
};
7824
/*
 * Endio for a nodatasum direct IO repair read.  On success, mark the
 * io failure record clean; either way, wake the submitter waiting on
 * done->done and drop the repair bio.
 */
static void btrfs_retry_endio_nocsum(struct bio *bio)
{
	struct btrfs_retry_complete *done = bio->bi_private;
	struct inode *inode = done->inode;
	struct bio_vec *bvec;
	struct extent_io_tree *io_tree, *failure_tree;
	int i;
	struct bvec_iter_all iter_all;

	if (bio->bi_status)
		goto end;

	/* The repair bio was built for exactly one sector. */
	ASSERT(bio->bi_vcnt == 1);
	io_tree = &BTRFS_I(inode)->io_tree;
	failure_tree = &BTRFS_I(inode)->io_failure_tree;
	ASSERT(bio_first_bvec_all(bio)->bv_len == btrfs_inode_sectorsize(inode));

	done->uptodate = 1;
	ASSERT(!bio_flagged(bio, BIO_CLONED));
	bio_for_each_segment_all(bvec, bio, i, iter_all)
		clean_io_failure(BTRFS_I(inode)->root->fs_info, failure_tree,
				 io_tree, done->start, bvec->bv_page,
				 btrfs_ino(BTRFS_I(inode)), 0);
end:
	complete(&done->done);
	bio_put(bio);
}
7852
/*
 * Retry every sector of a failed nodatasum direct IO read, one sector at
 * a time, waiting synchronously for each repair read to complete.  A
 * sector keeps being retried from other mirrors until it reads back
 * successfully or dio_read_error() runs out of mirrors and errors out.
 *
 * Returns BLK_STS_OK when everything was eventually read, otherwise the
 * last error returned by dio_read_error().
 */
static blk_status_t __btrfs_correct_data_nocsum(struct inode *inode,
				struct btrfs_io_bio *io_bio)
{
	struct btrfs_fs_info *fs_info;
	struct bio_vec bvec;
	struct bvec_iter iter;
	struct btrfs_retry_complete done;
	u64 start;
	unsigned int pgoff;
	u32 sectorsize;
	int nr_sectors;
	blk_status_t ret;
	blk_status_t err = BLK_STS_OK;

	fs_info = BTRFS_I(inode)->root->fs_info;
	sectorsize = fs_info->sectorsize;

	start = io_bio->logical;
	done.inode = inode;
	/* Rewind the iterator so the whole bio can be walked again. */
	io_bio->bio.bi_iter = io_bio->iter;

	bio_for_each_segment(bvec, &io_bio->bio, iter) {
		nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len);
		pgoff = bvec.bv_offset;

next_block_or_try_again:
		done.uptodate = 0;
		done.start = start;
		init_completion(&done.done);

		ret = dio_read_error(inode, &io_bio->bio, bvec.bv_page,
				pgoff, start, start + sectorsize - 1,
				io_bio->mirror_num,
				btrfs_retry_endio_nocsum, &done);
		if (ret) {
			err = ret;
			goto next;
		}

		wait_for_completion_io(&done.done);

		if (!done.uptodate) {
			/* The reread failed too; try the next mirror. */
			goto next_block_or_try_again;
		}

next:
		start += sectorsize;

		nr_sectors--;
		if (nr_sectors) {
			/* More sectors left in this bvec. */
			pgoff += sectorsize;
			ASSERT(pgoff < PAGE_SIZE);
			goto next_block_or_try_again;
		}
	}

	return err;
}
7912
/*
 * Endio for a checksummed direct IO repair read.  Verify the reread
 * sector against its csum; if it matches, clean the failure record,
 * otherwise leave done->uptodate clear so the submitter tries the next
 * mirror.  Always wakes the submitter and drops the repair bio.
 */
static void btrfs_retry_endio(struct bio *bio)
{
	struct btrfs_retry_complete *done = bio->bi_private;
	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
	struct extent_io_tree *io_tree, *failure_tree;
	struct inode *inode = done->inode;
	struct bio_vec *bvec;
	int uptodate;
	int ret;
	int i;
	struct bvec_iter_all iter_all;

	if (bio->bi_status)
		goto end;

	uptodate = 1;

	/* The repair bio was built for exactly one sector. */
	ASSERT(bio->bi_vcnt == 1);
	ASSERT(bio_first_bvec_all(bio)->bv_len == btrfs_inode_sectorsize(done->inode));

	io_tree = &BTRFS_I(inode)->io_tree;
	failure_tree = &BTRFS_I(inode)->io_failure_tree;

	ASSERT(!bio_flagged(bio, BIO_CLONED));
	bio_for_each_segment_all(bvec, bio, i, iter_all) {
		ret = __readpage_endio_check(inode, io_bio, i, bvec->bv_page,
					     bvec->bv_offset, done->start,
					     bvec->bv_len);
		if (!ret)
			clean_io_failure(BTRFS_I(inode)->root->fs_info,
					 failure_tree, io_tree, done->start,
					 bvec->bv_page,
					 btrfs_ino(BTRFS_I(inode)),
					 bvec->bv_offset);
		else
			uptodate = 0;
	}

	done->uptodate = uptodate;
end:
	complete(&done->done);
	bio_put(bio);
}
7956
/*
 * Verify the checksum of every sector of a completed direct IO read and
 * synchronously retry (from other mirrors) any sector that fails
 * verification.  When @err is non-zero, the original read failed as a
 * whole, so csum checking is skipped and every sector goes straight to
 * the retry path.
 *
 * Returns BLK_STS_OK when all sectors ended up verified, otherwise the
 * last retry error.
 */
static blk_status_t __btrfs_subio_endio_read(struct inode *inode,
		struct btrfs_io_bio *io_bio, blk_status_t err)
{
	struct btrfs_fs_info *fs_info;
	struct bio_vec bvec;
	struct bvec_iter iter;
	struct btrfs_retry_complete done;
	u64 start;
	u64 offset = 0;		/* byte offset into the bio, indexes csums */
	u32 sectorsize;
	int nr_sectors;
	unsigned int pgoff;
	int csum_pos;
	bool uptodate = (err == 0);
	int ret;
	blk_status_t status;

	fs_info = BTRFS_I(inode)->root->fs_info;
	sectorsize = fs_info->sectorsize;

	/* Errors are tracked per sector from here on; start clean. */
	err = BLK_STS_OK;
	start = io_bio->logical;
	done.inode = inode;
	/* Rewind the iterator so the whole bio can be walked again. */
	io_bio->bio.bi_iter = io_bio->iter;

	bio_for_each_segment(bvec, &io_bio->bio, iter) {
		nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len);

		pgoff = bvec.bv_offset;
next_block:
		if (uptodate) {
			csum_pos = BTRFS_BYTES_TO_BLKS(fs_info, offset);
			ret = __readpage_endio_check(inode, io_bio, csum_pos,
					bvec.bv_page, pgoff, start, sectorsize);
			if (likely(!ret))
				goto next;
		}
try_again:
		done.uptodate = 0;
		done.start = start;
		init_completion(&done.done);

		status = dio_read_error(inode, &io_bio->bio, bvec.bv_page,
					pgoff, start, start + sectorsize - 1,
					io_bio->mirror_num, btrfs_retry_endio,
					&done);
		if (status) {
			err = status;
			goto next;
		}

		wait_for_completion_io(&done.done);

		if (!done.uptodate) {
			/* The reread failed too; try the next mirror. */
			goto try_again;
		}
next:
		offset += sectorsize;
		start += sectorsize;

		ASSERT(nr_sectors);

		nr_sectors--;
		if (nr_sectors) {
			/* More sectors left in this bvec. */
			pgoff += sectorsize;
			ASSERT(pgoff < PAGE_SIZE);
			goto next_block;
		}
	}

	return err;
}
8030
8031static blk_status_t btrfs_subio_endio_read(struct inode *inode,
8032 struct btrfs_io_bio *io_bio, blk_status_t err)
8033{
8034 bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
8035
8036 if (skip_csum) {
8037 if (unlikely(err))
8038 return __btrfs_correct_data_nocsum(inode, io_bio);
8039 else
8040 return BLK_STS_OK;
8041 } else {
8042 return __btrfs_subio_endio_read(inode, io_bio, err);
8043 }
8044}
8045
/*
 * Final endio for a direct IO read.  When the original bio was submitted
 * unsplit (BTRFS_DIO_ORIG_BIO_SUBMITTED), csum verification/repair has
 * not run yet, so do it here; split bios were already handled per
 * sub-bio via dip->subio_endio.  Then unlock the file range and complete
 * the user-visible dio bio.
 */
static void btrfs_endio_direct_read(struct bio *bio)
{
	struct btrfs_dio_private *dip = bio->bi_private;
	struct inode *inode = dip->inode;
	struct bio *dio_bio;
	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
	blk_status_t err = bio->bi_status;

	if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED)
		err = btrfs_subio_endio_read(inode, io_bio, err);

	unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
		      dip->logical_offset + dip->bytes - 1);
	dio_bio = dip->dio_bio;

	/* All fields we need have been read, the dip can go now. */
	kfree(dip);

	dio_bio->bi_status = err;
	dio_end_io(dio_bio);
	btrfs_io_bio_free_csum(io_bio);
	bio_put(bio);
}
8068
/*
 * Account completion of the ordered extent(s) covering the range
 * [offset, offset + bytes) after a direct IO write finished (or failed,
 * when @uptodate is false).  Each fully-completed ordered extent is
 * queued to the appropriate workqueue so finish_ordered_fn runs outside
 * this (bio completion) context.
 */
static void __endio_write_update_ordered(struct inode *inode,
					 const u64 offset, const u64 bytes,
					 const bool uptodate)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_ordered_extent *ordered = NULL;
	struct btrfs_workqueue *wq;
	btrfs_work_func_t func;
	u64 ordered_offset = offset;
	u64 ordered_bytes = bytes;
	u64 last_offset;

	/* The free space inode has its own completion workqueue. */
	if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
		wq = fs_info->endio_freespace_worker;
		func = btrfs_freespace_write_helper;
	} else {
		wq = fs_info->endio_write_workers;
		func = btrfs_endio_write_helper;
	}

	/* A single dio write may span multiple ordered extents. */
	while (ordered_offset < offset + bytes) {
		last_offset = ordered_offset;
		if (btrfs_dec_test_first_ordered_pending(inode, &ordered,
							   &ordered_offset,
							   ordered_bytes,
							   uptodate)) {
			btrfs_init_work(&ordered->work, func,
					finish_ordered_fn,
					NULL, NULL);
			btrfs_queue_work(wq, &ordered->work);
		}
		/*
		 * No progress was made: there is no more pending ordered
		 * work in this range, so we are done.
		 */
		if (ordered_offset == last_offset)
			return;
		/*
		 * Go around again for whatever is left of the range past
		 * the ordered extent we just finished.
		 */
		if (ordered_offset < offset + bytes) {
			ordered_bytes = offset + bytes - ordered_offset;
			ordered = NULL;
		}
	}
}
8116
8117static void btrfs_endio_direct_write(struct bio *bio)
8118{
8119 struct btrfs_dio_private *dip = bio->bi_private;
8120 struct bio *dio_bio = dip->dio_bio;
8121
8122 __endio_write_update_ordered(dip->inode, dip->logical_offset,
8123 dip->bytes, !bio->bi_status);
8124
8125 kfree(dip);
8126
8127 dio_bio->bi_status = bio->bi_status;
8128 dio_end_io(dio_bio);
8129 bio_put(bio);
8130}
8131
8132static blk_status_t btrfs_submit_bio_start_direct_io(void *private_data,
8133 struct bio *bio, u64 offset)
8134{
8135 struct inode *inode = private_data;
8136 blk_status_t ret;
8137 ret = btrfs_csum_one_bio(inode, bio, offset, 1);
8138 BUG_ON(ret);
8139 return 0;
8140}
8141
/*
 * Endio for each (possibly split) bio of a direct IO.  Runs csum
 * verification/repair for reads via dip->subio_endio, records any
 * error in the dip, and completes the original bio once the last
 * outstanding sub-bio finishes.
 */
static void btrfs_end_dio_bio(struct bio *bio)
{
	struct btrfs_dio_private *dip = bio->bi_private;
	blk_status_t err = bio->bi_status;

	if (err)
		btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
			   "direct IO failed ino %llu rw %d,%u sector %#Lx len %u err no %d",
			   btrfs_ino(BTRFS_I(dip->inode)), bio_op(bio),
			   bio->bi_opf,
			   (unsigned long long)bio->bi_iter.bi_sector,
			   bio->bi_iter.bi_size, err);

	if (dip->subio_endio)
		err = dip->subio_endio(dip->inode, btrfs_io_bio(bio), err);

	if (err) {
		/*
		 * We want dip->errors to be visible before the reference
		 * count is decremented below.  No explicit barrier is
		 * needed: atomic operations with a return value are fully
		 * ordered (see Documentation/atomic_t.txt).
		 */
		dip->errors = 1;
	}

	/* If more bios are still pending for this dio, just exit. */
	if (!atomic_dec_and_test(&dip->pending_bios))
		goto out;

	if (dip->errors) {
		bio_io_error(dip->orig_bio);
	} else {
		dip->dio_bio->bi_status = BLK_STS_OK;
		bio_endio(dip->orig_bio);
	}
out:
	bio_put(bio);
}
8181
/*
 * Make the checksums for @bio available for read-time verification.
 * The csums for the whole dio are loaded once, on the first sub-bio,
 * into the original bio's io_bio; later sub-bios just point their csum
 * pointer into that array at the right sector offset.
 */
static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode,
		struct btrfs_dio_private *dip,
		struct bio *bio,
		u64 file_offset)
{
	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
	struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio);
	blk_status_t ret;

	/*
	 * Load all the csum data we need when we submit the first bio to
	 * reduce csum tree searches and contention.
	 */
	if (dip->logical_offset == file_offset) {
		ret = btrfs_lookup_bio_sums_dio(inode, dip->orig_bio,
						file_offset);
		if (ret)
			return ret;
	}

	if (bio == dip->orig_bio)
		return 0;

	/* Point this sub-bio's csums into the orig bio's csum array. */
	file_offset -= dip->logical_offset;
	file_offset >>= inode->i_sb->s_blocksize_bits;
	io_bio->csum = (u8 *)(((u32 *)orig_io_bio->csum) + file_offset);

	return 0;
}
8212
/*
 * Submit one (sub-)bio of a direct IO: set up the endio workqueue for
 * reads, generate (writes) or look up (reads) checksums unless the
 * inode is nodatasum, then map the bio to the devices.  Writes may be
 * checksummed asynchronously via the wq submit helper.
 */
static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
		struct inode *inode, u64 file_offset, int async_submit)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_dio_private *dip = bio->bi_private;
	bool write = bio_op(bio) == REQ_OP_WRITE;
	blk_status_t ret;

	/* Don't submit asynchronously while a sync writer is active. */
	if (async_submit)
		async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);

	if (!write) {
		ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
		if (ret)
			goto err;
	}

	/* Nodatasum inodes need no csum generation or lookup at all. */
	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
		goto map;

	if (write && async_submit) {
		ret = btrfs_wq_submit_bio(fs_info, bio, 0, 0,
					  file_offset, inode,
					  btrfs_submit_bio_start_direct_io);
		goto err;
	} else if (write) {
		/*
		 * If we aren't doing async submit, calculate the csum of
		 * the bio now.
		 */
		ret = btrfs_csum_one_bio(inode, bio, file_offset, 1);
		if (ret)
			goto err;
	} else {
		ret = btrfs_lookup_and_bind_dio_csum(inode, dip, bio,
						     file_offset);
		if (ret)
			goto err;
	}
map:
	ret = btrfs_map_bio(fs_info, bio, 0, 0);
err:
	return ret;
}
8258
/*
 * Split the dio's cloned bio along chunk-mapping boundaries and submit
 * each piece.  If the whole bio fits into a single mapping it is
 * submitted as-is (and flagged BTRFS_DIO_ORIG_BIO_SUBMITTED so the read
 * endio knows no per-sub-bio verification ran).
 *
 * dip->pending_bios counts outstanding sub-bios plus one reference held
 * by this function; btrfs_end_dio_bio() completes the original bio when
 * the count drops to zero.
 */
static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip)
{
	struct inode *inode = dip->inode;
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct bio *bio;
	struct bio *orig_bio = dip->orig_bio;
	u64 start_sector = orig_bio->bi_iter.bi_sector;
	u64 file_offset = dip->logical_offset;
	u64 map_length;
	int async_submit = 0;
	u64 submit_len;
	int clone_offset = 0;
	int clone_len;
	int ret;
	blk_status_t status;

	map_length = orig_bio->bi_iter.bi_size;
	submit_len = map_length;
	ret = btrfs_map_block(fs_info, btrfs_op(orig_bio), start_sector << 9,
			      &map_length, NULL, 0);
	if (ret)
		return -EIO;

	if (map_length >= submit_len) {
		/* The whole bio fits in one mapping, no splitting needed. */
		bio = orig_bio;
		dip->flags |= BTRFS_DIO_ORIG_BIO_SUBMITTED;
		goto submit;
	}

	/* Async crcs make it difficult to collect full stripe writes. */
	if (btrfs_data_alloc_profile(fs_info) & BTRFS_BLOCK_GROUP_RAID56_MASK)
		async_submit = 0;
	else
		async_submit = 1;

	/* bio split costs (atomic_inc) should be sane */
	ASSERT(map_length <= INT_MAX);
	atomic_inc(&dip->pending_bios);
	do {
		clone_len = min_t(int, submit_len, map_length);

		/*
		 * This will never fail as it's passing GPF_NOFS and
		 * the allocation is backed by btrfs_bioset.
		 */
		bio = btrfs_bio_clone_partial(orig_bio, clone_offset,
					      clone_len);
		bio->bi_private = dip;
		bio->bi_end_io = btrfs_end_dio_bio;
		btrfs_io_bio(bio)->logical = file_offset;

		ASSERT(submit_len >= clone_len);
		submit_len -= clone_len;
		if (submit_len == 0)
			break;

		/*
		 * Increase the count before we submit the bio so we know
		 * the end IO handler won't happen before we increase the
		 * count.  Otherwise, the dip might get freed before we're
		 * done setting it up.
		 */
		atomic_inc(&dip->pending_bios);

		status = btrfs_submit_dio_bio(bio, inode, file_offset,
					      async_submit);
		if (status) {
			bio_put(bio);
			atomic_dec(&dip->pending_bios);
			goto out_err;
		}

		clone_offset += clone_len;
		start_sector += clone_len >> 9;
		file_offset += clone_len;

		map_length = submit_len;
		ret = btrfs_map_block(fs_info, btrfs_op(orig_bio),
				      start_sector << 9, &map_length, NULL, 0);
		if (ret)
			goto out_err;
	} while (submit_len > 0);

submit:
	status = btrfs_submit_dio_bio(bio, inode, file_offset, async_submit);
	if (!status)
		return 0;

	bio_put(bio);
out_err:
	dip->errors = 1;
	/*
	 * Before the atomic variable goes to zero, we must make sure
	 * dip->errors is perceived to be set.  This ordering is ensured by
	 * the fact that atomic operations with a return value are fully
	 * ordered as per atomic_t.txt.
	 */
	if (atomic_dec_and_test(&dip->pending_bios))
		bio_io_error(dip->orig_bio);

	/* bio_end_io() will handle error, so we needn't return it */
	return 0;
}
8362
/*
 * Entry point for submitting a direct IO bio (called back from
 * __blockdev_direct_IO).  Clones @dio_bio, allocates the dio private
 * tracking structure and hands the clone to btrfs_submit_direct_hook().
 * On failure, performs the cleanup the endio callbacks would otherwise
 * have done.
 */
static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode,
				loff_t file_offset)
{
	struct btrfs_dio_private *dip = NULL;
	struct bio *bio = NULL;
	struct btrfs_io_bio *io_bio;
	bool write = (bio_op(dio_bio) == REQ_OP_WRITE);
	int ret = 0;

	bio = btrfs_bio_clone(dio_bio);

	dip = kzalloc(sizeof(*dip), GFP_NOFS);
	if (!dip) {
		ret = -ENOMEM;
		goto free_ordered;
	}

	dip->private = dio_bio->bi_private;
	dip->inode = inode;
	dip->logical_offset = file_offset;
	dip->bytes = dio_bio->bi_iter.bi_size;
	dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9;
	bio->bi_private = dip;
	dip->orig_bio = bio;
	dip->dio_bio = dio_bio;
	atomic_set(&dip->pending_bios, 0);
	io_bio = btrfs_io_bio(bio);
	io_bio->logical = file_offset;

	if (write) {
		bio->bi_end_io = btrfs_endio_direct_write;
	} else {
		bio->bi_end_io = btrfs_endio_direct_read;
		dip->subio_endio = btrfs_subio_endio_read;
	}

	/*
	 * Reset the range for unsubmitted ordered extents (to an empty
	 * range at its end) before submission: once the bios are
	 * submitted, their endio handlers own ordered extent completion,
	 * so the error path in btrfs_direct_IO() must not clean them up
	 * again.
	 */
	if (write) {
		struct btrfs_dio_data *dio_data = current->journal_info;

		dio_data->unsubmitted_oe_range_end = dip->logical_offset +
			dip->bytes;
		dio_data->unsubmitted_oe_range_start =
			dio_data->unsubmitted_oe_range_end;
	}

	ret = btrfs_submit_direct_hook(dip);
	if (!ret)
		return;

	btrfs_io_bio_free_csum(io_bio);

free_ordered:
	/*
	 * If we arrived here it means either we failed to submit the dip
	 * or we failed to clone the dio_bio or failed to allocate the dip.
	 * If we cloned the dio_bio and allocated the dip, we can just call
	 * bio_endio against our io_bio so that we get proper resource
	 * cleanup if we fail to submit the dip.  Otherwise we must do the
	 * same as btrfs_endio_direct_[write|read] by hand, because those
	 * callbacks require an allocated dip and a clone of dio_bio.
	 */
	if (bio && dip) {
		bio_io_error(bio);
		/*
		 * The end io callbacks free our dip, do the final put on
		 * bio and all the cleanup and final put for dio_bio
		 * (through dio_end_io()).
		 */
		dip = NULL;
		bio = NULL;
	} else {
		if (write)
			__endio_write_update_ordered(inode,
						file_offset,
						dio_bio->bi_iter.bi_size,
						false);
		else
			unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
			      file_offset + dio_bio->bi_iter.bi_size - 1);

		dio_bio->bi_status = BLK_STS_IOERR;
		/*
		 * Releases and cleans up our dio_bio, no need to bio_put()
		 * nor bio_endio()/bio_io_error() against dio_bio.
		 */
		dio_end_io(dio_bio);
	}
	if (bio)
		bio_put(bio);
	kfree(dip);
}
8460
/*
 * Sanity-check a direct IO request: the file offset and every iovec
 * must be sector aligned, and a read's iovec must not contain duplicate
 * base addresses.  Returns 0 when direct IO may proceed, -EINVAL
 * otherwise (the caller then falls back to buffered IO).
 */
static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info,
			       const struct iov_iter *iter, loff_t offset)
{
	int seg;
	int i;
	unsigned int blocksize_mask = fs_info->sectorsize - 1;
	ssize_t retval = -EINVAL;

	if (offset & blocksize_mask)
		goto out;

	if (iov_iter_alignment(iter) & blocksize_mask)
		goto out;

	/* If this is a write we don't need to check anymore. */
	if (iov_iter_rw(iter) != READ || !iter_is_iovec(iter))
		return 0;
	/*
	 * Check to make sure we don't have duplicate iov_base's in this
	 * iovec; if we do, return -EINVAL, otherwise we'd get csum errors
	 * when reading back.
	 */
	for (seg = 0; seg < iter->nr_segs; seg++) {
		for (i = seg + 1; i < iter->nr_segs; i++) {
			if (iter->iov[seg].iov_base == iter->iov[i].iov_base)
				goto out;
		}
	}
	retval = 0;
out:
	return retval;
}
8493
/*
 * ->direct_IO implementation.  Validates the request, sets up delalloc
 * space reservations for writes (stashing bookkeeping in
 * current->journal_info for the submission path to find), then hands
 * off to __blockdev_direct_IO.  On a failed or short write, releases
 * what was reserved and completes any ordered extents whose bios were
 * never submitted.
 */
static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_dio_data dio_data = { 0 };
	struct extent_changeset *data_reserved = NULL;
	loff_t offset = iocb->ki_pos;
	size_t count = 0;
	int flags = 0;
	bool wakeup = true;
	bool relock = false;
	ssize_t ret;

	/* Misaligned or overlapping request: fall back to buffered IO. */
	if (check_direct_IO(fs_info, iter, offset))
		return 0;

	inode_dio_begin(inode);

	/*
	 * The generic stuff only does filemap_write_and_wait_range, which
	 * isn't enough if we've written compressed pages to this area, so
	 * we need to flush the dirty pages again to make absolutely sure
	 * that any outstanding dirty pages are on disk.
	 */
	count = iov_iter_count(iter);
	if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
		     &BTRFS_I(inode)->runtime_flags))
		filemap_fdatawrite_range(inode->i_mapping, offset,
					 offset + count - 1);

	if (iov_iter_rw(iter) == WRITE) {
		/*
		 * If the write DIO is beyond the EOF, we need to update the
		 * isize, which is protected by i_mutex, so we can not
		 * unlock the i_mutex in that case.
		 */
		if (offset + count <= inode->i_size) {
			dio_data.overwrite = 1;
			inode_unlock(inode);
			relock = true;
		} else if (iocb->ki_flags & IOCB_NOWAIT) {
			ret = -EAGAIN;
			goto out;
		}
		ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
						   offset, count);
		if (ret)
			goto out;

		/*
		 * We need to know how much space we reserved so we can do
		 * the accounting properly if we go over the amount we
		 * originally calculated.  Abuse current->journal_info to
		 * pass this to the submission path.
		 */
		dio_data.reserve = round_up(count,
					    fs_info->sectorsize);
		dio_data.unsubmitted_oe_range_start = (u64)offset;
		dio_data.unsubmitted_oe_range_end = (u64)offset;
		current->journal_info = &dio_data;
		down_read(&BTRFS_I(inode)->dio_sem);
	} else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
			    &BTRFS_I(inode)->runtime_flags)) {
		inode_dio_end(inode);
		flags = DIO_LOCKING | DIO_SKIP_HOLES;
		wakeup = false;
	}

	ret = __blockdev_direct_IO(iocb, inode,
				   fs_info->fs_devices->latest_bdev,
				   iter, btrfs_get_blocks_direct, NULL,
				   btrfs_submit_direct, flags);
	if (iov_iter_rw(iter) == WRITE) {
		up_read(&BTRFS_I(inode)->dio_sem);
		current->journal_info = NULL;
		if (ret < 0 && ret != -EIOCBQUEUED) {
			if (dio_data.reserve)
				btrfs_delalloc_release_space(inode, data_reserved,
					offset, dio_data.reserve, true);
			/*
			 * On error we might have left some ordered extents
			 * without submitting corresponding bios for them, so
			 * cleanup them up to avoid other tasks getting them
			 * blocked waiting for the ordered extents to
			 * complete.
			 */
			if (dio_data.unsubmitted_oe_range_start <
			    dio_data.unsubmitted_oe_range_end)
				__endio_write_update_ordered(inode,
					dio_data.unsubmitted_oe_range_start,
					dio_data.unsubmitted_oe_range_end -
					dio_data.unsubmitted_oe_range_start,
					false);
		} else if (ret >= 0 && (size_t)ret < count)
			/* Short write: give back the unused reservation. */
			btrfs_delalloc_release_space(inode, data_reserved,
					offset, count - (size_t)ret, true);
		btrfs_delalloc_release_extents(BTRFS_I(inode), count, false);
	}
out:
	if (wakeup)
		inode_dio_end(inode);
	if (relock)
		inode_lock(inode);

	extent_changeset_free(data_reserved);
	return ret;
}
8600
8601#define BTRFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC)
8602
8603static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
8604 __u64 start, __u64 len)
8605{
8606 int ret;
8607
8608 ret = fiemap_check_flags(fieinfo, BTRFS_FIEMAP_FLAGS);
8609 if (ret)
8610 return ret;
8611
8612 return extent_fiemap(inode, fieinfo, start, len);
8613}
8614
8615int btrfs_readpage(struct file *file, struct page *page)
8616{
8617 struct extent_io_tree *tree;
8618 tree = &BTRFS_I(page->mapping->host)->io_tree;
8619 return extent_read_full_page(tree, page, btrfs_get_extent, 0);
8620}
8621
/* ->writepage handler: write one dirty page through the extent code. */
static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	int ret;

	/* Don't start writeback recursively from memory reclaim. */
	if (current->flags & PF_MEMALLOC) {
		redirty_page_for_writepage(wbc, page);
		unlock_page(page);
		return 0;
	}

	/*
	 * If we are under memory pressure we will call this directly from
	 * the VM, so we need the inode referenced for the ordered extent
	 * completion.  If the reference count drops to zero mid-IO we could
	 * end up freeing the inode while IO is in flight; if we can't get
	 * a reference, just bail and retry later.
	 */
	if (!igrab(inode)) {
		redirty_page_for_writepage(wbc, page);
		return AOP_WRITEPAGE_ACTIVATE;
	}
	ret = extent_write_full_page(page, wbc);
	/* Drop the reference via delayed iput to avoid eviction here. */
	btrfs_add_delayed_iput(inode);
	return ret;
}
8646
/* ->writepages handler: thin wrapper around the extent IO code. */
static int btrfs_writepages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	return extent_writepages(mapping, wbc);
}
8652
/* ->readpages handler: thin wrapper around the extent IO code. */
static int
btrfs_readpages(struct file *file, struct address_space *mapping,
		struct list_head *pages, unsigned nr_pages)
{
	return extent_readpages(mapping, pages, nr_pages);
}
8659
8660static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
8661{
8662 int ret = try_release_extent_mapping(page, gfp_flags);
8663 if (ret == 1) {
8664 ClearPagePrivate(page);
8665 set_page_private(page, 0);
8666 put_page(page);
8667 }
8668 return ret;
8669}
8670
8671static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
8672{
8673 if (PageWriteback(page) || PageDirty(page))
8674 return 0;
8675 return __btrfs_releasepage(page, gfp_flags);
8676}
8677
/*
 * ->invalidatepage handler: tear down all btrfs state attached to a
 * page that is being removed (truncate, hole punch, eviction).  Any
 * ordered extents covering the page must be accounted for here because
 * IO on the page will never be started.
 */
static void btrfs_invalidatepage(struct page *page, unsigned int offset,
				 unsigned int length)
{
	struct inode *inode = page->mapping->host;
	struct extent_io_tree *tree;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	u64 page_start = page_offset(page);
	u64 page_end = page_start + PAGE_SIZE - 1;
	u64 start;
	u64 end;
	int inode_evicting = inode->i_state & I_FREEING;

	/*
	 * We have the page locked, so new writeback can't start and the
	 * dirty bit won't be cleared while we are here.
	 *
	 * Wait for IO on this page so that we can safely clear the
	 * PagePrivate2 bit and do the ordered accounting.
	 */
	wait_on_page_writeback(page);

	tree = &BTRFS_I(inode)->io_tree;
	if (offset) {
		/* Partial page invalidation: just try to release it. */
		btrfs_releasepage(page, GFP_NOFS);
		return;
	}

	if (!inode_evicting)
		lock_extent_bits(tree, page_start, page_end, &cached_state);
again:
	start = page_start;
	ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start,
					page_end - start + 1);
	if (ordered) {
		end = min(page_end, ordered->file_offset + ordered->len - 1);
		/*
		 * IO on this page will never be started, so we need to
		 * account for any ordered extents now.
		 */
		if (!inode_evicting)
			clear_extent_bit(tree, start, end,
					 EXTENT_DIRTY | EXTENT_DELALLOC |
					 EXTENT_DELALLOC_NEW |
					 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
					 EXTENT_DEFRAG, 1, 0, &cached_state);
		/*
		 * Whoever cleared the private bit is responsible for the
		 * finish_ordered_io.
		 */
		if (TestClearPagePrivate2(page)) {
			struct btrfs_ordered_inode_tree *tree;
			u64 new_len;

			tree = &BTRFS_I(inode)->ordered_tree;

			/* Shrink the ordered extent to the invalidated start. */
			spin_lock_irq(&tree->lock);
			set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
			new_len = start - ordered->file_offset;
			if (new_len < ordered->truncated_len)
				ordered->truncated_len = new_len;
			spin_unlock_irq(&tree->lock);

			if (btrfs_dec_test_ordered_pending(inode, &ordered,
							   start,
							   end - start + 1, 1))
				btrfs_finish_ordered_io(ordered);
		}
		btrfs_put_ordered_extent(ordered);
		if (!inode_evicting) {
			cached_state = NULL;
			lock_extent_bits(tree, start, end,
					 &cached_state);
		}

		/* The page may be covered by more than one ordered extent. */
		start = end + 1;
		if (start < page_end)
			goto again;
	}

	/*
	 * Qgroup reserved space handler:
	 * a dirty page here was never written to disk, so its reserved
	 * qgroup space must be freed; data already on disk has had its
	 * reservation converted at ordered extent finish time.
	 */
	if (PageDirty(page))
		btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE);
	if (!inode_evicting) {
		clear_extent_bit(tree, page_start, page_end,
				 EXTENT_LOCKED | EXTENT_DIRTY |
				 EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
				 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
				 &cached_state);

		__btrfs_releasepage(page, GFP_NOFS);
	}

	ClearPageChecked(page);
	if (PagePrivate(page)) {
		ClearPagePrivate(page);
		set_page_private(page, 0);
		put_page(page);
	}
}
8792
8793
8794
8795
8796
8797
8798
8799
8800
8801
8802
8803
8804
8805
8806
8807
/*
 * Page fault handler called when a mapped page is about to become
 * writable.  Reserves delalloc space for the page, waits out any
 * ordered extents covering it, marks the range delalloc and zeroes the
 * part of the page beyond EOF.  Must not change the file size.
 *
 * Returns VM_FAULT_LOCKED with the page locked and dirty on success,
 * or a VM fault error code.
 */
vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
{
	struct page *page = vmf->page;
	struct inode *inode = file_inode(vmf->vma->vm_file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	struct extent_changeset *data_reserved = NULL;
	char *kaddr;
	unsigned long zero_start;
	loff_t size;
	vm_fault_t ret;
	int ret2;
	int reserved = 0;
	u64 reserved_space;
	u64 page_start;
	u64 page_end;
	u64 end;

	reserved_space = PAGE_SIZE;

	sb_start_pagefault(inode->i_sb);
	page_start = page_offset(page);
	page_end = page_start + PAGE_SIZE - 1;
	end = page_end;

	/*
	 * Reserving delalloc space after obtaining the page lock can lead
	 * to deadlock.  For example, if a dirty page is locked by this
	 * function and the call to btrfs_delalloc_reserve_space() ends up
	 * triggering dirty page writeout, then btrfs_writepage() could
	 * wait indefinitely for a lock on the page currently being
	 * processed here.  So reserve before lock_page().
	 */
	ret2 = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
					    reserved_space);
	if (!ret2) {
		ret2 = file_update_time(vmf->vma->vm_file);
		reserved = 1;
	}
	if (ret2) {
		ret = vmf_error(ret2);
		if (reserved)
			goto out;
		goto out_noreserve;
	}

	ret = VM_FAULT_NOPAGE;	/* make the VM retry the fault */
again:
	lock_page(page);
	size = i_size_read(inode);

	if ((page->mapping != inode->i_mapping) ||
	    (page_start >= size)) {
		/* The page got truncated out from underneath us. */
		goto out_unlock;
	}
	wait_on_page_writeback(page);

	lock_extent_bits(io_tree, page_start, page_end, &cached_state);
	set_page_extent_mapped(page);

	/*
	 * We can't set the delalloc bits while there are pending ordered
	 * extents covering the page.  Drop our locks and wait for them to
	 * finish, then retry.
	 */
	ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start,
			PAGE_SIZE);
	if (ordered) {
		unlock_extent_cached(io_tree, page_start, page_end,
				     &cached_state);
		unlock_page(page);
		btrfs_start_ordered_extent(inode, ordered, 1);
		btrfs_put_ordered_extent(ordered);
		goto again;
	}

	/*
	 * For the last page of the file we may only need part of a sector,
	 * so give back the over-reserved space beyond EOF.
	 */
	if (page->index == ((size - 1) >> PAGE_SHIFT)) {
		reserved_space = round_up(size - page_start,
					  fs_info->sectorsize);
		if (reserved_space < PAGE_SIZE) {
			end = page_start + reserved_space - 1;
			btrfs_delalloc_release_space(inode, data_reserved,
					page_start, PAGE_SIZE - reserved_space,
					true);
		}
	}

	/*
	 * page_mkwrite gets called when the page is firstly dirtied after
	 * it's faulted in, and when it is marked clean and then dirtied
	 * again.  Clear any delalloc/accounting bits left from a previous
	 * round so we don't double account.
	 */
	clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end,
			  EXTENT_DIRTY | EXTENT_DELALLOC |
			  EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
			  0, 0, &cached_state);

	ret2 = btrfs_set_extent_delalloc(inode, page_start, end, 0,
					&cached_state, 0);
	if (ret2) {
		unlock_extent_cached(io_tree, page_start, page_end,
				     &cached_state);
		ret = VM_FAULT_SIGBUS;
		goto out_unlock;
	}
	ret2 = 0;

	/* The page is wholly or partially inside EOF; zero beyond EOF. */
	if (page_start + PAGE_SIZE > size)
		zero_start = offset_in_page(size);
	else
		zero_start = PAGE_SIZE;

	if (zero_start != PAGE_SIZE) {
		kaddr = kmap(page);
		memset(kaddr + zero_start, 0, PAGE_SIZE - zero_start);
		flush_dcache_page(page);
		kunmap(page);
	}
	ClearPageChecked(page);
	set_page_dirty(page);
	SetPageUptodate(page);

	BTRFS_I(inode)->last_trans = fs_info->generation;
	BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
	BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit;

	unlock_extent_cached(io_tree, page_start, page_end, &cached_state);

	if (!ret2) {
		btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, true);
		sb_end_pagefault(inode->i_sb);
		extent_changeset_free(data_reserved);
		return VM_FAULT_LOCKED;
	}

out_unlock:
	unlock_page(page);
out:
	btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, (ret != 0));
	btrfs_delalloc_release_space(inode, data_reserved, page_start,
				     reserved_space, (ret != 0));
out_noreserve:
	sb_end_pagefault(inode->i_sb);
	extent_changeset_free(data_reserved);
	return ret;
}
8959
/*
 * Truncate an inode's data down to the current i_size, dropping file
 * extent items in batches, with a private block reserve to guarantee
 * each transaction can update the inode.  @skip_writeback avoids the
 * initial wait for ordered IO when the caller already did it.
 */
static int btrfs_truncate(struct inode *inode, bool skip_writeback)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_block_rsv *rsv;
	int ret;
	struct btrfs_trans_handle *trans;
	u64 mask = fs_info->sectorsize - 1;
	u64 min_size = btrfs_calc_trunc_metadata_size(fs_info, 1);

	if (!skip_writeback) {
		/* Flush and wait any ordered IO past the new EOF first. */
		ret = btrfs_wait_ordered_range(inode, inode->i_size & (~mask),
					       (u64)-1);
		if (ret)
			return ret;
	}

	/*
	 * Truncate can run out of a transaction's reservation mid-way
	 * (-ENOSPC/-EAGAIN from btrfs_truncate_inode_items), so we keep a
	 * temporary block reserve big enough for one inode update and
	 * refill it from trans_block_rsv for every transaction we start.
	 * That way the inode update at the end of each loop iteration is
	 * always guaranteed to succeed.
	 */
	rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
	if (!rsv)
		return -ENOMEM;
	rsv->size = min_size;
	rsv->failfast = 1;

	/*
	 * 1 for the truncate slack space
	 * 1 for updating the inode.
	 */
	trans = btrfs_start_transaction(root, 2);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out;
	}

	/* Migrate the slack space for the truncate to our reserve */
	ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv,
				      min_size, false);
	BUG_ON(ret);

	/*
	 * We need the full sync flag so that if the truncate is logged
	 * the log replay doesn't bring back extents we dropped here.
	 * NOTE(review): based on the flag name and surrounding usage —
	 * confirm against tree-log.c.
	 */
	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
	trans->block_rsv = rsv;

	while (1) {
		ret = btrfs_truncate_inode_items(trans, root, inode,
						 inode->i_size,
						 BTRFS_EXTENT_DATA_KEY);
		trans->block_rsv = &fs_info->trans_block_rsv;
		if (ret != -ENOSPC && ret != -EAGAIN)
			break;

		/* Ran out of reservation: commit progress and restart. */
		ret = btrfs_update_inode(trans, root, inode);
		if (ret)
			break;

		btrfs_end_transaction(trans);
		btrfs_btree_balance_dirty(fs_info);

		trans = btrfs_start_transaction(root, 2);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			trans = NULL;
			break;
		}

		/* Refill our private reserve for the next round. */
		btrfs_block_rsv_release(fs_info, rsv, -1);
		ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv,
					      rsv, min_size, false);
		BUG_ON(ret);
		trans->block_rsv = rsv;
	}

	/*
	 * We can't call btrfs_truncate_block() inside a transaction handle
	 * (it could deadlock with freeze).  If we got NEED_TRUNCATE_BLOCK
	 * we've truncated everything except the last little bit, so end
	 * the transaction, zero the partial block, and then update
	 * disk_i_size in a fresh transaction.
	 */
	if (ret == NEED_TRUNCATE_BLOCK) {
		btrfs_end_transaction(trans);
		btrfs_btree_balance_dirty(fs_info);

		ret = btrfs_truncate_block(inode, inode->i_size, 0, 0);
		if (ret)
			goto out;
		trans = btrfs_start_transaction(root, 1);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			goto out;
		}
		btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
	}

	if (trans) {
		int ret2;

		/* Final inode update; preserve the first error we hit. */
		trans->block_rsv = &fs_info->trans_block_rsv;
		ret2 = btrfs_update_inode(trans, root, inode);
		if (ret2 && !ret)
			ret = ret2;

		ret2 = btrfs_end_transaction(trans);
		if (ret2 && !ret)
			ret = ret2;
		btrfs_btree_balance_dirty(fs_info);
	}
out:
	btrfs_free_block_rsv(fs_info, rsv);

	return ret;
}
9104
9105
9106
9107
/*
 * Create the root directory inode ("..") of a brand new subvolume and
 * inherit properties from the parent root.  Returns 0 or a negative
 * errno.
 */
int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
			     struct btrfs_root *new_root,
			     struct btrfs_root *parent_root,
			     u64 new_dirid)
{
	struct inode *inode;
	int err;
	u64 index = 0;

	inode = btrfs_new_inode(trans, new_root, NULL, "..", 2,
				new_dirid, new_dirid,
				S_IFDIR | (~current_umask() & S_IRWXUGO),
				&index);
	if (IS_ERR(inode))
		return PTR_ERR(inode);
	inode->i_op = &btrfs_dir_inode_operations;
	inode->i_fop = &btrfs_dir_file_operations;

	set_nlink(inode, 1);
	btrfs_i_size_write(BTRFS_I(inode), 0);
	unlock_new_inode(inode);

	err = btrfs_subvol_inherit_props(trans, new_root, parent_root);
	if (err)
		/* Not fatal: log and keep the subvolume usable. */
		btrfs_err(new_root->fs_info,
			  "error inheriting subvolume %llu properties: %d",
			  new_root->root_key.objectid, err);

	err = btrfs_update_inode(trans, new_root, inode);

	iput(inode);
	return err;
}
9141
/*
 * VFS ->alloc_inode hook: allocate a btrfs_inode from the slab cache and
 * reset every field the slab constructor (init_once) does not cover.
 * Returns the embedded VFS inode, or NULL on allocation failure.
 */
struct inode *btrfs_alloc_inode(struct super_block *sb)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
	struct btrfs_inode *ei;
	struct inode *inode;

	ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_KERNEL);
	if (!ei)
		return NULL;

	ei->root = NULL;
	ei->generation = 0;
	ei->last_trans = 0;
	ei->last_sub_trans = 0;
	ei->logged_trans = 0;
	ei->delalloc_bytes = 0;
	ei->new_delalloc_bytes = 0;
	ei->defrag_bytes = 0;
	ei->disk_i_size = 0;
	ei->flags = 0;
	ei->csum_bytes = 0;
	/* (u64)-1 means "directory index counter not initialized yet" */
	ei->index_cnt = (u64)-1;
	ei->dir_index = 0;
	ei->last_unlink_trans = 0;
	ei->last_link_trans = 0;
	ei->last_log_commit = 0;

	spin_lock_init(&ei->lock);
	ei->outstanding_extents = 0;
	/* sanity-test inodes have no real fs_info to hang a block rsv on */
	if (sb->s_magic != BTRFS_TEST_MAGIC)
		btrfs_init_metadata_block_rsv(fs_info, &ei->block_rsv,
					      BTRFS_BLOCK_RSV_DELALLOC);
	ei->runtime_flags = 0;
	ei->prop_compress = BTRFS_COMPRESS_NONE;
	ei->defrag_compress = BTRFS_COMPRESS_NONE;

	ei->delayed_node = NULL;

	ei->i_otime.tv_sec = 0;
	ei->i_otime.tv_nsec = 0;

	inode = &ei->vfs_inode;
	extent_map_tree_init(&ei->extent_tree);
	extent_io_tree_init(&ei->io_tree, inode);
	extent_io_tree_init(&ei->io_failure_tree, inode);
	ei->io_tree.track_uptodate = 1;
	ei->io_failure_tree.track_uptodate = 1;
	atomic_set(&ei->sync_writers, 0);
	mutex_init(&ei->log_mutex);
	mutex_init(&ei->delalloc_mutex);
	btrfs_ordered_inode_tree_init(&ei->ordered_tree);
	INIT_LIST_HEAD(&ei->delalloc_inodes);
	INIT_LIST_HEAD(&ei->delayed_iput);
	RB_CLEAR_NODE(&ei->rb_node);
	init_rwsem(&ei->dio_sem);

	return inode;
}
9200
9201#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/*
 * Free an inode created by the sanity tests.  Unlike btrfs_destroy_inode()
 * this skips the on-disk/ordered-extent teardown and frees the slab object
 * directly instead of deferring through call_rcu().
 */
void btrfs_test_destroy_inode(struct inode *inode)
{
	btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0);
	kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
}
9207#endif
9208
9209static void btrfs_i_callback(struct rcu_head *head)
9210{
9211 struct inode *inode = container_of(head, struct inode, i_rcu);
9212 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
9213}
9214
/*
 * VFS ->destroy_inode hook: verify the inode left no state behind, clean
 * up any stray ordered extents, and free the in-memory inode via RCU.
 */
void btrfs_destroy_inode(struct inode *inode)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_ordered_extent *ordered;
	struct btrfs_root *root = BTRFS_I(inode)->root;

	/* by this point nothing may still reference, dirty or reserve on it */
	WARN_ON(!hlist_empty(&inode->i_dentry));
	WARN_ON(inode->i_data.nrpages);
	WARN_ON(BTRFS_I(inode)->block_rsv.reserved);
	WARN_ON(BTRFS_I(inode)->block_rsv.size);
	WARN_ON(BTRFS_I(inode)->outstanding_extents);
	WARN_ON(BTRFS_I(inode)->delalloc_bytes);
	WARN_ON(BTRFS_I(inode)->new_delalloc_bytes);
	WARN_ON(BTRFS_I(inode)->csum_bytes);
	WARN_ON(BTRFS_I(inode)->defrag_bytes);

	/*
	 * A NULL root can happen when we allocated a new inode but somebody
	 * else raced in and created the same inode, so we must destroy the
	 * half-constructed one without touching any tree state.
	 */
	if (!root)
		goto free;

	while (1) {
		ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
		if (!ordered)
			break;
		else {
			/* leftover ordered extents here indicate a bug elsewhere */
			btrfs_err(fs_info,
				  "found ordered extent %llu %llu on inode cleanup",
				  ordered->file_offset, ordered->len);
			btrfs_remove_ordered_extent(inode, ordered);
			/*
			 * Two puts: one for our lookup reference and one,
			 * presumably, for the base reference the tree entry
			 * held — NOTE(review): confirm against
			 * btrfs_remove_ordered_extent()'s refcount contract.
			 */
			btrfs_put_ordered_extent(ordered);
			btrfs_put_ordered_extent(ordered);
		}
	}
	btrfs_qgroup_check_reserved_leak(inode);
	inode_tree_del(inode);
	btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0);
free:
	/* defer the actual free until an RCU grace period has passed */
	call_rcu(&inode->i_rcu, btrfs_i_callback);
}
9258
9259int btrfs_drop_inode(struct inode *inode)
9260{
9261 struct btrfs_root *root = BTRFS_I(inode)->root;
9262
9263 if (root == NULL)
9264 return 1;
9265
9266
9267 if (btrfs_root_refs(&root->root_item) == 0)
9268 return 1;
9269 else
9270 return generic_drop_inode(inode);
9271}
9272
9273static void init_once(void *foo)
9274{
9275 struct btrfs_inode *ei = (struct btrfs_inode *) foo;
9276
9277 inode_init_once(&ei->vfs_inode);
9278}
9279
/*
 * Tear down all btrfs slab caches.  Safe to call on partial setup:
 * kmem_cache_destroy() accepts NULL pointers.
 */
void __cold btrfs_destroy_cachep(void)
{
	/*
	 * Inodes are freed via call_rcu() (btrfs_i_callback), so make sure
	 * all pending RCU frees have run before destroying the inode cache.
	 */
	rcu_barrier();
	kmem_cache_destroy(btrfs_inode_cachep);
	kmem_cache_destroy(btrfs_trans_handle_cachep);
	kmem_cache_destroy(btrfs_path_cachep);
	kmem_cache_destroy(btrfs_free_space_cachep);
}
9292
/*
 * Create the slab caches used throughout btrfs (inodes, transaction
 * handles, paths, free-space entries).  On any failure, everything
 * created so far is torn down again.  Returns 0 or -ENOMEM.
 */
int __init btrfs_init_cachep(void)
{
	btrfs_inode_cachep = kmem_cache_create("btrfs_inode",
			sizeof(struct btrfs_inode), 0,
			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT,
			init_once);
	if (!btrfs_inode_cachep)
		goto fail;

	btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle",
			sizeof(struct btrfs_trans_handle), 0,
			SLAB_TEMPORARY | SLAB_MEM_SPREAD, NULL);
	if (!btrfs_trans_handle_cachep)
		goto fail;

	btrfs_path_cachep = kmem_cache_create("btrfs_path",
			sizeof(struct btrfs_path), 0,
			SLAB_MEM_SPREAD, NULL);
	if (!btrfs_path_cachep)
		goto fail;

	btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space",
			sizeof(struct btrfs_free_space), 0,
			SLAB_MEM_SPREAD, NULL);
	if (!btrfs_free_space_cachep)
		goto fail;

	return 0;
fail:
	/* destroys whichever caches were created; NULL-safe */
	btrfs_destroy_cachep();
	return -ENOMEM;
}
9325
9326static int btrfs_getattr(const struct path *path, struct kstat *stat,
9327 u32 request_mask, unsigned int flags)
9328{
9329 u64 delalloc_bytes;
9330 struct inode *inode = d_inode(path->dentry);
9331 u32 blocksize = inode->i_sb->s_blocksize;
9332 u32 bi_flags = BTRFS_I(inode)->flags;
9333
9334 stat->result_mask |= STATX_BTIME;
9335 stat->btime.tv_sec = BTRFS_I(inode)->i_otime.tv_sec;
9336 stat->btime.tv_nsec = BTRFS_I(inode)->i_otime.tv_nsec;
9337 if (bi_flags & BTRFS_INODE_APPEND)
9338 stat->attributes |= STATX_ATTR_APPEND;
9339 if (bi_flags & BTRFS_INODE_COMPRESS)
9340 stat->attributes |= STATX_ATTR_COMPRESSED;
9341 if (bi_flags & BTRFS_INODE_IMMUTABLE)
9342 stat->attributes |= STATX_ATTR_IMMUTABLE;
9343 if (bi_flags & BTRFS_INODE_NODUMP)
9344 stat->attributes |= STATX_ATTR_NODUMP;
9345
9346 stat->attributes_mask |= (STATX_ATTR_APPEND |
9347 STATX_ATTR_COMPRESSED |
9348 STATX_ATTR_IMMUTABLE |
9349 STATX_ATTR_NODUMP);
9350
9351 generic_fillattr(inode, stat);
9352 stat->dev = BTRFS_I(inode)->root->anon_dev;
9353
9354 spin_lock(&BTRFS_I(inode)->lock);
9355 delalloc_bytes = BTRFS_I(inode)->new_delalloc_bytes;
9356 spin_unlock(&BTRFS_I(inode)->lock);
9357 stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) +
9358 ALIGN(delalloc_bytes, blocksize)) >> 9;
9359 return 0;
9360}
9361
/*
 * Implement RENAME_EXCHANGE: atomically swap the two directory entries.
 * Both entries must exist (the VFS guarantees new_inode != NULL here).
 * Returns 0 on success or a negative errno.
 */
static int btrfs_rename_exchange(struct inode *old_dir,
			      struct dentry *old_dentry,
			      struct inode *new_dir,
			      struct dentry *new_dentry)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(old_dir->i_sb);
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(old_dir)->root;
	struct btrfs_root *dest = BTRFS_I(new_dir)->root;
	struct inode *new_inode = new_dentry->d_inode;
	struct inode *old_inode = old_dentry->d_inode;
	struct timespec64 ctime = current_time(old_inode);
	struct dentry *parent;
	u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
	u64 new_ino = btrfs_ino(BTRFS_I(new_inode));
	u64 old_idx = 0;
	u64 new_idx = 0;
	u64 root_objectid;
	int ret;
	bool root_log_pinned = false;
	bool dest_log_pinned = false;
	struct btrfs_log_ctx ctx_root;
	struct btrfs_log_ctx ctx_dest;
	bool sync_log_root = false;
	bool sync_log_dest = false;
	bool commit_transaction = false;

	/* cross-root exchange is only allowed for subvolume root links */
	if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
		return -EXDEV;

	btrfs_init_log_ctx(&ctx_root, old_inode);
	btrfs_init_log_ctx(&ctx_dest, new_inode);

	/* close the race window with the snapshot create/destroy ioctls */
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
		down_read(&fs_info->subvol_sem);
	if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
		down_read(&fs_info->subvol_sem);

	/*
	 * Reserve for the worst case: unlinking two normal inodes is about
	 * 5 item modifications each (10 total), plus 2 for inserting the
	 * two new links — hence 12 items.
	 */
	trans = btrfs_start_transaction(root, 12);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_notrans;
	}

	/*
	 * Grab a free directory-index sequence number in each directory
	 * for the entry being moved into it.
	 */
	ret = btrfs_set_inode_index(BTRFS_I(new_dir), &old_idx);
	if (ret)
		goto out_fail;
	ret = btrfs_set_inode_index(BTRFS_I(old_dir), &new_idx);
	if (ret)
		goto out_fail;

	BTRFS_I(old_inode)->dir_index = 0ULL;
	BTRFS_I(new_inode)->dir_index = 0ULL;

	/* Reference for the source. */
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
		/* subvolume entries cannot be tree-logged; force full commit */
		btrfs_set_log_full_commit(fs_info, trans);
	} else {
		btrfs_pin_log_trans(root);
		root_log_pinned = true;
		ret = btrfs_insert_inode_ref(trans, dest,
					     new_dentry->d_name.name,
					     new_dentry->d_name.len,
					     old_ino,
					     btrfs_ino(BTRFS_I(new_dir)),
					     old_idx);
		if (ret)
			goto out_fail;
	}

	/* And now for the dest. */
	if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
		/* subvolume entries cannot be tree-logged; force full commit */
		btrfs_set_log_full_commit(fs_info, trans);
	} else {
		btrfs_pin_log_trans(dest);
		dest_log_pinned = true;
		ret = btrfs_insert_inode_ref(trans, root,
					     old_dentry->d_name.name,
					     old_dentry->d_name.len,
					     new_ino,
					     btrfs_ino(BTRFS_I(old_dir)),
					     new_idx);
		if (ret)
			goto out_fail;
	}

	/* Update inode version and ctime/mtime. */
	inode_inc_iversion(old_dir);
	inode_inc_iversion(new_dir);
	inode_inc_iversion(old_inode);
	inode_inc_iversion(new_inode);
	old_dir->i_ctime = old_dir->i_mtime = ctime;
	new_dir->i_ctime = new_dir->i_mtime = ctime;
	old_inode->i_ctime = ctime;
	new_inode->i_ctime = ctime;

	if (old_dentry->d_parent != new_dentry->d_parent) {
		btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
				BTRFS_I(old_inode), 1);
		btrfs_record_unlink_dir(trans, BTRFS_I(new_dir),
				BTRFS_I(new_inode), 1);
	}

	/* src is a subvolume */
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
		root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
		ret = btrfs_unlink_subvol(trans, old_dir, root_objectid,
					  old_dentry->d_name.name,
					  old_dentry->d_name.len);
	} else { /* src is an inode */
		ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),
					   BTRFS_I(old_dentry->d_inode),
					   old_dentry->d_name.name,
					   old_dentry->d_name.len);
		if (!ret)
			ret = btrfs_update_inode(trans, root, old_inode);
	}
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	/* dest is a subvolume */
	if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
		root_objectid = BTRFS_I(new_inode)->root->root_key.objectid;
		ret = btrfs_unlink_subvol(trans, new_dir, root_objectid,
					  new_dentry->d_name.name,
					  new_dentry->d_name.len);
	} else { /* dest is an inode */
		ret = __btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir),
					   BTRFS_I(new_dentry->d_inode),
					   new_dentry->d_name.name,
					   new_dentry->d_name.len);
		if (!ret)
			ret = btrfs_update_inode(trans, dest, new_inode);
	}
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode),
			     new_dentry->d_name.name,
			     new_dentry->d_name.len, 0, old_idx);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	ret = btrfs_add_link(trans, BTRFS_I(old_dir), BTRFS_I(new_inode),
			     old_dentry->d_name.name,
			     old_dentry->d_name.len, 0, new_idx);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	if (old_inode->i_nlink == 1)
		BTRFS_I(old_inode)->dir_index = old_idx;
	if (new_inode->i_nlink == 1)
		BTRFS_I(new_inode)->dir_index = new_idx;

	if (root_log_pinned) {
		parent = new_dentry->d_parent;
		ret = btrfs_log_new_name(trans, BTRFS_I(old_inode),
					 BTRFS_I(old_dir), parent,
					 false, &ctx_root);
		if (ret == BTRFS_NEED_LOG_SYNC)
			sync_log_root = true;
		else if (ret == BTRFS_NEED_TRANS_COMMIT)
			commit_transaction = true;
		ret = 0;
		btrfs_end_log_trans(root);
		root_log_pinned = false;
	}
	if (dest_log_pinned) {
		/* no point logging if we will commit the whole transaction */
		if (!commit_transaction) {
			parent = old_dentry->d_parent;
			ret = btrfs_log_new_name(trans, BTRFS_I(new_inode),
						 BTRFS_I(new_dir), parent,
						 false, &ctx_dest);
			if (ret == BTRFS_NEED_LOG_SYNC)
				sync_log_dest = true;
			else if (ret == BTRFS_NEED_TRANS_COMMIT)
				commit_transaction = true;
			ret = 0;
		}
		btrfs_end_log_trans(dest);
		dest_log_pinned = false;
	}
out_fail:
	/*
	 * If we pinned a log and an error happened, force a full transaction
	 * commit whenever the log currently contains any of the inodes
	 * involved: syncing such a log could persist an inconsistent state
	 * for those inodes (or lead to inconsistencies on replay).  If the
	 * transaction was aborted, the abort reason reaches userspace at
	 * commit time.  Otherwise, let tasks sync the log as usual.
	 */
	if (ret && (root_log_pinned || dest_log_pinned)) {
		if (btrfs_inode_in_log(BTRFS_I(old_dir), fs_info->generation) ||
		    btrfs_inode_in_log(BTRFS_I(new_dir), fs_info->generation) ||
		    btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
		    (new_inode &&
		     btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation)))
			btrfs_set_log_full_commit(fs_info, trans);

		if (root_log_pinned) {
			btrfs_end_log_trans(root);
			root_log_pinned = false;
		}
		if (dest_log_pinned) {
			btrfs_end_log_trans(dest);
			dest_log_pinned = false;
		}
	}
	if (!ret && sync_log_root && !commit_transaction) {
		ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root,
				     &ctx_root);
		if (ret)
			commit_transaction = true;
	}
	if (!ret && sync_log_dest && !commit_transaction) {
		ret = btrfs_sync_log(trans, BTRFS_I(new_inode)->root,
				     &ctx_dest);
		if (ret)
			commit_transaction = true;
	}
	if (commit_transaction) {
		ret = btrfs_commit_transaction(trans);
	} else {
		int ret2;

		ret2 = btrfs_end_transaction(trans);
		ret = ret ? ret : ret2;
	}
out_notrans:
	if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
		up_read(&fs_info->subvol_sem);
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
		up_read(&fs_info->subvol_sem);

	return ret;
}
9625
/*
 * Create a whiteout entry (a char device with WHITEOUT_DEV) under the
 * old name, for RENAME_WHITEOUT support (overlayfs).  Runs inside the
 * caller's already-started transaction.  Returns 0 or a negative errno.
 */
static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     struct inode *dir,
				     struct dentry *dentry)
{
	int ret;
	struct inode *inode;
	u64 objectid;
	u64 index;

	ret = btrfs_find_free_ino(root, &objectid);
	if (ret)
		return ret;

	inode = btrfs_new_inode(trans, root, dir,
				dentry->d_name.name,
				dentry->d_name.len,
				btrfs_ino(BTRFS_I(dir)),
				objectid,
				S_IFCHR | WHITEOUT_MODE,
				&index);

	if (IS_ERR(inode)) {
		ret = PTR_ERR(inode);
		return ret;
	}

	inode->i_op = &btrfs_special_inode_operations;
	init_special_inode(inode, inode->i_mode,
		WHITEOUT_DEV);

	ret = btrfs_init_inode_security(trans, inode, dir,
				&dentry->d_name);
	if (ret)
		goto out;

	ret = btrfs_add_nondir(trans, BTRFS_I(dir), dentry,
				BTRFS_I(inode), 0, index);
	if (ret)
		goto out;

	ret = btrfs_update_inode(trans, root, inode);
out:
	unlock_new_inode(inode);
	/* on failure, drop the link so eviction cleans the inode up */
	if (ret)
		inode_dec_link_count(inode);
	iput(inode);

	return ret;
}
9676
/*
 * Plain rename (optionally with RENAME_WHITEOUT): move old_dentry from
 * old_dir to new_dentry in new_dir, unlinking any existing target first.
 * Returns 0 on success or a negative errno.
 */
static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
			struct inode *new_dir, struct dentry *new_dentry,
			unsigned int flags)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(old_dir->i_sb);
	struct btrfs_trans_handle *trans;
	unsigned int trans_num_items;
	struct btrfs_root *root = BTRFS_I(old_dir)->root;
	struct btrfs_root *dest = BTRFS_I(new_dir)->root;
	struct inode *new_inode = d_inode(new_dentry);
	struct inode *old_inode = d_inode(old_dentry);
	u64 index = 0;
	u64 root_objectid;
	int ret;
	u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
	bool log_pinned = false;
	struct btrfs_log_ctx ctx;
	bool sync_log = false;
	bool commit_transaction = false;

	/* the placeholder "." entry of an empty subvol dir cannot take links */
	if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
		return -EPERM;

	/* cross-root rename is only allowed for subvolume root links */
	if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
		return -EXDEV;

	if (old_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID ||
	    (new_inode && btrfs_ino(BTRFS_I(new_inode)) == BTRFS_FIRST_FREE_OBJECTID))
		return -ENOTEMPTY;

	if (S_ISDIR(old_inode->i_mode) && new_inode &&
	    new_inode->i_size > BTRFS_EMPTY_DIR_SIZE)
		return -ENOTEMPTY;

	/* check for collisions, even if the name isn't there yet */
	ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino,
					     new_dentry->d_name.name,
					     new_dentry->d_name.len);

	if (ret) {
		if (ret == -EEXIST) {
			/* we shouldn't get -EEXIST without a new_inode */
			if (WARN_ON(!new_inode)) {
				return ret;
			}
		} else {
			/* maybe -EOVERFLOW */
			return ret;
		}
	}
	ret = 0;

	/*
	 * We're replacing one file with another.  Start its IO now so we
	 * don't pile too much writeback work onto the transaction end.
	 */
	if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size)
		filemap_flush(old_inode->i_mapping);

	/* close the race window with the snapshot create/destroy ioctls */
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
		down_read(&fs_info->subvol_sem);

	/*
	 * Reserve for the worst case: unlinking two normal inodes is about
	 * 5 item modifications each (10 total), plus 1 for the new link —
	 * 11 items.  A whiteout needs 5 more (inode item, inode ref, two
	 * dir items and an xattr item for the new whiteout inode).
	 */
	trans_num_items = 11;
	if (flags & RENAME_WHITEOUT)
		trans_num_items += 5;
	trans = btrfs_start_transaction(root, trans_num_items);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_notrans;
	}

	if (dest != root)
		btrfs_record_root_in_trans(trans, dest);

	ret = btrfs_set_inode_index(BTRFS_I(new_dir), &index);
	if (ret)
		goto out_fail;

	BTRFS_I(old_inode)->dir_index = 0ULL;
	if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
		/* subvolume entries cannot be tree-logged; force full commit */
		btrfs_set_log_full_commit(fs_info, trans);
	} else {
		btrfs_pin_log_trans(root);
		log_pinned = true;
		ret = btrfs_insert_inode_ref(trans, dest,
					     new_dentry->d_name.name,
					     new_dentry->d_name.len,
					     old_ino,
					     btrfs_ino(BTRFS_I(new_dir)), index);
		if (ret)
			goto out_fail;
	}

	inode_inc_iversion(old_dir);
	inode_inc_iversion(new_dir);
	inode_inc_iversion(old_inode);
	old_dir->i_ctime = old_dir->i_mtime =
	new_dir->i_ctime = new_dir->i_mtime =
	old_inode->i_ctime = current_time(old_dir);

	if (old_dentry->d_parent != new_dentry->d_parent)
		btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
				BTRFS_I(old_inode), 1);

	if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
		root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
		ret = btrfs_unlink_subvol(trans, old_dir, root_objectid,
					old_dentry->d_name.name,
					old_dentry->d_name.len);
	} else {
		ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),
					BTRFS_I(d_inode(old_dentry)),
					old_dentry->d_name.name,
					old_dentry->d_name.len);
		if (!ret)
			ret = btrfs_update_inode(trans, root, old_inode);
	}
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	if (new_inode) {
		inode_inc_iversion(new_inode);
		new_inode->i_ctime = current_time(new_inode);
		if (unlikely(btrfs_ino(BTRFS_I(new_inode)) ==
			     BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
			/* target is the empty-dir placeholder of a subvolume */
			root_objectid = BTRFS_I(new_inode)->location.objectid;
			ret = btrfs_unlink_subvol(trans, new_dir, root_objectid,
						new_dentry->d_name.name,
						new_dentry->d_name.len);
			BUG_ON(new_inode->i_nlink == 0);
		} else {
			ret = btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir),
						 BTRFS_I(d_inode(new_dentry)),
						 new_dentry->d_name.name,
						 new_dentry->d_name.len);
		}
		if (!ret && new_inode->i_nlink == 0)
			ret = btrfs_orphan_add(trans,
					BTRFS_I(d_inode(new_dentry)));
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out_fail;
		}
	}

	ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode),
			     new_dentry->d_name.name,
			     new_dentry->d_name.len, 0, index);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	if (old_inode->i_nlink == 1)
		BTRFS_I(old_inode)->dir_index = index;

	if (log_pinned) {
		struct dentry *parent = new_dentry->d_parent;

		btrfs_init_log_ctx(&ctx, old_inode);
		ret = btrfs_log_new_name(trans, BTRFS_I(old_inode),
					 BTRFS_I(old_dir), parent,
					 false, &ctx);
		if (ret == BTRFS_NEED_LOG_SYNC)
			sync_log = true;
		else if (ret == BTRFS_NEED_TRANS_COMMIT)
			commit_transaction = true;
		ret = 0;
		btrfs_end_log_trans(root);
		log_pinned = false;
	}

	if (flags & RENAME_WHITEOUT) {
		ret = btrfs_whiteout_for_rename(trans, root, old_dir,
						old_dentry);

		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out_fail;
		}
	}
out_fail:
	/*
	 * If we pinned the log and an error happened, force a full
	 * transaction commit whenever the log currently contains any of
	 * the inodes involved: syncing such a log could persist an
	 * inconsistent state for those inodes (or lead to inconsistencies
	 * on replay).  If the transaction was aborted, the abort reason
	 * reaches userspace at commit time.  Otherwise, let tasks sync
	 * the log as usual.
	 */
	if (ret && log_pinned) {
		if (btrfs_inode_in_log(BTRFS_I(old_dir), fs_info->generation) ||
		    btrfs_inode_in_log(BTRFS_I(new_dir), fs_info->generation) ||
		    btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
		    (new_inode &&
		     btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation)))
			btrfs_set_log_full_commit(fs_info, trans);

		btrfs_end_log_trans(root);
		log_pinned = false;
	}
	if (!ret && sync_log) {
		ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root, &ctx);
		if (ret)
			commit_transaction = true;
	}
	if (commit_transaction) {
		ret = btrfs_commit_transaction(trans);
	} else {
		int ret2;

		ret2 = btrfs_end_transaction(trans);
		ret = ret ? ret : ret2;
	}
out_notrans:
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
		up_read(&fs_info->subvol_sem);

	return ret;
}
9917
9918static int btrfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
9919 struct inode *new_dir, struct dentry *new_dentry,
9920 unsigned int flags)
9921{
9922 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
9923 return -EINVAL;
9924
9925 if (flags & RENAME_EXCHANGE)
9926 return btrfs_rename_exchange(old_dir, old_dentry, new_dir,
9927 new_dentry);
9928
9929 return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
9930}
9931
/*
 * One asynchronous "flush delalloc for an inode" job queued on
 * fs_info->flush_workers by start_delalloc_inodes().
 */
struct btrfs_delalloc_work {
	struct inode *inode;		/* inode to flush; ref taken via igrab() */
	struct completion completion;	/* signalled when the flush finishes */
	struct list_head list;		/* entry in the submitter's local list */
	struct btrfs_work work;		/* workqueue hook */
};
9938
9939static void btrfs_run_delalloc_work(struct btrfs_work *work)
9940{
9941 struct btrfs_delalloc_work *delalloc_work;
9942 struct inode *inode;
9943
9944 delalloc_work = container_of(work, struct btrfs_delalloc_work,
9945 work);
9946 inode = delalloc_work->inode;
9947 filemap_flush(inode->i_mapping);
9948 if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
9949 &BTRFS_I(inode)->runtime_flags))
9950 filemap_flush(inode->i_mapping);
9951
9952 iput(inode);
9953 complete(&delalloc_work->completion);
9954}
9955
9956static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode)
9957{
9958 struct btrfs_delalloc_work *work;
9959
9960 work = kmalloc(sizeof(*work), GFP_NOFS);
9961 if (!work)
9962 return NULL;
9963
9964 init_completion(&work->completion);
9965 INIT_LIST_HEAD(&work->list);
9966 work->inode = inode;
9967 btrfs_init_work(&work->work, btrfs_flush_delalloc_helper,
9968 btrfs_run_delalloc_work, NULL, NULL);
9969
9970 return work;
9971}
9972
9973
9974
9975
9976
/*
 * Queue async writeback for inodes with pending delalloc on @root and
 * wait for the queued jobs to finish.  At most @nr inodes are started
 * (@nr == -1 means no limit); @snapshot marks each inode for a
 * snapshot-triggered flush.  Returns the number of inodes queued, or
 * -ENOMEM if a work item could not be allocated.
 */
static int start_delalloc_inodes(struct btrfs_root *root, int nr, bool snapshot)
{
	struct btrfs_inode *binode;
	struct inode *inode;
	struct btrfs_delalloc_work *work, *next;
	struct list_head works;
	struct list_head splice;
	int ret = 0;

	INIT_LIST_HEAD(&works);
	INIT_LIST_HEAD(&splice);

	mutex_lock(&root->delalloc_mutex);
	spin_lock(&root->delalloc_lock);
	/* take a private snapshot of the list so we can drop the lock */
	list_splice_init(&root->delalloc_inodes, &splice);
	while (!list_empty(&splice)) {
		binode = list_entry(splice.next, struct btrfs_inode,
				    delalloc_inodes);

		/* move it back so others can still find it while we work */
		list_move_tail(&binode->delalloc_inodes,
			       &root->delalloc_inodes);
		inode = igrab(&binode->vfs_inode);
		if (!inode) {
			/* inode is being evicted; skip it */
			cond_resched_lock(&root->delalloc_lock);
			continue;
		}
		spin_unlock(&root->delalloc_lock);

		if (snapshot)
			set_bit(BTRFS_INODE_SNAPSHOT_FLUSH,
				&binode->runtime_flags);
		work = btrfs_alloc_delalloc_work(inode);
		if (!work) {
			iput(inode);
			ret = -ENOMEM;
			goto out;
		}
		list_add_tail(&work->list, &works);
		btrfs_queue_work(root->fs_info->flush_workers,
				 &work->work);
		ret++;
		if (nr != -1 && ret >= nr)
			goto out;
		cond_resched();
		spin_lock(&root->delalloc_lock);
	}
	spin_unlock(&root->delalloc_lock);

out:
	/* wait for every job we managed to queue */
	list_for_each_entry_safe(work, next, &works, list) {
		list_del_init(&work->list);
		wait_for_completion(&work->completion);
		kfree(work);
	}

	/* return any inodes we did not get to back to the root's list */
	if (!list_empty(&splice)) {
		spin_lock(&root->delalloc_lock);
		list_splice_tail(&splice, &root->delalloc_inodes);
		spin_unlock(&root->delalloc_lock);
	}
	mutex_unlock(&root->delalloc_mutex);
	return ret;
}
10040
10041int btrfs_start_delalloc_snapshot(struct btrfs_root *root)
10042{
10043 struct btrfs_fs_info *fs_info = root->fs_info;
10044 int ret;
10045
10046 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
10047 return -EROFS;
10048
10049 ret = start_delalloc_inodes(root, -1, true);
10050 if (ret > 0)
10051 ret = 0;
10052 return ret;
10053}
10054
/*
 * Flush delalloc on up to @nr inodes across every root that currently
 * has pending delalloc (@nr == -1 means no limit).  Returns 0 on
 * success, -EROFS if the filesystem is in an error state, or a negative
 * errno from start_delalloc_inodes().
 */
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int nr)
{
	struct btrfs_root *root;
	struct list_head splice;
	int ret;

	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
		return -EROFS;

	INIT_LIST_HEAD(&splice);

	mutex_lock(&fs_info->delalloc_root_mutex);
	spin_lock(&fs_info->delalloc_root_lock);
	/* take a private snapshot of the root list so we can drop the lock */
	list_splice_init(&fs_info->delalloc_roots, &splice);
	while (!list_empty(&splice) && nr) {
		root = list_first_entry(&splice, struct btrfs_root,
					delalloc_root);
		root = btrfs_grab_fs_root(root);
		BUG_ON(!root);
		/* put it back so others can still find it while we flush */
		list_move_tail(&root->delalloc_root,
			       &fs_info->delalloc_roots);
		spin_unlock(&fs_info->delalloc_root_lock);

		ret = start_delalloc_inodes(root, nr, false);
		btrfs_put_fs_root(root);
		if (ret < 0)
			goto out;

		/* a positive ret is the number of inodes queued for this root */
		if (nr != -1) {
			nr -= ret;
			WARN_ON(nr < 0);
		}
		spin_lock(&fs_info->delalloc_root_lock);
	}
	spin_unlock(&fs_info->delalloc_root_lock);

	ret = 0;
out:
	if (!list_empty(&splice)) {
		spin_lock(&fs_info->delalloc_root_lock);
		list_splice_tail(&splice, &fs_info->delalloc_roots);
		spin_unlock(&fs_info->delalloc_root_lock);
	}
	mutex_unlock(&fs_info->delalloc_root_mutex);
	return ret;
}
10101
/*
 * VFS ->symlink hook: create a symlink whose target is stored as a
 * single inline file extent.  Returns 0 or a negative errno
 * (-ENAMETOOLONG if the target does not fit inline).
 */
static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
			 const char *symname)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct inode *inode = NULL;
	int err;
	u64 objectid;
	u64 index = 0;
	int name_len;
	int datasize;
	unsigned long ptr;
	struct btrfs_file_extent_item *ei;
	struct extent_buffer *leaf;

	/* the target must fit in one inline extent */
	name_len = strlen(symname);
	if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(fs_info))
		return -ENAMETOOLONG;

	/*
	 * 2 items for inode item and ref
	 * 2 items for dir items
	 * 1 item for updating parent inode item
	 * 1 item for the inline extent item
	 * 1 item for xattr if selinux is on
	 */
	trans = btrfs_start_transaction(root, 7);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	err = btrfs_find_free_ino(root, &objectid);
	if (err)
		goto out_unlock;

	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
				dentry->d_name.len, btrfs_ino(BTRFS_I(dir)),
				objectid, S_IFLNK|S_IRWXUGO, &index);
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
		inode = NULL;
		goto out_unlock;
	}

	/*
	 * Set regular-file ops first: if the active LSM wants to access the
	 * inode during d_instantiate it needs these (e.g. Smack checks the
	 * ops vector to see whether the filesystem supports xattrs).  They
	 * are replaced with the symlink ops further down.
	 */
	inode->i_fop = &btrfs_file_operations;
	inode->i_op = &btrfs_file_inode_operations;
	inode->i_mapping->a_ops = &btrfs_aops;
	BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;

	err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
	if (err)
		goto out_unlock;

	path = btrfs_alloc_path();
	if (!path) {
		err = -ENOMEM;
		goto out_unlock;
	}
	/* store the link target as an inline file extent at offset 0 */
	key.objectid = btrfs_ino(BTRFS_I(inode));
	key.offset = 0;
	key.type = BTRFS_EXTENT_DATA_KEY;
	datasize = btrfs_file_extent_calc_inline_size(name_len);
	err = btrfs_insert_empty_item(trans, root, path, &key,
				      datasize);
	if (err) {
		btrfs_free_path(path);
		goto out_unlock;
	}
	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);
	btrfs_set_file_extent_generation(leaf, ei, trans->transid);
	btrfs_set_file_extent_type(leaf, ei,
				   BTRFS_FILE_EXTENT_INLINE);
	btrfs_set_file_extent_encryption(leaf, ei, 0);
	btrfs_set_file_extent_compression(leaf, ei, 0);
	btrfs_set_file_extent_other_encoding(leaf, ei, 0);
	btrfs_set_file_extent_ram_bytes(leaf, ei, name_len);

	ptr = btrfs_file_extent_inline_start(ei);
	write_extent_buffer(leaf, symname, ptr, name_len);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_free_path(path);

	/* now that the target is written, switch to the real symlink ops */
	inode->i_op = &btrfs_symlink_inode_operations;
	inode_nohighmem(inode);
	inode->i_mapping->a_ops = &btrfs_aops;
	inode_set_bytes(inode, name_len);
	btrfs_i_size_write(BTRFS_I(inode), name_len);
	err = btrfs_update_inode(trans, root, inode);
	/*
	 * Last step, add the directory index for our symlink inode.  Doing
	 * this last avoids races with readers of the symlink.
	 */
	if (!err)
		err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry,
				       BTRFS_I(inode), 0, index);
	if (err)
		goto out_unlock;

	d_instantiate_new(dentry, inode);

out_unlock:
	btrfs_end_transaction(trans);
	if (err && inode) {
		inode_dec_link_count(inode);
		discard_new_inode(inode);
	}
	btrfs_btree_balance_dirty(fs_info);
	return err;
}
10222
/*
 * Preallocate extents for the file range [start, start + num_bytes).
 *
 * Inserts BTRFS_FILE_EXTENT_PREALLOC file extent items, caches matching
 * extent maps, and, unless FALLOC_FL_KEEP_SIZE is set in @mode, grows
 * i_size up to min(cur_offset, actual_len) as allocations progress.
 *
 * @trans: if NULL, a fresh transaction is started and ended around each
 *         allocation; otherwise the caller's handle is used throughout.
 *
 * Returns 0 on success or a negative errno.  On early exit, the data
 * space still reserved for the unallocated tail of the range is released.
 */
static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
				       u64 start, u64 num_bytes, u64 min_size,
				       loff_t actual_len, u64 *alloc_hint,
				       struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	struct extent_map *em;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_key ins;
	u64 cur_offset = start;
	u64 i_size;
	u64 cur_bytes;
	u64 last_alloc = (u64)-1;	/* size of the previous allocation */
	int ret = 0;
	bool own_trans = true;
	u64 end = start + num_bytes - 1;

	if (trans)
		own_trans = false;
	while (num_bytes > 0) {
		if (own_trans) {
			trans = btrfs_start_transaction(root, 3);
			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
				break;
			}
		}

		cur_bytes = min_t(u64, num_bytes, SZ_256M);
		cur_bytes = max(cur_bytes, min_size);

		/*
		 * If we are severely fragmented we could end up with really
		 * small allocations, so if the allocator is returning small
		 * chunks let's make its job easier by only asking for chunks
		 * no bigger than the previous one.
		 */
		cur_bytes = min(cur_bytes, last_alloc);
		ret = btrfs_reserve_extent(root, cur_bytes, cur_bytes,
				min_size, 0, *alloc_hint, &ins, 1, 0);
		if (ret) {
			if (own_trans)
				btrfs_end_transaction(trans);
			break;
		}
		btrfs_dec_block_group_reservations(fs_info, ins.objectid);

		last_alloc = ins.offset;
		ret = insert_reserved_file_extent(trans, inode,
						  cur_offset, ins.objectid,
						  ins.offset, ins.offset,
						  ins.offset, 0, 0, 0,
						  BTRFS_FILE_EXTENT_PREALLOC);
		if (ret) {
			btrfs_free_reserved_extent(fs_info, ins.objectid,
						   ins.offset, 0);
			btrfs_abort_transaction(trans, ret);
			if (own_trans)
				btrfs_end_transaction(trans);
			break;
		}

		btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset,
					cur_offset + ins.offset -1, 0);

		em = alloc_extent_map();
		if (!em) {
			/*
			 * We could not cache an extent map for the new range;
			 * force a full sync so logging does not depend on the
			 * (now incomplete) extent map cache.
			 */
			set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
				&BTRFS_I(inode)->runtime_flags);
			goto next;
		}

		em->start = cur_offset;
		em->orig_start = cur_offset;
		em->len = ins.offset;
		em->block_start = ins.objectid;
		em->block_len = ins.offset;
		em->orig_block_len = ins.offset;
		em->ram_bytes = ins.offset;
		em->bdev = fs_info->fs_devices->latest_bdev;
		set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
		em->generation = trans->transid;

		while (1) {
			write_lock(&em_tree->lock);
			ret = add_extent_mapping(em_tree, em, 1);
			write_unlock(&em_tree->lock);
			if (ret != -EEXIST)
				break;
			/* Drop the conflicting cached range and retry. */
			btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset,
						cur_offset + ins.offset - 1,
						0);
		}
		free_extent_map(em);
next:
		num_bytes -= ins.offset;
		cur_offset += ins.offset;
		*alloc_hint = ins.objectid + ins.offset;

		inode_inc_iversion(inode);
		inode->i_ctime = current_time(inode);
		BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
		if (!(mode & FALLOC_FL_KEEP_SIZE) &&
		    (actual_len > inode->i_size) &&
		    (cur_offset > inode->i_size)) {
			if (cur_offset > actual_len)
				i_size = actual_len;
			else
				i_size = cur_offset;
			i_size_write(inode, i_size);
			btrfs_ordered_update_i_size(inode, i_size, NULL);
		}

		ret = btrfs_update_inode(trans, root, inode);

		if (ret) {
			btrfs_abort_transaction(trans, ret);
			if (own_trans)
				btrfs_end_transaction(trans);
			break;
		}

		if (own_trans)
			btrfs_end_transaction(trans);
	}
	/* Release data space reserved for any range we never allocated. */
	if (cur_offset < end)
		btrfs_free_reserved_data_space(inode, NULL, cur_offset,
			end - cur_offset + 1);
	return ret;
}
10353
10354int btrfs_prealloc_file_range(struct inode *inode, int mode,
10355 u64 start, u64 num_bytes, u64 min_size,
10356 loff_t actual_len, u64 *alloc_hint)
10357{
10358 return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
10359 min_size, actual_len, alloc_hint,
10360 NULL);
10361}
10362
10363int btrfs_prealloc_file_range_trans(struct inode *inode,
10364 struct btrfs_trans_handle *trans, int mode,
10365 u64 start, u64 num_bytes, u64 min_size,
10366 loff_t actual_len, u64 *alloc_hint)
10367{
10368 return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
10369 min_size, actual_len, alloc_hint, trans);
10370}
10371
/*
 * ->set_page_dirty hook.  btrfs pages carry no buffer heads, so the
 * generic nobuffers helper is all that is needed.
 */
static int btrfs_set_page_dirty(struct page *page)
{
	return __set_page_dirty_nobuffers(page);
}
10376
10377static int btrfs_permission(struct inode *inode, int mask)
10378{
10379 struct btrfs_root *root = BTRFS_I(inode)->root;
10380 umode_t mode = inode->i_mode;
10381
10382 if (mask & MAY_WRITE &&
10383 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
10384 if (btrfs_root_readonly(root))
10385 return -EROFS;
10386 if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY)
10387 return -EACCES;
10388 }
10389 return generic_permission(inode, mask);
10390}
10391
/*
 * ->tmpfile hook: create an unlinked temporary regular file in @dir.
 *
 * The new inode is added to the orphan list so it is reclaimed after a
 * crash, then handed to the dcache via d_tmpfile().  Returns 0 on
 * success or a negative errno.
 */
static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct inode *inode = NULL;
	u64 objectid;
	u64 index;
	int ret = 0;

	/*
	 * 5 units required for adding orphan entry
	 */
	trans = btrfs_start_transaction(root, 5);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	ret = btrfs_find_free_ino(root, &objectid);
	if (ret)
		goto out;

	inode = btrfs_new_inode(trans, root, dir, NULL, 0,
			btrfs_ino(BTRFS_I(dir)), objectid, mode, &index);
	if (IS_ERR(inode)) {
		ret = PTR_ERR(inode);
		inode = NULL;
		goto out;
	}

	inode->i_fop = &btrfs_file_operations;
	inode->i_op = &btrfs_file_inode_operations;

	inode->i_mapping->a_ops = &btrfs_aops;
	BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;

	ret = btrfs_init_inode_security(trans, inode, dir, NULL);
	if (ret)
		goto out;

	ret = btrfs_update_inode(trans, root, inode);
	if (ret)
		goto out;
	ret = btrfs_orphan_add(trans, BTRFS_I(inode));
	if (ret)
		goto out;

	/*
	 * Bump the link count to 1 before d_tmpfile(): d_tmpfile() calls
	 * inode_dec_link_count(), which would warn if i_nlink were already
	 * 0.  NOTE(review): btrfs_new_inode() presumably creates the inode
	 * with 0 links — confirm against its definition.
	 */
	set_nlink(inode, 1);
	d_tmpfile(dentry, inode);
	unlock_new_inode(inode);
	mark_inode_dirty(inode);
out:
	btrfs_end_transaction(trans);
	if (ret && inode)
		discard_new_inode(inode);
	btrfs_btree_balance_dirty(fs_info);
	return ret;
}
10456
10457void btrfs_set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
10458{
10459 struct inode *inode = tree->private_data;
10460 unsigned long index = start >> PAGE_SHIFT;
10461 unsigned long end_index = end >> PAGE_SHIFT;
10462 struct page *page;
10463
10464 while (index <= end_index) {
10465 page = find_get_page(inode->i_mapping, index);
10466 ASSERT(page);
10467 set_page_writeback(page);
10468 put_page(page);
10469 index++;
10470 }
10471}
10472
10473#ifdef CONFIG_SWAP
10474
10475
10476
10477
10478
10479static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr,
10480 bool is_block_group)
10481{
10482 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
10483 struct btrfs_swapfile_pin *sp, *entry;
10484 struct rb_node **p;
10485 struct rb_node *parent = NULL;
10486
10487 sp = kmalloc(sizeof(*sp), GFP_NOFS);
10488 if (!sp)
10489 return -ENOMEM;
10490 sp->ptr = ptr;
10491 sp->inode = inode;
10492 sp->is_block_group = is_block_group;
10493
10494 spin_lock(&fs_info->swapfile_pins_lock);
10495 p = &fs_info->swapfile_pins.rb_node;
10496 while (*p) {
10497 parent = *p;
10498 entry = rb_entry(parent, struct btrfs_swapfile_pin, node);
10499 if (sp->ptr < entry->ptr ||
10500 (sp->ptr == entry->ptr && sp->inode < entry->inode)) {
10501 p = &(*p)->rb_left;
10502 } else if (sp->ptr > entry->ptr ||
10503 (sp->ptr == entry->ptr && sp->inode > entry->inode)) {
10504 p = &(*p)->rb_right;
10505 } else {
10506 spin_unlock(&fs_info->swapfile_pins_lock);
10507 kfree(sp);
10508 return 1;
10509 }
10510 }
10511 rb_link_node(&sp->node, parent, p);
10512 rb_insert_color(&sp->node, &fs_info->swapfile_pins);
10513 spin_unlock(&fs_info->swapfile_pins_lock);
10514 return 0;
10515}
10516
10517
10518static void btrfs_free_swapfile_pins(struct inode *inode)
10519{
10520 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
10521 struct btrfs_swapfile_pin *sp;
10522 struct rb_node *node, *next;
10523
10524 spin_lock(&fs_info->swapfile_pins_lock);
10525 node = rb_first(&fs_info->swapfile_pins);
10526 while (node) {
10527 next = rb_next(node);
10528 sp = rb_entry(node, struct btrfs_swapfile_pin, node);
10529 if (sp->inode == inode) {
10530 rb_erase(&sp->node, &fs_info->swapfile_pins);
10531 if (sp->is_block_group)
10532 btrfs_put_block_group(sp->ptr);
10533 kfree(sp);
10534 }
10535 node = next;
10536 }
10537 spin_unlock(&fs_info->swapfile_pins_lock);
10538}
10539
/* Bookkeeping while walking a swapfile's extents in btrfs_swap_activate(). */
struct btrfs_swap_info {
	u64 start;		/* file offset of the current contiguous run */
	u64 block_start;	/* physical start of the current run */
	u64 block_len;		/* byte length of the current run */
	u64 lowest_ppage;	/* lowest physical page seen (for *span) */
	u64 highest_ppage;	/* highest physical page seen (for *span) */
	unsigned long nr_pages;	/* total pages handed to add_swap_extent() */
	int nr_extents;		/* total swap extents created so far */
};
10549
/*
 * Hand the physically contiguous run described by @bsi to the swap core
 * via add_swap_extent(), trimmed to whole pages, and update @bsi's totals.
 *
 * Returns 0 on success (a run too short to cover one full page is
 * silently skipped) or a negative errno from add_swap_extent().
 */
static int btrfs_add_swap_extent(struct swap_info_struct *sis,
				 struct btrfs_swap_info *bsi)
{
	unsigned long nr_pages;
	u64 first_ppage, first_ppage_reported, next_ppage;
	int ret;

	/* Round inward to the physical pages fully contained in the run. */
	first_ppage = ALIGN(bsi->block_start, PAGE_SIZE) >> PAGE_SHIFT;
	next_ppage = ALIGN_DOWN(bsi->block_start + bsi->block_len,
				PAGE_SIZE) >> PAGE_SHIFT;

	if (first_ppage >= next_ppage)
		return 0;
	nr_pages = next_ppage - first_ppage;

	/*
	 * The first file page holds the swap header and is never swapped to,
	 * so exclude it from the reported lowest page.
	 */
	first_ppage_reported = first_ppage;
	if (bsi->start == 0)
		first_ppage_reported++;
	if (bsi->lowest_ppage > first_ppage_reported)
		bsi->lowest_ppage = first_ppage_reported;
	if (bsi->highest_ppage < (next_ppage - 1))
		bsi->highest_ppage = next_ppage - 1;

	/* add_swap_extent() returns the number of extents added (0 or 1). */
	ret = add_swap_extent(sis, bsi->nr_pages, nr_pages, first_ppage);
	if (ret < 0)
		return ret;
	bsi->nr_extents += ret;
	bsi->nr_pages += nr_pages;
	return 0;
}
10580
/*
 * ->swap_deactivate hook: drop the device/block-group pins taken during
 * activation and decrement the root's active-swapfile counter.
 */
static void btrfs_swap_deactivate(struct file *file)
{
	struct inode *inode = file_inode(file);

	btrfs_free_swapfile_pins(inode);
	atomic_dec(&BTRFS_I(inode)->root->nr_swapfiles);
}
10588
/*
 * ->swap_activate hook: build the swap extent map for a swapfile so swap
 * I/O can go straight to the block device.
 *
 * The file must be NODATACOW/NODATASUM, not compressed, with no holes or
 * inline extents, and must live entirely on a single device with a plain
 * (non-RAID/DUP) data profile.  Every block group backing the file, plus
 * the device itself, is pinned via btrfs_add_swapfile_pin() until
 * btrfs_swap_deactivate() runs.
 *
 * Returns the number of swap extents created (>= 0) on success, or a
 * negative errno on failure.
 */
static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
			       sector_t *span)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_state *cached_state = NULL;
	struct extent_map *em = NULL;
	struct btrfs_device *device = NULL;
	struct btrfs_swap_info bsi = {
		.lowest_ppage = (sector_t)-1ULL,
	};
	int ret = 0;
	u64 isize;
	u64 start;

	/*
	 * If the swap file was just created, make sure delalloc is done so
	 * the on-disk extents we are about to map are final.
	 */
	ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
	if (ret)
		return ret;

	/*
	 * The inode is locked, so these flags won't change after we check
	 * them.
	 */
	if (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS) {
		btrfs_warn(fs_info, "swapfile must not be compressed");
		return -EINVAL;
	}
	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)) {
		btrfs_warn(fs_info, "swapfile must not be copy-on-write");
		return -EINVAL;
	}
	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
		btrfs_warn(fs_info, "swapfile must not be checksummed");
		return -EINVAL;
	}

	/*
	 * Balance or device remove/replace/resize can move extents around
	 * from under us.  Holding BTRFS_FS_EXCL_OP makes sure none of those
	 * run concurrently while we are mapping the swap extents, and
	 * fs_info->swapfile_pins keeps them away from this file's extents
	 * while the swap file is active.
	 */
	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
		btrfs_warn(fs_info,
	   "cannot activate swapfile while exclusive operation is running");
		return -EBUSY;
	}

	/*
	 * Record that this root has an active swapfile; undone in
	 * btrfs_swap_deactivate().  NOTE(review): presumably read elsewhere
	 * to refuse operations that would relocate this file's extents —
	 * confirm against users of root->nr_swapfiles.
	 */
	atomic_inc(&BTRFS_I(inode)->root->nr_swapfiles);

	isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);

	lock_extent_bits(io_tree, 0, isize - 1, &cached_state);
	start = 0;
	while (start < isize) {
		u64 logical_block_start, physical_block_start;
		struct btrfs_block_group_cache *bg;
		u64 len = isize - start;

		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0);
		if (IS_ERR(em)) {
			ret = PTR_ERR(em);
			goto out;
		}

		if (em->block_start == EXTENT_MAP_HOLE) {
			btrfs_warn(fs_info, "swapfile must not have holes");
			ret = -EINVAL;
			goto out;
		}
		if (em->block_start == EXTENT_MAP_INLINE) {
			/*
			 * It's unlikely we'll ever actually find ourselves
			 * here: a file small enough to be inline won't hold
			 * more than the swap header.  Catch it anyway in case
			 * something changes in the future.
			 */
			btrfs_warn(fs_info, "swapfile must not be inline");
			ret = -EINVAL;
			goto out;
		}
		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
			btrfs_warn(fs_info, "swapfile must not be compressed");
			ret = -EINVAL;
			goto out;
		}

		logical_block_start = em->block_start + (start - em->start);
		len = min(len, em->len - (start - em->start));
		free_extent_map(em);
		em = NULL;

		/* Double-check this extent range really cannot be COWed. */
		ret = can_nocow_extent(inode, start, &len, NULL, NULL, NULL);
		if (ret < 0) {
			goto out;
		} else if (ret) {
			ret = 0;
		} else {
			btrfs_warn(fs_info,
				   "swapfile must not be copy-on-write");
			ret = -EINVAL;
			goto out;
		}

		/* Translate the logical range to a device-physical range. */
		em = btrfs_get_chunk_map(fs_info, logical_block_start, len);
		if (IS_ERR(em)) {
			ret = PTR_ERR(em);
			goto out;
		}

		if (em->map_lookup->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
			btrfs_warn(fs_info,
				   "swapfile must have single data profile");
			ret = -EINVAL;
			goto out;
		}

		if (device == NULL) {
			device = em->map_lookup->stripes[0].dev;
			ret = btrfs_add_swapfile_pin(inode, device, false);
			if (ret == 1)
				ret = 0;
			else if (ret)
				goto out;
		} else if (device != em->map_lookup->stripes[0].dev) {
			btrfs_warn(fs_info, "swapfile must be on one device");
			ret = -EINVAL;
			goto out;
		}

		physical_block_start = (em->map_lookup->stripes[0].physical +
					(logical_block_start - em->start));
		len = min(len, em->len - (logical_block_start - em->start));
		free_extent_map(em);
		em = NULL;

		/* Pin the block group so its extents cannot be relocated. */
		bg = btrfs_lookup_block_group(fs_info, logical_block_start);
		if (!bg) {
			btrfs_warn(fs_info,
			   "could not find block group containing swapfile");
			ret = -EINVAL;
			goto out;
		}

		ret = btrfs_add_swapfile_pin(inode, bg, true);
		if (ret) {
			btrfs_put_block_group(bg);
			if (ret == 1)
				ret = 0;
			else
				goto out;
		}

		/* Merge physically contiguous runs into one swap extent. */
		if (bsi.block_len &&
		    bsi.block_start + bsi.block_len == physical_block_start) {
			bsi.block_len += len;
		} else {
			if (bsi.block_len) {
				ret = btrfs_add_swap_extent(sis, &bsi);
				if (ret)
					goto out;
			}
			bsi.start = start;
			bsi.block_start = physical_block_start;
			bsi.block_len = len;
		}

		start += len;
	}

	/* Flush the final pending run. */
	if (bsi.block_len)
		ret = btrfs_add_swap_extent(sis, &bsi);

out:
	if (!IS_ERR_OR_NULL(em))
		free_extent_map(em);

	unlock_extent_cached(io_tree, 0, isize - 1, &cached_state);

	if (ret)
		btrfs_swap_deactivate(file);

	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);

	if (ret)
		return ret;

	if (device)
		sis->bdev = device->bdev;
	*span = bsi.highest_ppage - bsi.lowest_ppage + 1;
	sis->max = bsi.nr_pages;
	/* The header page is excluded — see btrfs_add_swap_extent(). */
	sis->pages = bsi.nr_pages - 1;
	sis->highest_bit = bsi.nr_pages - 1;
	return bsi.nr_extents;
}
10798#else
/* Swap support compiled out (!CONFIG_SWAP): nothing to tear down. */
static void btrfs_swap_deactivate(struct file *file)
{
}
10802
/* Swap support compiled out (!CONFIG_SWAP): refuse activation. */
static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
			       sector_t *span)
{
	return -EOPNOTSUPP;
}
10808#endif
10809
/* Inode operations for regular (writable) directories. */
static const struct inode_operations btrfs_dir_inode_operations = {
	.getattr	= btrfs_getattr,
	.lookup		= btrfs_lookup,
	.create		= btrfs_create,
	.unlink		= btrfs_unlink,
	.link		= btrfs_link,
	.mkdir		= btrfs_mkdir,
	.rmdir		= btrfs_rmdir,
	.rename		= btrfs_rename2,
	.symlink	= btrfs_symlink,
	.setattr	= btrfs_setattr,
	.mknod		= btrfs_mknod,
	.listxattr	= btrfs_listxattr,
	.permission	= btrfs_permission,
	.get_acl	= btrfs_get_acl,
	.set_acl	= btrfs_set_acl,
	.update_time	= btrfs_update_time,
	.tmpfile	= btrfs_tmpfile,
};
/*
 * Reduced inode operations for read-only directory inodes: lookup only,
 * no create/rename/remove hooks.
 */
static const struct inode_operations btrfs_dir_ro_inode_operations = {
	.lookup		= btrfs_lookup,
	.permission	= btrfs_permission,
	.update_time	= btrfs_update_time,
};
10834
/* File operations for open directory handles (readdir, ioctl, fsync). */
static const struct file_operations btrfs_dir_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= generic_read_dir,
	.iterate_shared	= btrfs_real_readdir,
	.open		= btrfs_opendir,
	.unlocked_ioctl	= btrfs_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= btrfs_compat_ioctl,
#endif
	.release        = btrfs_release_file,
	.fsync		= btrfs_sync_file,
};
10847
static const struct extent_io_ops btrfs_extent_io_ops = {
	/* mandatory callbacks */
	.submit_bio_hook = btrfs_submit_bio_hook,
	.readpage_end_io_hook = btrfs_readpage_end_io_hook,
};
10853
10854
10855
10856
10857
10858
10859
10860
10861
10862
10863
10864
10865
10866static const struct address_space_operations btrfs_aops = {
10867 .readpage = btrfs_readpage,
10868 .writepage = btrfs_writepage,
10869 .writepages = btrfs_writepages,
10870 .readpages = btrfs_readpages,
10871 .direct_IO = btrfs_direct_IO,
10872 .invalidatepage = btrfs_invalidatepage,
10873 .releasepage = btrfs_releasepage,
10874 .set_page_dirty = btrfs_set_page_dirty,
10875 .error_remove_page = generic_error_remove_page,
10876 .swap_activate = btrfs_swap_activate,
10877 .swap_deactivate = btrfs_swap_deactivate,
10878};
10879
/* Inode operations for regular files. */
static const struct inode_operations btrfs_file_inode_operations = {
	.getattr	= btrfs_getattr,
	.setattr	= btrfs_setattr,
	.listxattr      = btrfs_listxattr,
	.permission	= btrfs_permission,
	.fiemap		= btrfs_fiemap,
	.get_acl	= btrfs_get_acl,
	.set_acl	= btrfs_set_acl,
	.update_time	= btrfs_update_time,
};
/* Inode operations for special files (device nodes, FIFOs, sockets). */
static const struct inode_operations btrfs_special_inode_operations = {
	.getattr	= btrfs_getattr,
	.setattr	= btrfs_setattr,
	.permission	= btrfs_permission,
	.listxattr	= btrfs_listxattr,
	.get_acl	= btrfs_get_acl,
	.set_acl	= btrfs_set_acl,
	.update_time	= btrfs_update_time,
};
/* Inode operations for symlinks; targets are read via the page cache. */
static const struct inode_operations btrfs_symlink_inode_operations = {
	.get_link	= page_get_link,
	.getattr	= btrfs_getattr,
	.setattr	= btrfs_setattr,
	.permission	= btrfs_permission,
	.listxattr	= btrfs_listxattr,
	.update_time	= btrfs_update_time,
};
10907
/* Dentry operations shared by btrfs dentries; only ->d_delete is set. */
const struct dentry_operations btrfs_dentry_operations = {
	.d_delete	= btrfs_dentry_delete,
};
10911