1
2
3
4
5
6#include <crypto/hash.h>
7#include <linux/kernel.h>
8#include <linux/bio.h>
9#include <linux/blk-cgroup.h>
10#include <linux/file.h>
11#include <linux/fs.h>
12#include <linux/pagemap.h>
13#include <linux/highmem.h>
14#include <linux/time.h>
15#include <linux/init.h>
16#include <linux/string.h>
17#include <linux/backing-dev.h>
18#include <linux/writeback.h>
19#include <linux/compat.h>
20#include <linux/xattr.h>
21#include <linux/posix_acl.h>
22#include <linux/falloc.h>
23#include <linux/slab.h>
24#include <linux/ratelimit.h>
25#include <linux/btrfs.h>
26#include <linux/blkdev.h>
27#include <linux/posix_acl_xattr.h>
28#include <linux/uio.h>
29#include <linux/magic.h>
30#include <linux/iversion.h>
31#include <linux/swap.h>
32#include <linux/migrate.h>
33#include <linux/sched/mm.h>
34#include <linux/iomap.h>
35#include <asm/unaligned.h>
36#include <linux/fsverity.h>
37#include "misc.h"
38#include "ctree.h"
39#include "disk-io.h"
40#include "transaction.h"
41#include "btrfs_inode.h"
42#include "print-tree.h"
43#include "ordered-data.h"
44#include "xattr.h"
45#include "tree-log.h"
46#include "volumes.h"
47#include "compression.h"
48#include "locking.h"
49#include "free-space-cache.h"
50#include "props.h"
51#include "qgroup.h"
52#include "delalloc-space.h"
53#include "block-group.h"
54#include "space-info.h"
55#include "zoned.h"
56#include "subpage.h"
57#include "inode-item.h"
58
/*
 * Identifies an inode by its inode number together with the root it lives in.
 * NOTE(review): presumably the lookup key handed to the iget helpers; the
 * users are outside this chunk - confirm.
 */
struct btrfs_iget_args {
	u64 ino;			/* inode number */
	struct btrfs_root *root;	/* root the inode belongs to */
};
63
/*
 * Bookkeeping for a direct I/O request.
 * NOTE(review): the users of this structure are not visible in this chunk;
 * the field notes below are inferred from names and types only - confirm
 * against the direct I/O code.
 */
struct btrfs_dio_data {
	ssize_t submitted;			/* presumably bytes submitted so far */
	struct extent_changeset *data_reserved;	/* presumably the reserved data ranges */
};
68
/*
 * Context carried through a rename operation.
 * NOTE(review): users are outside this chunk - confirm the exact meaning of
 * @index there.
 */
struct btrfs_rename_ctx {
	/* Presumably a directory index number - TODO confirm with the callers. */
	u64 index;
};
73
/* Operation tables, defined later in this file. */
static const struct inode_operations btrfs_dir_inode_operations;
static const struct inode_operations btrfs_symlink_inode_operations;
static const struct inode_operations btrfs_special_inode_operations;
static const struct inode_operations btrfs_file_inode_operations;
static const struct address_space_operations btrfs_aops;
static const struct file_operations btrfs_dir_file_operations;

/*
 * Slab caches. Only the inode cache is private to this file; the others
 * have external linkage.
 */
static struct kmem_cache *btrfs_inode_cachep;
struct kmem_cache *btrfs_trans_handle_cachep;
struct kmem_cache *btrfs_path_cachep;
struct kmem_cache *btrfs_free_space_cachep;
struct kmem_cache *btrfs_free_space_bitmap_cachep;

/* Prototypes for helpers that are used before their definition. */
static int btrfs_setsize(struct inode *inode, struct iattr *attr);
static int btrfs_truncate(struct inode *inode, bool skip_writeback);
static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
static noinline int cow_file_range(struct btrfs_inode *inode,
				   struct page *locked_page,
				   u64 start, u64 end, int *page_started,
				   unsigned long *nr_written, int unlock);
static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
				       u64 len, u64 orig_start, u64 block_start,
				       u64 block_len, u64 orig_block_len,
				       u64 ram_bytes, int compress_type,
				       int type);

static void __endio_write_update_ordered(struct btrfs_inode *inode,
					 const u64 offset, const u64 bytes,
					 const bool uptodate);
103
104
105
106
107
108
109
110
111
112
113
114int btrfs_inode_lock(struct inode *inode, unsigned int ilock_flags)
115{
116 if (ilock_flags & BTRFS_ILOCK_SHARED) {
117 if (ilock_flags & BTRFS_ILOCK_TRY) {
118 if (!inode_trylock_shared(inode))
119 return -EAGAIN;
120 else
121 return 0;
122 }
123 inode_lock_shared(inode);
124 } else {
125 if (ilock_flags & BTRFS_ILOCK_TRY) {
126 if (!inode_trylock(inode))
127 return -EAGAIN;
128 else
129 return 0;
130 }
131 inode_lock(inode);
132 }
133 if (ilock_flags & BTRFS_ILOCK_MMAP)
134 down_write(&BTRFS_I(inode)->i_mmap_lock);
135 return 0;
136}
137
138
139
140
141
142
143
144void btrfs_inode_unlock(struct inode *inode, unsigned int ilock_flags)
145{
146 if (ilock_flags & BTRFS_ILOCK_MMAP)
147 up_write(&BTRFS_I(inode)->i_mmap_lock);
148 if (ilock_flags & BTRFS_ILOCK_SHARED)
149 inode_unlock_shared(inode);
150 else
151 inode_unlock(inode);
152}
153
154
155
156
157
158
159
160
161
162
163
164static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode,
165 struct page *locked_page,
166 u64 offset, u64 bytes)
167{
168 unsigned long index = offset >> PAGE_SHIFT;
169 unsigned long end_index = (offset + bytes - 1) >> PAGE_SHIFT;
170 u64 page_start = page_offset(locked_page);
171 u64 page_end = page_start + PAGE_SIZE - 1;
172
173 struct page *page;
174
175 while (index <= end_index) {
176
177
178
179
180
181
182
183
184
185
186
187 if (index == (page_offset(locked_page) >> PAGE_SHIFT)) {
188 index++;
189 continue;
190 }
191 page = find_get_page(inode->vfs_inode.i_mapping, index);
192 index++;
193 if (!page)
194 continue;
195
196
197
198
199
200
201 btrfs_page_clamp_clear_ordered(inode->root->fs_info, page,
202 offset, bytes);
203 put_page(page);
204 }
205
206
207 if (bytes + offset <= page_offset(locked_page) + PAGE_SIZE)
208 return;
209
210
211
212
213
214 if (page_start >= offset && page_end <= (offset + bytes - 1)) {
215 bytes = offset + bytes - page_offset(locked_page) - PAGE_SIZE;
216 offset = page_offset(locked_page) + PAGE_SIZE;
217 }
218
219 return __endio_write_update_ordered(inode, offset, bytes, false);
220}
221
/* Defined later in this file; declared here for earlier users. */
static int btrfs_dirty_inode(struct inode *inode);
223
/*
 * Set up the security attributes of a new inode: initialise the ACLs first
 * and, only if that worked, the security xattrs.
 *
 * Return: 0 on success, otherwise the error of the step that failed.
 */
static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
				     struct inode *inode,  struct inode *dir,
				     const struct qstr *qstr)
{
	int err = btrfs_init_acl(trans, inode, dir);

	if (err)
		return err;
	return btrfs_xattr_security_init(trans, inode, dir, qstr);
}
235
236
237
238
239
240
/*
 * Fill in the file extent item of an inline extent at file offset 0.
 *
 * @trans:            transaction handle; its transid becomes the item's
 *                    generation
 * @path:             path for the item.  When @extent_inserted is false the
 *                    item is inserted here, otherwise the path is expected to
 *                    point at it already.  Released once the item is written.
 * @inode:            inode that receives the inline extent
 * @extent_inserted:  true when the caller already created the item
 * @size:             uncompressed data length, stored as ram_bytes
 * @compressed_size:  length of the compressed payload, 0 when uncompressed
 * @compress_type:    BTRFS_COMPRESS_* type of the payload
 * @compressed_pages: pages with the compressed payload, NULL when uncompressed
 * @update_i_size:    raise the in-memory i_size to @size if @size is larger
 *
 * Return: 0 on success, otherwise the error from btrfs_insert_empty_item() or
 * btrfs_inode_set_file_extent_range().
 */
static int insert_inline_extent(struct btrfs_trans_handle *trans,
				struct btrfs_path *path,
				struct btrfs_inode *inode, bool extent_inserted,
				size_t size, size_t compressed_size,
				int compress_type,
				struct page **compressed_pages,
				bool update_i_size)
{
	struct btrfs_root *root = inode->root;
	struct extent_buffer *leaf;
	struct page *page = NULL;
	char *kaddr;
	unsigned long ptr;
	struct btrfs_file_extent_item *ei;
	int ret;
	size_t cur_size = size;
	u64 i_size;

	/* A compressed size and compressed pages come together or not at all. */
	ASSERT((compressed_size > 0 && compressed_pages) ||
	       (compressed_size == 0 && !compressed_pages));

	/* The item payload is the compressed data when there is any. */
	if (compressed_size && compressed_pages)
		cur_size = compressed_size;

	if (!extent_inserted) {
		struct btrfs_key key;
		size_t datasize;

		key.objectid = btrfs_ino(inode);
		key.offset = 0;
		key.type = BTRFS_EXTENT_DATA_KEY;

		datasize = btrfs_file_extent_calc_inline_size(cur_size);
		ret = btrfs_insert_empty_item(trans, root, path, &key,
					      datasize);
		if (ret)
			goto fail;
	}
	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);
	btrfs_set_file_extent_generation(leaf, ei, trans->transid);
	btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE);
	btrfs_set_file_extent_encryption(leaf, ei, 0);
	btrfs_set_file_extent_other_encoding(leaf, ei, 0);
	btrfs_set_file_extent_ram_bytes(leaf, ei, size);
	ptr = btrfs_file_extent_inline_start(ei);

	if (compress_type != BTRFS_COMPRESS_NONE) {
		struct page *cpage;
		int i = 0;
		/* Copy the compressed payload into the leaf one page at a time. */
		while (compressed_size > 0) {
			cpage = compressed_pages[i];
			cur_size = min_t(unsigned long, compressed_size,
				       PAGE_SIZE);

			kaddr = kmap_atomic(cpage);
			write_extent_buffer(leaf, kaddr, ptr, cur_size);
			kunmap_atomic(kaddr);

			i++;
			ptr += cur_size;
			compressed_size -= cur_size;
		}
		btrfs_set_file_extent_compression(leaf, ei,
						  compress_type);
	} else {
		/*
		 * Uncompressed: copy @size bytes straight from page 0 of the
		 * inode's mapping.
		 * NOTE(review): the find_get_page() result is used without a
		 * NULL check; presumably the caller guarantees the page is
		 * present - confirm.
		 */
		page = find_get_page(inode->vfs_inode.i_mapping, 0);
		btrfs_set_file_extent_compression(leaf, ei, 0);
		kaddr = kmap_atomic(page);
		write_extent_buffer(leaf, kaddr, ptr, size);
		kunmap_atomic(kaddr);
		put_page(page);
	}
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	/* Record the sector-aligned range [0, ALIGN(size)) as a file extent range. */
	ret = btrfs_inode_set_file_extent_range(inode, 0,
					ALIGN(size, root->fs_info->sectorsize));
	if (ret)
		goto fail;

	/*
	 * disk_i_size is set to the in-memory i_size, which is first raised to
	 * @size when the caller asked for that and @size is larger.
	 */
	i_size = i_size_read(&inode->vfs_inode);
	if (update_i_size && size > i_size) {
		i_size_write(&inode->vfs_inode, size);
		i_size = size;
	}
	inode->disk_i_size = i_size;

fail:
	return ret;
}
344
345
346
347
348
349
350
/*
 * Try to store the first @size bytes of the file as an inline extent.
 *
 * @inode:            inode to operate on
 * @size:             uncompressed length of the data
 * @compressed_size:  length of the compressed data, 0 when not compressed
 * @compress_type:    BTRFS_COMPRESS_* type
 * @compressed_pages: pages holding the compressed data, or NULL
 * @update_i_size:    passed through to insert_inline_extent()
 *
 * Return: 0 when the inline extent was created; 1 when the data does not
 * qualify for inlining or when inserting the extent / updating the inode hit
 * -ENOSPC; a negative errno otherwise.
 */
static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 size,
					  size_t compressed_size,
					  int compress_type,
					  struct page **compressed_pages,
					  bool update_i_size)
{
	struct btrfs_drop_extents_args drop_args = { 0 };
	struct btrfs_root *root = inode->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_trans_handle *trans;
	/* Bytes that end up in the item: compressed size if there is one. */
	u64 data_len = (compressed_size ?: size);
	int ret;
	struct btrfs_path *path;

	/*
	 * Not eligible when the data ends before i_size, spans more than one
	 * sector, or the payload exceeds either inline size limit.
	 */
	if (size < i_size_read(&inode->vfs_inode) ||
	    size > fs_info->sectorsize ||
	    data_len > BTRFS_MAX_INLINE_DATA_SIZE(fs_info) ||
	    data_len > fs_info->max_inline)
		return 1;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);
	}
	trans->block_rsv = &inode->block_rsv;

	/*
	 * Drop whatever extents exist in the first sector and ask for the
	 * slot to be reused for an item of the inline extent's size.
	 */
	drop_args.path = path;
	drop_args.start = 0;
	drop_args.end = fs_info->sectorsize;
	drop_args.drop_cache = true;
	drop_args.replace_extent = true;
	drop_args.extent_item_size = btrfs_file_extent_calc_inline_size(data_len);
	ret = btrfs_drop_extents(trans, root, inode, &drop_args);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	ret = insert_inline_extent(trans, path, inode, drop_args.extent_inserted,
				   size, compressed_size, compress_type,
				   compressed_pages, update_i_size);
	/* -ENOSPC is reported as "not inlined" (1) without aborting. */
	if (ret && ret != -ENOSPC) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	} else if (ret == -ENOSPC) {
		ret = 1;
		goto out;
	}

	btrfs_update_inode_bytes(inode, size, drop_args.bytes_found);
	ret = btrfs_update_inode(trans, root, inode);
	if (ret && ret != -ENOSPC) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	} else if (ret == -ENOSPC) {
		ret = 1;
		goto out;
	}

	btrfs_set_inode_full_sync(inode);
out:
	/*
	 * Reached on success and on failure alike: btrfs_qgroup_free_data()
	 * is called for the range [0, PAGE_SIZE) in every case.
	 */
	btrfs_qgroup_free_data(inode, NULL, 0, PAGE_SIZE);
	btrfs_free_path(path);
	btrfs_end_transaction(trans);
	return ret;
}
434
/*
 * One piece of an async_chunk, queued by add_async_extent() and consumed by
 * submit_one_async_extent().
 */
struct async_extent {
	u64 start;			/* file offset of the piece */
	u64 ram_size;			/* length of the piece in the file */
	u64 compressed_size;		/* compressed length, 0 if not compressed */
	struct page **pages;		/* compressed pages; NULL means write uncompressed */
	unsigned long nr_pages;		/* number of entries in @pages */
	int compress_type;		/* BTRFS_COMPRESS_* type */
	struct list_head list;		/* link in async_chunk::extents */
};
444
/*
 * One unit of asynchronous delalloc work, set up by cow_file_range_async()
 * and processed by async_cow_start(), async_cow_submit() and
 * async_cow_free().
 */
struct async_chunk {
	/* Held with ihold(); set to NULL by async_cow_start() when nothing is left to submit. */
	struct inode *inode;
	/* The caller's locked page; only the first chunk of a range gets it. */
	struct page *locked_page;
	u64 start;			/* first byte of the chunk */
	u64 end;			/* last byte of the chunk (inclusive) */
	unsigned int write_flags;	/* from wbc_to_write_flags() */
	struct list_head extents;	/* list of struct async_extent */
	/* Referenced blkcg css, or NULL when it is the root css. */
	struct cgroup_subsys_state *blkcg_css;
	struct btrfs_work work;
	struct async_cow *async_cow;	/* container this chunk is part of */
};
456
/*
 * Container for all chunks of one cow_file_range_async() call.  It is freed
 * by async_cow_free() once @num_chunks drops to zero.
 */
struct async_cow {
	atomic_t num_chunks;		/* chunks not yet freed */
	struct async_chunk chunks[];
};
461
462static noinline int add_async_extent(struct async_chunk *cow,
463 u64 start, u64 ram_size,
464 u64 compressed_size,
465 struct page **pages,
466 unsigned long nr_pages,
467 int compress_type)
468{
469 struct async_extent *async_extent;
470
471 async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS);
472 BUG_ON(!async_extent);
473 async_extent->start = start;
474 async_extent->ram_size = ram_size;
475 async_extent->compressed_size = compressed_size;
476 async_extent->pages = pages;
477 async_extent->nr_pages = nr_pages;
478 async_extent->compress_type = compress_type;
479 list_add_tail(&async_extent->list, &cow->extents);
480 return 0;
481}
482
483
484
485
486
487static inline int inode_need_compress(struct btrfs_inode *inode, u64 start,
488 u64 end)
489{
490 struct btrfs_fs_info *fs_info = inode->root->fs_info;
491
492 if (!btrfs_inode_can_compress(inode)) {
493 WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
494 KERN_ERR "BTRFS: unexpected compression for ino %llu\n",
495 btrfs_ino(inode));
496 return 0;
497 }
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524 if (fs_info->sectorsize < PAGE_SIZE) {
525 if (!IS_ALIGNED(start, PAGE_SIZE) ||
526 !IS_ALIGNED(end + 1, PAGE_SIZE))
527 return 0;
528 }
529
530
531 if (btrfs_test_opt(fs_info, FORCE_COMPRESS))
532 return 1;
533
534 if (inode->defrag_compress)
535 return 1;
536
537 if (inode->flags & BTRFS_INODE_NOCOMPRESS)
538 return 0;
539 if (btrfs_test_opt(fs_info, COMPRESS) ||
540 inode->flags & BTRFS_INODE_COMPRESS ||
541 inode->prop_compress)
542 return btrfs_compress_heuristic(&inode->vfs_inode, start, end);
543 return 0;
544}
545
546static inline void inode_should_defrag(struct btrfs_inode *inode,
547 u64 start, u64 end, u64 num_bytes, u32 small_write)
548{
549
550 if (num_bytes < small_write &&
551 (start > 0 || end + 1 < inode->disk_i_size))
552 btrfs_add_inode_defrag(NULL, inode, small_write);
553}
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572static noinline int compress_file_range(struct async_chunk *async_chunk)
573{
574 struct inode *inode = async_chunk->inode;
575 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
576 u64 blocksize = fs_info->sectorsize;
577 u64 start = async_chunk->start;
578 u64 end = async_chunk->end;
579 u64 actual_end;
580 u64 i_size;
581 int ret = 0;
582 struct page **pages = NULL;
583 unsigned long nr_pages;
584 unsigned long total_compressed = 0;
585 unsigned long total_in = 0;
586 int i;
587 int will_compress;
588 int compress_type = fs_info->compress_type;
589 int compressed_extents = 0;
590 int redirty = 0;
591
592 inode_should_defrag(BTRFS_I(inode), start, end, end - start + 1,
593 SZ_16K);
594
595
596
597
598
599
600
601
602
603
604 barrier();
605 i_size = i_size_read(inode);
606 barrier();
607 actual_end = min_t(u64, i_size, end + 1);
608again:
609 will_compress = 0;
610 nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1;
611 nr_pages = min_t(unsigned long, nr_pages,
612 BTRFS_MAX_COMPRESSED / PAGE_SIZE);
613
614
615
616
617
618
619
620
621
622
623
624 if (actual_end <= start)
625 goto cleanup_and_bail_uncompressed;
626
627 total_compressed = actual_end - start;
628
629
630
631
632
633 if (total_compressed <= blocksize &&
634 (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
635 goto cleanup_and_bail_uncompressed;
636
637
638
639
640
641
642 if (blocksize < PAGE_SIZE) {
643 if (!IS_ALIGNED(start, PAGE_SIZE) ||
644 !IS_ALIGNED(round_up(actual_end, blocksize), PAGE_SIZE))
645 goto cleanup_and_bail_uncompressed;
646 }
647
648 total_compressed = min_t(unsigned long, total_compressed,
649 BTRFS_MAX_UNCOMPRESSED);
650 total_in = 0;
651 ret = 0;
652
653
654
655
656
657
658 if (inode_need_compress(BTRFS_I(inode), start, end)) {
659 WARN_ON(pages);
660 pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
661 if (!pages) {
662
663 nr_pages = 0;
664 goto cont;
665 }
666
667 if (BTRFS_I(inode)->defrag_compress)
668 compress_type = BTRFS_I(inode)->defrag_compress;
669 else if (BTRFS_I(inode)->prop_compress)
670 compress_type = BTRFS_I(inode)->prop_compress;
671
672
673
674
675
676
677
678
679
680
681
682
683
684 if (!redirty) {
685 extent_range_clear_dirty_for_io(inode, start, end);
686 redirty = 1;
687 }
688
689
690 ret = btrfs_compress_pages(
691 compress_type | (fs_info->compress_level << 4),
692 inode->i_mapping, start,
693 pages,
694 &nr_pages,
695 &total_in,
696 &total_compressed);
697
698 if (!ret) {
699 unsigned long offset = offset_in_page(total_compressed);
700 struct page *page = pages[nr_pages - 1];
701
702
703
704
705 if (offset)
706 memzero_page(page, offset, PAGE_SIZE - offset);
707 will_compress = 1;
708 }
709 }
710cont:
711
712
713
714
715 if (start == 0 && fs_info->sectorsize == PAGE_SIZE) {
716
717 if (ret || total_in < actual_end) {
718
719
720
721 ret = cow_file_range_inline(BTRFS_I(inode), actual_end,
722 0, BTRFS_COMPRESS_NONE,
723 NULL, false);
724 } else {
725
726 ret = cow_file_range_inline(BTRFS_I(inode), actual_end,
727 total_compressed,
728 compress_type, pages,
729 false);
730 }
731 if (ret <= 0) {
732 unsigned long clear_flags = EXTENT_DELALLOC |
733 EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
734 EXTENT_DO_ACCOUNTING;
735 unsigned long page_error_op;
736
737 page_error_op = ret < 0 ? PAGE_SET_ERROR : 0;
738
739
740
741
742
743
744
745
746
747
748
749 extent_clear_unlock_delalloc(BTRFS_I(inode), start, end,
750 NULL,
751 clear_flags,
752 PAGE_UNLOCK |
753 PAGE_START_WRITEBACK |
754 page_error_op |
755 PAGE_END_WRITEBACK);
756
757
758
759
760
761
762 if (pages) {
763 for (i = 0; i < nr_pages; i++) {
764 WARN_ON(pages[i]->mapping);
765 put_page(pages[i]);
766 }
767 kfree(pages);
768 }
769 return 0;
770 }
771 }
772
773 if (will_compress) {
774
775
776
777
778
779 total_compressed = ALIGN(total_compressed, blocksize);
780
781
782
783
784
785
786 total_in = round_up(total_in, fs_info->sectorsize);
787 if (total_compressed + blocksize <= total_in) {
788 compressed_extents++;
789
790
791
792
793
794
795 add_async_extent(async_chunk, start, total_in,
796 total_compressed, pages, nr_pages,
797 compress_type);
798
799 if (start + total_in < end) {
800 start += total_in;
801 pages = NULL;
802 cond_resched();
803 goto again;
804 }
805 return compressed_extents;
806 }
807 }
808 if (pages) {
809
810
811
812
813 for (i = 0; i < nr_pages; i++) {
814 WARN_ON(pages[i]->mapping);
815 put_page(pages[i]);
816 }
817 kfree(pages);
818 pages = NULL;
819 total_compressed = 0;
820 nr_pages = 0;
821
822
823 if (!btrfs_test_opt(fs_info, FORCE_COMPRESS) &&
824 !(BTRFS_I(inode)->prop_compress)) {
825 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
826 }
827 }
828cleanup_and_bail_uncompressed:
829
830
831
832
833
834
835 if (async_chunk->locked_page &&
836 (page_offset(async_chunk->locked_page) >= start &&
837 page_offset(async_chunk->locked_page)) <= end) {
838 __set_page_dirty_nobuffers(async_chunk->locked_page);
839
840 }
841
842 if (redirty)
843 extent_range_redirty_for_io(inode, start, end);
844 add_async_extent(async_chunk, start, end - start + 1, 0, NULL, 0,
845 BTRFS_COMPRESS_NONE);
846 compressed_extents++;
847
848 return compressed_extents;
849}
850
851static void free_async_extent_pages(struct async_extent *async_extent)
852{
853 int i;
854
855 if (!async_extent->pages)
856 return;
857
858 for (i = 0; i < async_extent->nr_pages; i++) {
859 WARN_ON(async_extent->pages[i]->mapping);
860 put_page(async_extent->pages[i]);
861 }
862 kfree(async_extent->pages);
863 async_extent->nr_pages = 0;
864 async_extent->pages = NULL;
865}
866
867static int submit_uncompressed_range(struct btrfs_inode *inode,
868 struct async_extent *async_extent,
869 struct page *locked_page)
870{
871 u64 start = async_extent->start;
872 u64 end = async_extent->start + async_extent->ram_size - 1;
873 unsigned long nr_written = 0;
874 int page_started = 0;
875 int ret;
876
877
878
879
880
881
882
883
884 ret = cow_file_range(inode, locked_page, start, end, &page_started,
885 &nr_written, 0);
886
887 if (page_started) {
888 ret = 0;
889 goto out;
890 }
891 if (ret < 0) {
892 if (locked_page)
893 unlock_page(locked_page);
894 goto out;
895 }
896
897 ret = extent_write_locked_range(&inode->vfs_inode, start, end);
898
899out:
900 kfree(async_extent);
901 return ret;
902}
903
/*
 * Submit one async extent of @async_chunk.
 *
 * An extent without compressed pages is handed to
 * submit_uncompressed_range().  Otherwise space for the compressed data is
 * reserved, an extent map and an ordered extent are created and the
 * compressed pages are submitted for writing.
 *
 * @alloc_hint is used as the allocation hint and updated to point right
 * behind the extent that was reserved.
 *
 * @async_extent is freed on every path.
 *
 * Return: 0 on success, a negative errno when reserving space or creating
 * the extent map / ordered extent failed.  Note that a failure of
 * btrfs_submit_compressed_write() is cleaned up here but does not change the
 * return value.
 */
static int submit_one_async_extent(struct btrfs_inode *inode,
				   struct async_chunk *async_chunk,
				   struct async_extent *async_extent,
				   u64 *alloc_hint)
{
	struct extent_io_tree *io_tree = &inode->io_tree;
	struct btrfs_root *root = inode->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_key ins;
	struct page *locked_page = NULL;
	struct extent_map *em;
	int ret = 0;
	u64 start = async_extent->start;
	u64 end = async_extent->start + async_extent->ram_size - 1;

	/* Only pass the chunk's locked page along if it overlaps this extent. */
	if (async_chunk->locked_page) {
		u64 locked_page_start = page_offset(async_chunk->locked_page);
		u64 locked_page_end = locked_page_start + PAGE_SIZE - 1;

		if (!(start >= locked_page_end || end <= locked_page_start))
			locked_page = async_chunk->locked_page;
	}
	lock_extent(io_tree, start, end);

	/* No compressed pages: this extent is written uncompressed. */
	if (!async_extent->pages)
		return submit_uncompressed_range(inode, async_extent, locked_page);

	ret = btrfs_reserve_extent(root, async_extent->ram_size,
				   async_extent->compressed_size,
				   async_extent->compressed_size,
				   0, *alloc_hint, &ins, 1, 1);
	if (ret) {
		free_async_extent_pages(async_extent);
		/*
		 * There is no retry with an uncompressed write here; the
		 * failure goes straight to the error path.
		 */
		goto out_free;
	}

	/* Set up the extent map for the compressed extent. */
	em = create_io_em(inode, start,
			  async_extent->ram_size,	/* len */
			  start,			/* orig_start */
			  ins.objectid,			/* block_start */
			  ins.offset,			/* block_len */
			  ins.offset,			/* orig_block_len */
			  async_extent->ram_size,	/* ram_bytes */
			  async_extent->compress_type,
			  BTRFS_ORDERED_COMPRESSED);
	if (IS_ERR(em)) {
		ret = PTR_ERR(em);
		goto out_free_reserve;
	}
	free_extent_map(em);

	ret = btrfs_add_ordered_extent(inode, start,
				       async_extent->ram_size,
				       async_extent->ram_size,
				       ins.objectid,
				       ins.offset,
				       0,
				       1 << BTRFS_ORDERED_COMPRESSED,
				       async_extent->compress_type);
	if (ret) {
		btrfs_drop_extent_cache(inode, start, end, 0);
		goto out_free_reserve;
	}
	btrfs_dec_block_group_reservations(fs_info, ins.objectid);

	/* Clear the locked/delalloc bits, unlock the pages and start writeback. */
	extent_clear_unlock_delalloc(inode, start, end,
			NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
			PAGE_UNLOCK | PAGE_START_WRITEBACK);
	if (btrfs_submit_compressed_write(inode, start,
			    async_extent->ram_size,
			    ins.objectid,
			    ins.offset,
			    async_extent->pages,
			    async_extent->nr_pages,
			    async_chunk->write_flags,
			    async_chunk->blkcg_css, true)) {
		/* These shadow the outer start/end with identical values. */
		const u64 start = async_extent->start;
		const u64 end = start + async_extent->ram_size - 1;

		/* Submission failed: finish the ordered extent as not uptodate. */
		btrfs_writepage_endio_finish_ordered(inode, NULL, start, end, 0);

		extent_clear_unlock_delalloc(inode, start, end, NULL, 0,
					     PAGE_END_WRITEBACK | PAGE_SET_ERROR);
		free_async_extent_pages(async_extent);
	}
	*alloc_hint = ins.objectid + ins.offset;
	kfree(async_extent);
	return ret;

out_free_reserve:
	btrfs_dec_block_group_reservations(fs_info, ins.objectid);
	btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
out_free:
	/* Give up on the range: clear all delalloc state and flag the pages with an error. */
	extent_clear_unlock_delalloc(inode, start, end,
				     NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
				     EXTENT_DELALLOC_NEW |
				     EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
				     PAGE_UNLOCK | PAGE_START_WRITEBACK |
				     PAGE_END_WRITEBACK | PAGE_SET_ERROR);
	free_async_extent_pages(async_extent);
	kfree(async_extent);
	return ret;
}
1021
1022
1023
1024
1025
1026
1027static noinline void submit_compressed_extents(struct async_chunk *async_chunk)
1028{
1029 struct btrfs_inode *inode = BTRFS_I(async_chunk->inode);
1030 struct btrfs_fs_info *fs_info = inode->root->fs_info;
1031 struct async_extent *async_extent;
1032 u64 alloc_hint = 0;
1033 int ret = 0;
1034
1035 while (!list_empty(&async_chunk->extents)) {
1036 u64 extent_start;
1037 u64 ram_size;
1038
1039 async_extent = list_entry(async_chunk->extents.next,
1040 struct async_extent, list);
1041 list_del(&async_extent->list);
1042 extent_start = async_extent->start;
1043 ram_size = async_extent->ram_size;
1044
1045 ret = submit_one_async_extent(inode, async_chunk, async_extent,
1046 &alloc_hint);
1047 btrfs_debug(fs_info,
1048"async extent submission failed root=%lld inode=%llu start=%llu len=%llu ret=%d",
1049 inode->root->root_key.objectid,
1050 btrfs_ino(inode), extent_start, ram_size, ret);
1051 }
1052}
1053
1054static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
1055 u64 num_bytes)
1056{
1057 struct extent_map_tree *em_tree = &inode->extent_tree;
1058 struct extent_map *em;
1059 u64 alloc_hint = 0;
1060
1061 read_lock(&em_tree->lock);
1062 em = search_extent_mapping(em_tree, start, num_bytes);
1063 if (em) {
1064
1065
1066
1067
1068
1069 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
1070 free_extent_map(em);
1071 em = search_extent_mapping(em_tree, 0, 0);
1072 if (em && em->block_start < EXTENT_MAP_LAST_BYTE)
1073 alloc_hint = em->block_start;
1074 if (em)
1075 free_extent_map(em);
1076 } else {
1077 alloc_hint = em->block_start;
1078 free_extent_map(em);
1079 }
1080 }
1081 read_unlock(&em_tree->lock);
1082
1083 return alloc_hint;
1084}
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
/*
 * Allocate extents for the delalloc range [@start, @end] and set up the
 * ordered extents for writing it (uncompressed).
 *
 * @inode:        inode the range belongs to
 * @locked_page:  page passed through to extent_clear_unlock_delalloc()
 * @start:        first byte of the range
 * @end:          last byte of the range (inclusive)
 * @page_started: set to 1 when the range was stored as an inline extent
 * @nr_written:   increased by the number of pages when an inline extent was
 *                created
 * @unlock:       non-zero to unlock the pages of each allocated piece
 *
 * Return: 0 on success, a negative errno on failure.  On failure the
 * remaining part of the range is cleaned up and its pages are unlocked.
 */
static noinline int cow_file_range(struct btrfs_inode *inode,
				   struct page *locked_page,
				   u64 start, u64 end, int *page_started,
				   unsigned long *nr_written, int unlock)
{
	struct btrfs_root *root = inode->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 alloc_hint = 0;
	u64 num_bytes;
	unsigned long ram_size;
	u64 cur_alloc_size = 0;
	u64 min_alloc_size;
	u64 blocksize = fs_info->sectorsize;
	struct btrfs_key ins;
	struct extent_map *em;
	unsigned clear_bits;
	unsigned long page_ops;
	bool extent_reserved = false;
	int ret = 0;

	/* Free space inodes are rejected here. */
	if (btrfs_is_free_space_inode(inode)) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/* Work on whole blocks, and on at least one of them. */
	num_bytes = ALIGN(end - start + 1, blocksize);
	num_bytes = max(blocksize,  num_bytes);
	ASSERT(num_bytes <= btrfs_super_total_bytes(fs_info->super_copy));

	inode_should_defrag(inode, start, end, num_bytes, SZ_64K);

	/* Inline extents are only tried at offset 0 with page sized sectors. */
	if (start == 0 && fs_info->sectorsize == PAGE_SIZE) {
		u64 actual_end = min_t(u64, i_size_read(&inode->vfs_inode),
				       end + 1);

		/* lets try to make an inline extent */
		ret = cow_file_range_inline(inode, actual_end, 0,
					    BTRFS_COMPRESS_NONE, NULL, false);
		if (ret == 0) {
			/*
			 * The data now lives in an inline extent: clear the
			 * delalloc state and run the pages through writeback
			 * without submitting any I/O.
			 */
			extent_clear_unlock_delalloc(inode, start, end,
				     locked_page,
				     EXTENT_LOCKED | EXTENT_DELALLOC |
				     EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
				     EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
				     PAGE_START_WRITEBACK | PAGE_END_WRITEBACK);
			*nr_written = *nr_written +
			     (end - start + PAGE_SIZE) / PAGE_SIZE;
			*page_started = 1;
			/*
			 * The locked page is unlocked explicitly here.
			 * NOTE(review): presumably because the call above does
			 * not unlock @locked_page itself, and @locked_page is
			 * assumed to be non-NULL on this path - confirm.
			 */
			unlock_page(locked_page);
			goto out;
		} else if (ret < 0) {
			goto out_unlock;
		}
	}

	alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
	btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);

	/*
	 * For the data relocation root the whole range must be allocated as a
	 * single extent; otherwise a single sector is the minimum.
	 */
	if (btrfs_is_data_reloc_root(root))
		min_alloc_size = num_bytes;
	else
		min_alloc_size = fs_info->sectorsize;

	while (num_bytes > 0) {
		cur_alloc_size = num_bytes;
		ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
					   min_alloc_size, 0, alloc_hint,
					   &ins, 1, 1);
		if (ret < 0)
			goto out_unlock;
		/* The allocator may have returned less than was asked for. */
		cur_alloc_size = ins.offset;
		extent_reserved = true;

		ram_size = ins.offset;
		em = create_io_em(inode, start, ins.offset, /* len */
				  start, /* orig_start */
				  ins.objectid, /* block_start */
				  ins.offset, /* block_len */
				  ins.offset, /* orig_block_len */
				  ram_size, /* ram_bytes */
				  BTRFS_COMPRESS_NONE, /* compress_type */
				  BTRFS_ORDERED_REGULAR /* type */);
		if (IS_ERR(em)) {
			ret = PTR_ERR(em);
			goto out_reserve;
		}
		free_extent_map(em);

		ret = btrfs_add_ordered_extent(inode, start, ram_size, ram_size,
					       ins.objectid, cur_alloc_size, 0,
					       1 << BTRFS_ORDERED_REGULAR,
					       BTRFS_COMPRESS_NONE);
		if (ret)
			goto out_drop_extent_cache;

		if (btrfs_is_data_reloc_root(root)) {
			ret = btrfs_reloc_clone_csums(inode, start,
						      cur_alloc_size);
			/*
			 * The ordered extent already exists at this point, so
			 * on failure only the extent cache is dropped here;
			 * the error is acted upon at the end of this
			 * iteration, after the bookkeeping below has advanced
			 * past this piece.
			 */
			if (ret)
				btrfs_drop_extent_cache(inode, start,
						start + ram_size - 1, 0);
		}

		btrfs_dec_block_group_reservations(fs_info, ins.objectid);

		/* Pages are only unlocked here when the caller asked for it. */
		page_ops = unlock ? PAGE_UNLOCK : 0;
		page_ops |= PAGE_SET_ORDERED;

		extent_clear_unlock_delalloc(inode, start, start + ram_size - 1,
					     locked_page,
					     EXTENT_LOCKED | EXTENT_DELALLOC,
					     page_ops);
		if (num_bytes < cur_alloc_size)
			num_bytes = 0;
		else
			num_bytes -= cur_alloc_size;
		alloc_hint = ins.objectid + ins.offset;
		start += cur_alloc_size;
		extent_reserved = false;

		/*
		 * A btrfs_reloc_clone_csums() error is handled only now, so
		 * that the cleanup below starts after the piece that already
		 * has an ordered extent.
		 */
		if (ret)
			goto out_unlock;
	}
out:
	return ret;

out_drop_extent_cache:
	btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0);
out_reserve:
	btrfs_dec_block_group_reservations(fs_info, ins.objectid);
	btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
out_unlock:
	clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
		EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV;
	page_ops = PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK;
	/*
	 * extent_reserved is only true when we got here from the two labels
	 * above, i.e. an extent was reserved and then released again.  That
	 * piece is cleaned up without EXTENT_CLEAR_DATA_RESV, the rest of the
	 * range with it.
	 * NOTE(review): presumably the data reservation of the first piece was
	 * already dealt with when the reserved extent was freed - confirm.
	 */
	if (extent_reserved) {
		extent_clear_unlock_delalloc(inode, start,
					     start + cur_alloc_size - 1,
					     locked_page,
					     clear_bits,
					     page_ops);
		start += cur_alloc_size;
		if (start >= end)
			goto out;
	}
	extent_clear_unlock_delalloc(inode, start, end, locked_page,
				     clear_bits | EXTENT_CLEAR_DATA_RESV,
				     page_ops);
	goto out;
}
1322
1323
1324
1325
1326static noinline void async_cow_start(struct btrfs_work *work)
1327{
1328 struct async_chunk *async_chunk;
1329 int compressed_extents;
1330
1331 async_chunk = container_of(work, struct async_chunk, work);
1332
1333 compressed_extents = compress_file_range(async_chunk);
1334 if (compressed_extents == 0) {
1335 btrfs_add_delayed_iput(async_chunk->inode);
1336 async_chunk->inode = NULL;
1337 }
1338}
1339
1340
1341
1342
1343static noinline void async_cow_submit(struct btrfs_work *work)
1344{
1345 struct async_chunk *async_chunk = container_of(work, struct async_chunk,
1346 work);
1347 struct btrfs_fs_info *fs_info = btrfs_work_owner(work);
1348 unsigned long nr_pages;
1349
1350 nr_pages = (async_chunk->end - async_chunk->start + PAGE_SIZE) >>
1351 PAGE_SHIFT;
1352
1353
1354
1355
1356
1357
1358
1359 if (async_chunk->inode)
1360 submit_compressed_extents(async_chunk);
1361
1362
1363 if (atomic_sub_return(nr_pages, &fs_info->async_delalloc_pages) <
1364 5 * SZ_1M)
1365 cond_wake_up_nomb(&fs_info->async_submit_wait);
1366}
1367
1368static noinline void async_cow_free(struct btrfs_work *work)
1369{
1370 struct async_chunk *async_chunk;
1371 struct async_cow *async_cow;
1372
1373 async_chunk = container_of(work, struct async_chunk, work);
1374 if (async_chunk->inode)
1375 btrfs_add_delayed_iput(async_chunk->inode);
1376 if (async_chunk->blkcg_css)
1377 css_put(async_chunk->blkcg_css);
1378
1379 async_cow = async_chunk->async_cow;
1380 if (atomic_dec_and_test(&async_cow->num_chunks))
1381 kvfree(async_cow);
1382}
1383
/*
 * Split the delalloc range [@start, @end] into chunks and queue them on the
 * delalloc workers.
 *
 * @inode:        inode the range belongs to
 * @wbc:          writeback control; provides the blkcg css and write flags
 * @locked_page:  the caller's locked page; handed to the first chunk only
 * @start:        first byte of the range
 * @end:          last byte of the range (inclusive)
 * @page_started: set to 1 once all chunks are queued
 * @nr_written:   increased by the number of pages queued
 *
 * Return: 0 on success, -ENOMEM when the chunk array cannot be allocated (the
 * range is then cleaned up and its pages are flagged with an error).
 */
static int cow_file_range_async(struct btrfs_inode *inode,
				struct writeback_control *wbc,
				struct page *locked_page,
				u64 start, u64 end, int *page_started,
				unsigned long *nr_written)
{
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	struct cgroup_subsys_state *blkcg_css = wbc_blkcg_css(wbc);
	struct async_cow *ctx;
	struct async_chunk *async_chunk;
	unsigned long nr_pages;
	u64 cur_end;
	/* One chunk per 512K of the range. */
	u64 num_chunks = DIV_ROUND_UP(end - start, SZ_512K);
	int i;
	bool should_compress;
	unsigned nofs_flag;
	const unsigned int write_flags = wbc_to_write_flags(wbc);

	unlock_extent(&inode->io_tree, start, end);

	/*
	 * An inode flagged NOCOMPRESS (without forced compression) gets a
	 * single chunk covering the whole range.
	 */
	if (inode->flags & BTRFS_INODE_NOCOMPRESS &&
	    !btrfs_test_opt(fs_info, FORCE_COMPRESS)) {
		num_chunks = 1;
		should_compress = false;
	} else {
		should_compress = true;
	}

	/* The allocation is done in a NOFS scope. */
	nofs_flag = memalloc_nofs_save();
	ctx = kvmalloc(struct_size(ctx, chunks, num_chunks), GFP_KERNEL);
	memalloc_nofs_restore(nofs_flag);

	if (!ctx) {
		unsigned clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC |
			EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
			EXTENT_DO_ACCOUNTING;
		unsigned long page_ops = PAGE_UNLOCK | PAGE_START_WRITEBACK |
					 PAGE_END_WRITEBACK | PAGE_SET_ERROR;

		extent_clear_unlock_delalloc(inode, start, end, locked_page,
					     clear_bits, page_ops);
		return -ENOMEM;
	}

	async_chunk = ctx->chunks;
	atomic_set(&ctx->num_chunks, num_chunks);

	for (i = 0; i < num_chunks; i++) {
		if (should_compress)
			cur_end = min(end, start + SZ_512K - 1);
		else
			cur_end = end;

		/*
		 * Every chunk holds its own inode reference; it is dropped by
		 * async_cow_start() or async_cow_free().
		 */
		ihold(&inode->vfs_inode);
		async_chunk[i].async_cow = ctx;
		async_chunk[i].inode = &inode->vfs_inode;
		async_chunk[i].start = start;
		async_chunk[i].end = cur_end;
		async_chunk[i].write_flags = write_flags;
		INIT_LIST_HEAD(&async_chunk[i].extents);

		/*
		 * Only the first chunk gets the locked page; for the others
		 * locked_page is already NULL here.
		 */
		if (locked_page) {
			/* Account the chunk's bytes to the cgroup via the locked page. */
			wbc_account_cgroup_owner(wbc, locked_page,
						 cur_end - start);
			async_chunk[i].locked_page = locked_page;
			locked_page = NULL;
		} else {
			async_chunk[i].locked_page = NULL;
		}

		/* A css reference is only taken for non-root cgroups. */
		if (blkcg_css != blkcg_root_css) {
			css_get(blkcg_css);
			async_chunk[i].blkcg_css = blkcg_css;
		} else {
			async_chunk[i].blkcg_css = NULL;
		}

		btrfs_init_work(&async_chunk[i].work, async_cow_start,
				async_cow_submit, async_cow_free);

		/* Counted here, subtracted again in async_cow_submit(). */
		nr_pages = DIV_ROUND_UP(cur_end - start, PAGE_SIZE);
		atomic_add(nr_pages, &fs_info->async_delalloc_pages);

		btrfs_queue_work(fs_info->delalloc_workers, &async_chunk[i].work);

		*nr_written += nr_pages;
		start = cur_end + 1;
	}
	*page_started = 1;
	return 0;
}
1497
1498static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
1499 struct page *locked_page, u64 start,
1500 u64 end, int *page_started,
1501 unsigned long *nr_written)
1502{
1503 int ret;
1504
1505 ret = cow_file_range(inode, locked_page, start, end, page_started,
1506 nr_written, 0);
1507 if (ret)
1508 return ret;
1509
1510 if (*page_started)
1511 return 0;
1512
1513 __set_page_dirty_nobuffers(locked_page);
1514 account_page_redirty(locked_page);
1515 extent_write_locked_range(&inode->vfs_inode, start, end);
1516 *page_started = 1;
1517
1518 return 0;
1519}
1520
1521static noinline int csum_exist_in_range(struct btrfs_fs_info *fs_info,
1522 u64 bytenr, u64 num_bytes)
1523{
1524 struct btrfs_root *csum_root = btrfs_csum_root(fs_info, bytenr);
1525 struct btrfs_ordered_sum *sums;
1526 int ret;
1527 LIST_HEAD(list);
1528
1529 ret = btrfs_lookup_csums_range(csum_root, bytenr,
1530 bytenr + num_bytes - 1, &list, 0);
1531 if (ret == 0 && list_empty(&list))
1532 return 0;
1533
1534 while (!list_empty(&list)) {
1535 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
1536 list_del(&sums->list);
1537 kfree(sums);
1538 }
1539 if (ret < 0)
1540 return ret;
1541 return 1;
1542}
1543
1544static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page,
1545 const u64 start, const u64 end,
1546 int *page_started, unsigned long *nr_written)
1547{
1548 const bool is_space_ino = btrfs_is_free_space_inode(inode);
1549 const bool is_reloc_ino = btrfs_is_data_reloc_root(inode->root);
1550 const u64 range_bytes = end + 1 - start;
1551 struct extent_io_tree *io_tree = &inode->io_tree;
1552 u64 range_start = start;
1553 u64 count;
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587 count = count_range_bits(io_tree, &range_start, end, range_bytes,
1588 EXTENT_NORESERVE, 0);
1589 if (count > 0 || is_space_ino || is_reloc_ino) {
1590 u64 bytes = count;
1591 struct btrfs_fs_info *fs_info = inode->root->fs_info;
1592 struct btrfs_space_info *sinfo = fs_info->data_sinfo;
1593
1594 if (is_space_ino || is_reloc_ino)
1595 bytes = range_bytes;
1596
1597 spin_lock(&sinfo->lock);
1598 btrfs_space_info_update_bytes_may_use(fs_info, sinfo, bytes);
1599 spin_unlock(&sinfo->lock);
1600
1601 if (count > 0)
1602 clear_extent_bit(io_tree, start, end, EXTENT_NORESERVE,
1603 0, 0, NULL);
1604 }
1605
1606 return cow_file_range(inode, locked_page, start, end, page_started,
1607 nr_written, 1);
1608}
1609
1610
1611
1612
1613
1614
1615
1616
/*
 * Write out the delalloc range [start, end] of @inode, reusing existing
 * extents in place (NOCOW) wherever the checks below allow it and handing
 * every other sub-range to fallback_to_cow().
 *
 * The function walks the inode's file extent items that intersect the range.
 * Consecutive pieces that cannot be written in place are accumulated in
 * [cow_start, ...) and flushed through fallback_to_cow() right before the next
 * NOCOW-able extent is processed, and once more after the loop.
 *
 * @locked_page, @page_started and @nr_written are only passed through to the
 * helpers called here.
 *
 * Returns 0 on success or a negative errno.  On error, the part of the range
 * starting at cur_offset that has not been handled yet is cleared and its
 * pages are unlocked.
 */
static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
				       struct page *locked_page,
				       const u64 start, const u64 end,
				       int *page_started,
				       unsigned long *nr_written)
{
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	struct btrfs_root *root = inode->root;
	struct btrfs_path *path;
	/* Start of the pending range that must be COWed; (u64)-1 means none. */
	u64 cow_start = (u64)-1;
	/* First byte of the range that has not been dealt with yet. */
	u64 cur_offset = start;
	int ret;
	/* Only the very first lookup may step back to the previous item. */
	bool check_prev = true;
	const bool freespace_inode = btrfs_is_free_space_inode(inode);
	u64 ino = btrfs_ino(inode);
	/* True while a btrfs_inc_nocow_writers() count is held on disk_bytenr. */
	bool nocow = false;
	u64 disk_bytenr = 0;
	/* NODATACOW inodes may overwrite regular extents, not just prealloc. */
	const bool force = inode->flags & BTRFS_INODE_NODATACOW;

	path = btrfs_alloc_path();
	if (!path) {
		/* Nothing was started: clear and unlock the whole range. */
		extent_clear_unlock_delalloc(inode, start, end, locked_page,
					     EXTENT_LOCKED | EXTENT_DELALLOC |
					     EXTENT_DO_ACCOUNTING |
					     EXTENT_DEFRAG, PAGE_UNLOCK |
					     PAGE_START_WRITEBACK |
					     PAGE_END_WRITEBACK);
		return -ENOMEM;
	}

	while (1) {
		struct btrfs_key found_key;
		struct btrfs_file_extent_item *fi;
		struct extent_buffer *leaf;
		u64 extent_end;
		u64 extent_offset;
		u64 num_bytes = 0;
		u64 disk_num_bytes;
		u64 ram_bytes;
		int extent_type;

		nocow = false;

		ret = btrfs_lookup_file_extent(NULL, root, path, ino,
					       cur_offset, 0);
		if (ret < 0)
			goto error;

		/*
		 * No exact match (ret > 0): on the first pass, if the previous
		 * slot holds an EXTENT_DATA item of this inode, step back to
		 * it, since that extent may cover cur_offset.
		 */
		if (ret > 0 && path->slots[0] > 0 && check_prev) {
			leaf = path->nodes[0];
			btrfs_item_key_to_cpu(leaf, &found_key,
					      path->slots[0] - 1);
			if (found_key.objectid == ino &&
			    found_key.type == BTRFS_EXTENT_DATA_KEY)
				path->slots[0]--;
		}
		check_prev = false;
next_slot:
		/* Move on to the next leaf once the current one is exhausted. */
		leaf = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0) {
				/* Make the error path also clean the pending COW range. */
				if (cow_start != (u64)-1)
					cur_offset = cow_start;
				goto error;
			}
			if (ret > 0)
				break;
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		/* Ran past the items of this inode. */
		if (found_key.objectid > ino)
			break;

		/*
		 * Skip items sorting before this inode's EXTENT_DATA items
		 * (a smaller objectid is unexpected and triggers a warning).
		 */
		if (WARN_ON_ONCE(found_key.objectid < ino) ||
		    found_key.type < BTRFS_EXTENT_DATA_KEY) {
			path->slots[0]++;
			goto next_slot;
		}

		/* Not an EXTENT_DATA item, or it starts after our range. */
		if (found_key.type > BTRFS_EXTENT_DATA_KEY ||
		    found_key.offset > end)
			break;

		/*
		 * The extent starts after cur_offset: the gap up to its start
		 * has no extent item and is treated as needing COW (nocow is
		 * false at this point).
		 */
		if (found_key.offset > cur_offset) {
			extent_end = found_key.offset;
			extent_type = 0;
			goto out_check;
		}

		/*
		 * The extent starts at or before cur_offset and may intersect
		 * the range; inspect the file extent item.
		 */
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		extent_type = btrfs_file_extent_type(leaf, fi);

		ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
		if (extent_type == BTRFS_FILE_EXTENT_REG ||
		    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
			disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
			extent_offset = btrfs_file_extent_offset(leaf, fi);
			extent_end = found_key.offset +
				btrfs_file_extent_num_bytes(leaf, fi);
			disk_num_bytes =
				btrfs_file_extent_disk_num_bytes(leaf, fi);

			/* Extent ends before the current offset: try the next one. */
			if (extent_end <= cur_offset) {
				path->slots[0]++;
				goto next_slot;
			}

			/* A zero disk_bytenr (hole) cannot be written in place. */
			if (disk_bytenr == 0)
				goto out_check;

			/* Compressed, encrypted or otherwise encoded extents need COW. */
			if (btrfs_file_extent_compression(leaf, fi) ||
			    btrfs_file_extent_encryption(leaf, fi) ||
			    btrfs_file_extent_other_encoding(leaf, fi))
				goto out_check;

			/*
			 * An extent whose generation is not newer than the
			 * root's last snapshot is COWed (free space inodes are
			 * exempt from this check).
			 */
			if (!freespace_inode &&
			    btrfs_file_extent_generation(leaf, fi) <=
			    btrfs_root_last_snapshot(&root->root_item))
				goto out_check;
			/* Regular extents are only overwritten for NODATACOW inodes. */
			if (extent_type == BTRFS_FILE_EXTENT_REG && !force)
				goto out_check;

			/*
			 * The path is released before the remaining checks; from
			 * here on path->nodes[0] is NULL, which out_check relies
			 * on to restart the lookup instead of advancing the slot.
			 */
			btrfs_release_path(path);

			ret = btrfs_cross_ref_exist(root, ino,
						    found_key.offset -
						    extent_offset, disk_bytenr, false);
			if (ret) {
				/*
				 * Negative: the check itself failed.  Positive:
				 * fall back to COW for this extent.
				 */
				if (ret < 0) {
					if (cow_start != (u64)-1)
						cur_offset = cow_start;
					goto error;
				}

				WARN_ON_ONCE(freespace_inode);
				goto out_check;
			}
			/* Translate cur_offset into a disk byte number inside the extent. */
			disk_bytenr += extent_offset;
			disk_bytenr += cur_offset - found_key.offset;
			num_bytes = min(end + 1, extent_end) - cur_offset;

			/* COW while snapshot_force_cow is set on this root. */
			if (!freespace_inode && atomic_read(&root->snapshot_force_cow))
				goto out_check;

			/* COW if any checksum exists for the target disk range. */
			ret = csum_exist_in_range(fs_info, disk_bytenr,
						  num_bytes);
			if (ret) {
				/* Same convention as above: < 0 error, > 0 must COW. */
				if (ret < 0) {
					if (cow_start != (u64)-1)
						cur_offset = cow_start;
					goto error;
				}
				WARN_ON_ONCE(freespace_inode);
				goto out_check;
			}

			/*
			 * NOTE(review): a false return presumably means NOCOW
			 * writes are currently not allowed for the block group
			 * holding disk_bytenr - confirm against the helper.
			 */
			if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr))
				goto out_check;
			nocow = true;
		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
			extent_end = found_key.offset + ram_bytes;
			extent_end = ALIGN(extent_end, fs_info->sectorsize);

			/* Inline extent ending at or before the range start: skip it. */
			if (extent_end <= start) {
				path->slots[0]++;
				goto next_slot;
			}
		} else {
			/* Unknown file extent type. */
			BUG();
		}
out_check:
		/*
		 * This piece must be COWed: remember where the COW range
		 * begins (if not already open) and move past the extent.
		 */
		if (!nocow) {
			if (cow_start == (u64)-1)
				cow_start = cur_offset;
			cur_offset = extent_end;
			if (cur_offset > end)
				break;
			/* Path was released above: redo the lookup at cur_offset. */
			if (!path->nodes[0])
				continue;
			path->slots[0]++;
			goto next_slot;
		}

		/*
		 * A NOCOW-able extent was found.  First flush the pending COW
		 * range, which ends right before this extent's key offset.
		 */
		if (cow_start != (u64)-1) {
			ret = fallback_to_cow(inode, locked_page,
					      cow_start, found_key.offset - 1,
					      page_started, nr_written);
			if (ret)
				goto error;
			cow_start = (u64)-1;
		}

		if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
			u64 orig_start = found_key.offset - extent_offset;
			struct extent_map *em;

			/* Prealloc extents get an io extent map before the ordered extent. */
			em = create_io_em(inode, cur_offset, num_bytes,
					  orig_start,
					  disk_bytenr,
					  num_bytes,
					  disk_num_bytes,
					  ram_bytes, BTRFS_COMPRESS_NONE,
					  BTRFS_ORDERED_PREALLOC);
			if (IS_ERR(em)) {
				ret = PTR_ERR(em);
				goto error;
			}
			free_extent_map(em);
			ret = btrfs_add_ordered_extent(inode,
					cur_offset, num_bytes, num_bytes,
					disk_bytenr, num_bytes, 0,
					1 << BTRFS_ORDERED_PREALLOC,
					BTRFS_COMPRESS_NONE);
			if (ret) {
				/* Undo the extent map created just above. */
				btrfs_drop_extent_cache(inode, cur_offset,
							cur_offset + num_bytes - 1,
							0);
				goto error;
			}
		} else {
			ret = btrfs_add_ordered_extent(inode, cur_offset,
						       num_bytes, num_bytes,
						       disk_bytenr, num_bytes,
						       0,
						       1 << BTRFS_ORDERED_NOCOW,
						       BTRFS_COMPRESS_NONE);
			if (ret)
				goto error;
		}

		/* The ordered extent exists now; drop the nocow writer count. */
		if (nocow)
			btrfs_dec_nocow_writers(fs_info, disk_bytenr);
		nocow = false;

		if (btrfs_is_data_reloc_root(root))
			/*
			 * The return value is deliberately checked only after
			 * the range below has been cleared and cur_offset has
			 * advanced, so that the error path does not touch the
			 * range covered by the ordered extent just created.
			 */
			ret = btrfs_reloc_clone_csums(inode, cur_offset,
						      num_bytes);

		extent_clear_unlock_delalloc(inode, cur_offset,
					     cur_offset + num_bytes - 1,
					     locked_page, EXTENT_LOCKED |
					     EXTENT_DELALLOC |
					     EXTENT_CLEAR_DATA_RESV,
					     PAGE_UNLOCK | PAGE_SET_ORDERED);

		cur_offset = extent_end;

		/* Deferred check of btrfs_reloc_clone_csums() (0 otherwise). */
		if (ret)
			goto error;
		if (cur_offset > end)
			break;
	}
	btrfs_release_path(path);

	/* Whatever is left of the range and not yet covered must be COWed. */
	if (cur_offset <= end && cow_start == (u64)-1)
		cow_start = cur_offset;

	if (cow_start != (u64)-1) {
		/*
		 * cur_offset = end makes the "cur_offset < end" test in the
		 * error path false, so a failing fallback_to_cow() is not
		 * followed by another clear/unlock of the range here.
		 */
		cur_offset = end;
		ret = fallback_to_cow(inode, locked_page, cow_start, end,
				      page_started, nr_written);
		if (ret)
			goto error;
	}

error:
	/* Reached on success too; drop a nocow writer count still held. */
	if (nocow)
		btrfs_dec_nocow_writers(fs_info, disk_bytenr);

	/* On failure, clear and unlock what has not been processed. */
	if (ret && cur_offset < end)
		extent_clear_unlock_delalloc(inode, cur_offset, end,
					     locked_page, EXTENT_LOCKED |
					     EXTENT_DELALLOC | EXTENT_DEFRAG |
					     EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
					     PAGE_START_WRITEBACK |
					     PAGE_END_WRITEBACK);
	btrfs_free_path(path);
	return ret;
}
1969
1970static bool should_nocow(struct btrfs_inode *inode, u64 start, u64 end)
1971{
1972 if (inode->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)) {
1973 if (inode->defrag_bytes &&
1974 test_range_bit(&inode->io_tree, start, end, EXTENT_DEFRAG,
1975 0, NULL))
1976 return false;
1977 return true;
1978 }
1979 return false;
1980}
1981
1982
1983
1984
1985
1986int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page,
1987 u64 start, u64 end, int *page_started, unsigned long *nr_written,
1988 struct writeback_control *wbc)
1989{
1990 int ret;
1991 const bool zoned = btrfs_is_zoned(inode->root->fs_info);
1992
1993
1994
1995
1996
1997 ASSERT(!(end <= page_offset(locked_page) ||
1998 start >= page_offset(locked_page) + PAGE_SIZE));
1999
2000 if (should_nocow(inode, start, end)) {
2001
2002
2003
2004
2005
2006
2007
2008 ASSERT(!zoned || btrfs_is_data_reloc_root(inode->root));
2009 ret = run_delalloc_nocow(inode, locked_page, start, end,
2010 page_started, nr_written);
2011 } else if (!btrfs_inode_can_compress(inode) ||
2012 !inode_need_compress(inode, start, end)) {
2013 if (zoned)
2014 ret = run_delalloc_zoned(inode, locked_page, start, end,
2015 page_started, nr_written);
2016 else
2017 ret = cow_file_range(inode, locked_page, start, end,
2018 page_started, nr_written, 1);
2019 } else {
2020 set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags);
2021 ret = cow_file_range_async(inode, wbc, locked_page, start, end,
2022 page_started, nr_written);
2023 }
2024 ASSERT(ret <= 0);
2025 if (ret)
2026 btrfs_cleanup_ordered_extents(inode, locked_page, start,
2027 end - start + 1);
2028 return ret;
2029}
2030
2031void btrfs_split_delalloc_extent(struct inode *inode,
2032 struct extent_state *orig, u64 split)
2033{
2034 u64 size;
2035
2036
2037 if (!(orig->state & EXTENT_DELALLOC))
2038 return;
2039
2040 size = orig->end - orig->start + 1;
2041 if (size > BTRFS_MAX_EXTENT_SIZE) {
2042 u32 num_extents;
2043 u64 new_size;
2044
2045
2046
2047
2048
2049 new_size = orig->end - split + 1;
2050 num_extents = count_max_extents(new_size);
2051 new_size = split - orig->start;
2052 num_extents += count_max_extents(new_size);
2053 if (count_max_extents(size) >= num_extents)
2054 return;
2055 }
2056
2057 spin_lock(&BTRFS_I(inode)->lock);
2058 btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
2059 spin_unlock(&BTRFS_I(inode)->lock);
2060}
2061
2062
2063
2064
2065
2066
2067void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new,
2068 struct extent_state *other)
2069{
2070 u64 new_size, old_size;
2071 u32 num_extents;
2072
2073
2074 if (!(other->state & EXTENT_DELALLOC))
2075 return;
2076
2077 if (new->start > other->start)
2078 new_size = new->end - other->start + 1;
2079 else
2080 new_size = other->end - new->start + 1;
2081
2082
2083 if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
2084 spin_lock(&BTRFS_I(inode)->lock);
2085 btrfs_mod_outstanding_extents(BTRFS_I(inode), -1);
2086 spin_unlock(&BTRFS_I(inode)->lock);
2087 return;
2088 }
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108 old_size = other->end - other->start + 1;
2109 num_extents = count_max_extents(old_size);
2110 old_size = new->end - new->start + 1;
2111 num_extents += count_max_extents(old_size);
2112 if (count_max_extents(new_size) >= num_extents)
2113 return;
2114
2115 spin_lock(&BTRFS_I(inode)->lock);
2116 btrfs_mod_outstanding_extents(BTRFS_I(inode), -1);
2117 spin_unlock(&BTRFS_I(inode)->lock);
2118}
2119
2120static void btrfs_add_delalloc_inodes(struct btrfs_root *root,
2121 struct inode *inode)
2122{
2123 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2124
2125 spin_lock(&root->delalloc_lock);
2126 if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
2127 list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
2128 &root->delalloc_inodes);
2129 set_bit(BTRFS_INODE_IN_DELALLOC_LIST,
2130 &BTRFS_I(inode)->runtime_flags);
2131 root->nr_delalloc_inodes++;
2132 if (root->nr_delalloc_inodes == 1) {
2133 spin_lock(&fs_info->delalloc_root_lock);
2134 BUG_ON(!list_empty(&root->delalloc_root));
2135 list_add_tail(&root->delalloc_root,
2136 &fs_info->delalloc_roots);
2137 spin_unlock(&fs_info->delalloc_root_lock);
2138 }
2139 }
2140 spin_unlock(&root->delalloc_lock);
2141}
2142
2143
2144void __btrfs_del_delalloc_inode(struct btrfs_root *root,
2145 struct btrfs_inode *inode)
2146{
2147 struct btrfs_fs_info *fs_info = root->fs_info;
2148
2149 if (!list_empty(&inode->delalloc_inodes)) {
2150 list_del_init(&inode->delalloc_inodes);
2151 clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
2152 &inode->runtime_flags);
2153 root->nr_delalloc_inodes--;
2154 if (!root->nr_delalloc_inodes) {
2155 ASSERT(list_empty(&root->delalloc_inodes));
2156 spin_lock(&fs_info->delalloc_root_lock);
2157 BUG_ON(list_empty(&root->delalloc_root));
2158 list_del_init(&root->delalloc_root);
2159 spin_unlock(&fs_info->delalloc_root_lock);
2160 }
2161 }
2162}
2163
2164static void btrfs_del_delalloc_inode(struct btrfs_root *root,
2165 struct btrfs_inode *inode)
2166{
2167 spin_lock(&root->delalloc_lock);
2168 __btrfs_del_delalloc_inode(root, inode);
2169 spin_unlock(&root->delalloc_lock);
2170}
2171
2172
2173
2174
2175
2176void btrfs_set_delalloc_extent(struct inode *inode, struct extent_state *state,
2177 unsigned *bits)
2178{
2179 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2180
2181 if ((*bits & EXTENT_DEFRAG) && !(*bits & EXTENT_DELALLOC))
2182 WARN_ON(1);
2183
2184
2185
2186
2187
2188 if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
2189 struct btrfs_root *root = BTRFS_I(inode)->root;
2190 u64 len = state->end + 1 - state->start;
2191 u32 num_extents = count_max_extents(len);
2192 bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode));
2193
2194 spin_lock(&BTRFS_I(inode)->lock);
2195 btrfs_mod_outstanding_extents(BTRFS_I(inode), num_extents);
2196 spin_unlock(&BTRFS_I(inode)->lock);
2197
2198
2199 if (btrfs_is_testing(fs_info))
2200 return;
2201
2202 percpu_counter_add_batch(&fs_info->delalloc_bytes, len,
2203 fs_info->delalloc_batch);
2204 spin_lock(&BTRFS_I(inode)->lock);
2205 BTRFS_I(inode)->delalloc_bytes += len;
2206 if (*bits & EXTENT_DEFRAG)
2207 BTRFS_I(inode)->defrag_bytes += len;
2208 if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
2209 &BTRFS_I(inode)->runtime_flags))
2210 btrfs_add_delalloc_inodes(root, inode);
2211 spin_unlock(&BTRFS_I(inode)->lock);
2212 }
2213
2214 if (!(state->state & EXTENT_DELALLOC_NEW) &&
2215 (*bits & EXTENT_DELALLOC_NEW)) {
2216 spin_lock(&BTRFS_I(inode)->lock);
2217 BTRFS_I(inode)->new_delalloc_bytes += state->end + 1 -
2218 state->start;
2219 spin_unlock(&BTRFS_I(inode)->lock);
2220 }
2221}
2222
2223
2224
2225
2226
2227void btrfs_clear_delalloc_extent(struct inode *vfs_inode,
2228 struct extent_state *state, unsigned *bits)
2229{
2230 struct btrfs_inode *inode = BTRFS_I(vfs_inode);
2231 struct btrfs_fs_info *fs_info = btrfs_sb(vfs_inode->i_sb);
2232 u64 len = state->end + 1 - state->start;
2233 u32 num_extents = count_max_extents(len);
2234
2235 if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG)) {
2236 spin_lock(&inode->lock);
2237 inode->defrag_bytes -= len;
2238 spin_unlock(&inode->lock);
2239 }
2240
2241
2242
2243
2244
2245
2246 if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
2247 struct btrfs_root *root = inode->root;
2248 bool do_list = !btrfs_is_free_space_inode(inode);
2249
2250 spin_lock(&inode->lock);
2251 btrfs_mod_outstanding_extents(inode, -num_extents);
2252 spin_unlock(&inode->lock);
2253
2254
2255
2256
2257
2258
2259 if (*bits & EXTENT_CLEAR_META_RESV &&
2260 root != fs_info->tree_root)
2261 btrfs_delalloc_release_metadata(inode, len, false);
2262
2263
2264 if (btrfs_is_testing(fs_info))
2265 return;
2266
2267 if (!btrfs_is_data_reloc_root(root) &&
2268 do_list && !(state->state & EXTENT_NORESERVE) &&
2269 (*bits & EXTENT_CLEAR_DATA_RESV))
2270 btrfs_free_reserved_data_space_noquota(fs_info, len);
2271
2272 percpu_counter_add_batch(&fs_info->delalloc_bytes, -len,
2273 fs_info->delalloc_batch);
2274 spin_lock(&inode->lock);
2275 inode->delalloc_bytes -= len;
2276 if (do_list && inode->delalloc_bytes == 0 &&
2277 test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
2278 &inode->runtime_flags))
2279 btrfs_del_delalloc_inode(root, inode);
2280 spin_unlock(&inode->lock);
2281 }
2282
2283 if ((state->state & EXTENT_DELALLOC_NEW) &&
2284 (*bits & EXTENT_DELALLOC_NEW)) {
2285 spin_lock(&inode->lock);
2286 ASSERT(inode->new_delalloc_bytes >= len);
2287 inode->new_delalloc_bytes -= len;
2288 if (*bits & EXTENT_ADD_INODE_BYTES)
2289 inode_add_bytes(&inode->vfs_inode, len);
2290 spin_unlock(&inode->lock);
2291 }
2292}
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302static blk_status_t btrfs_submit_bio_start(struct inode *inode, struct bio *bio,
2303 u64 dio_file_offset)
2304{
2305 return btrfs_csum_one_bio(BTRFS_I(inode), bio, (u64)-1, false);
2306}
2307
2308
2309
2310
2311
2312
2313static int split_zoned_em(struct btrfs_inode *inode, u64 start, u64 len,
2314 u64 pre, u64 post)
2315{
2316 struct extent_map_tree *em_tree = &inode->extent_tree;
2317 struct extent_map *em;
2318 struct extent_map *split_pre = NULL;
2319 struct extent_map *split_mid = NULL;
2320 struct extent_map *split_post = NULL;
2321 int ret = 0;
2322 unsigned long flags;
2323
2324
2325 if (pre == 0 && post == 0)
2326 return 0;
2327
2328 split_pre = alloc_extent_map();
2329 if (pre)
2330 split_mid = alloc_extent_map();
2331 if (post)
2332 split_post = alloc_extent_map();
2333 if (!split_pre || (pre && !split_mid) || (post && !split_post)) {
2334 ret = -ENOMEM;
2335 goto out;
2336 }
2337
2338 ASSERT(pre + post < len);
2339
2340 lock_extent(&inode->io_tree, start, start + len - 1);
2341 write_lock(&em_tree->lock);
2342 em = lookup_extent_mapping(em_tree, start, len);
2343 if (!em) {
2344 ret = -EIO;
2345 goto out_unlock;
2346 }
2347
2348 ASSERT(em->len == len);
2349 ASSERT(!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags));
2350 ASSERT(em->block_start < EXTENT_MAP_LAST_BYTE);
2351 ASSERT(test_bit(EXTENT_FLAG_PINNED, &em->flags));
2352 ASSERT(!test_bit(EXTENT_FLAG_LOGGING, &em->flags));
2353 ASSERT(!list_empty(&em->list));
2354
2355 flags = em->flags;
2356 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
2357
2358
2359 split_pre->start = em->start;
2360 split_pre->len = (pre ? pre : em->len - post);
2361 split_pre->orig_start = split_pre->start;
2362 split_pre->block_start = em->block_start;
2363 split_pre->block_len = split_pre->len;
2364 split_pre->orig_block_len = split_pre->block_len;
2365 split_pre->ram_bytes = split_pre->len;
2366 split_pre->flags = flags;
2367 split_pre->compress_type = em->compress_type;
2368 split_pre->generation = em->generation;
2369
2370 replace_extent_mapping(em_tree, em, split_pre, 1);
2371
2372
2373
2374
2375
2376
2377
2378 if (pre) {
2379
2380 split_mid->start = em->start + pre;
2381 split_mid->len = em->len - pre - post;
2382 split_mid->orig_start = split_mid->start;
2383 split_mid->block_start = em->block_start + pre;
2384 split_mid->block_len = split_mid->len;
2385 split_mid->orig_block_len = split_mid->block_len;
2386 split_mid->ram_bytes = split_mid->len;
2387 split_mid->flags = flags;
2388 split_mid->compress_type = em->compress_type;
2389 split_mid->generation = em->generation;
2390 add_extent_mapping(em_tree, split_mid, 1);
2391 }
2392
2393 if (post) {
2394 split_post->start = em->start + em->len - post;
2395 split_post->len = post;
2396 split_post->orig_start = split_post->start;
2397 split_post->block_start = em->block_start + em->len - post;
2398 split_post->block_len = split_post->len;
2399 split_post->orig_block_len = split_post->block_len;
2400 split_post->ram_bytes = split_post->len;
2401 split_post->flags = flags;
2402 split_post->compress_type = em->compress_type;
2403 split_post->generation = em->generation;
2404 add_extent_mapping(em_tree, split_post, 1);
2405 }
2406
2407
2408 free_extent_map(em);
2409
2410 free_extent_map(em);
2411
2412out_unlock:
2413 write_unlock(&em_tree->lock);
2414 unlock_extent(&inode->io_tree, start, start + len - 1);
2415out:
2416 free_extent_map(split_pre);
2417 free_extent_map(split_mid);
2418 free_extent_map(split_post);
2419
2420 return ret;
2421}
2422
2423static blk_status_t extract_ordered_extent(struct btrfs_inode *inode,
2424 struct bio *bio, loff_t file_offset)
2425{
2426 struct btrfs_ordered_extent *ordered;
2427 u64 start = (u64)bio->bi_iter.bi_sector << SECTOR_SHIFT;
2428 u64 file_len;
2429 u64 len = bio->bi_iter.bi_size;
2430 u64 end = start + len;
2431 u64 ordered_end;
2432 u64 pre, post;
2433 int ret = 0;
2434
2435 ordered = btrfs_lookup_ordered_extent(inode, file_offset);
2436 if (WARN_ON_ONCE(!ordered))
2437 return BLK_STS_IOERR;
2438
2439
2440 if (ordered->disk_num_bytes == len)
2441 goto out;
2442
2443
2444 if (WARN_ON_ONCE(ordered->bytes_left != ordered->disk_num_bytes)) {
2445 ret = -EINVAL;
2446 goto out;
2447 }
2448
2449
2450 if (WARN_ON_ONCE(ordered->disk_num_bytes != ordered->num_bytes)) {
2451 ret = -EINVAL;
2452 goto out;
2453 }
2454
2455 ordered_end = ordered->disk_bytenr + ordered->disk_num_bytes;
2456
2457 if (WARN_ON_ONCE(start < ordered->disk_bytenr || end > ordered_end)) {
2458 ret = -EINVAL;
2459 goto out;
2460 }
2461
2462
2463 if (WARN_ON_ONCE(!list_empty(&ordered->list))) {
2464 ret = -EINVAL;
2465 goto out;
2466 }
2467
2468 file_len = ordered->num_bytes;
2469 pre = start - ordered->disk_bytenr;
2470 post = ordered_end - end;
2471
2472 ret = btrfs_split_ordered_extent(ordered, pre, post);
2473 if (ret)
2474 goto out;
2475 ret = split_zoned_em(inode, file_offset, file_len, pre, post);
2476
2477out:
2478 btrfs_put_ordered_extent(ordered);
2479
2480 return errno_to_blk_status(ret);
2481}
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
2502 int mirror_num, unsigned long bio_flags)
2503
2504{
2505 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2506 struct btrfs_root *root = BTRFS_I(inode)->root;
2507 enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
2508 blk_status_t ret = 0;
2509 int skip_sum;
2510 int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
2511
2512 skip_sum = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) ||
2513 test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
2514
2515 if (btrfs_is_free_space_inode(BTRFS_I(inode)))
2516 metadata = BTRFS_WQ_ENDIO_FREE_SPACE;
2517
2518 if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
2519 struct page *page = bio_first_bvec_all(bio)->bv_page;
2520 loff_t file_offset = page_offset(page);
2521
2522 ret = extract_ordered_extent(BTRFS_I(inode), bio, file_offset);
2523 if (ret)
2524 goto out;
2525 }
2526
2527 if (btrfs_op(bio) != BTRFS_MAP_WRITE) {
2528 ret = btrfs_bio_wq_end_io(fs_info, bio, metadata);
2529 if (ret)
2530 goto out;
2531
2532 if (bio_flags & EXTENT_BIO_COMPRESSED) {
2533
2534
2535
2536
2537
2538 ret = btrfs_submit_compressed_read(inode, bio,
2539 mirror_num,
2540 bio_flags);
2541 goto out_no_endio;
2542 } else {
2543
2544
2545
2546
2547
2548 ret = btrfs_lookup_bio_sums(inode, bio, NULL);
2549 if (ret)
2550 goto out;
2551 }
2552 goto mapit;
2553 } else if (async && !skip_sum) {
2554
2555 if (btrfs_is_data_reloc_root(root))
2556 goto mapit;
2557
2558 ret = btrfs_wq_submit_bio(inode, bio, mirror_num, bio_flags,
2559 0, btrfs_submit_bio_start);
2560 goto out;
2561 } else if (!skip_sum) {
2562 ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, (u64)-1, false);
2563 if (ret)
2564 goto out;
2565 }
2566
2567mapit:
2568 ret = btrfs_map_bio(fs_info, bio, mirror_num);
2569
2570out:
2571 if (ret) {
2572 bio->bi_status = ret;
2573 bio_endio(bio);
2574 }
2575out_no_endio:
2576 return ret;
2577}
2578
2579
2580
2581
2582
2583static int add_pending_csums(struct btrfs_trans_handle *trans,
2584 struct list_head *list)
2585{
2586 struct btrfs_ordered_sum *sum;
2587 struct btrfs_root *csum_root = NULL;
2588 int ret;
2589
2590 list_for_each_entry(sum, list, list) {
2591 trans->adding_csums = true;
2592 if (!csum_root)
2593 csum_root = btrfs_csum_root(trans->fs_info,
2594 sum->bytenr);
2595 ret = btrfs_csum_file_blocks(trans, csum_root, sum);
2596 trans->adding_csums = false;
2597 if (ret)
2598 return ret;
2599 }
2600 return 0;
2601}
2602
2603static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
2604 const u64 start,
2605 const u64 len,
2606 struct extent_state **cached_state)
2607{
2608 u64 search_start = start;
2609 const u64 end = start + len - 1;
2610
2611 while (search_start < end) {
2612 const u64 search_len = end - search_start + 1;
2613 struct extent_map *em;
2614 u64 em_len;
2615 int ret = 0;
2616
2617 em = btrfs_get_extent(inode, NULL, 0, search_start, search_len);
2618 if (IS_ERR(em))
2619 return PTR_ERR(em);
2620
2621 if (em->block_start != EXTENT_MAP_HOLE)
2622 goto next;
2623
2624 em_len = em->len;
2625 if (em->start < search_start)
2626 em_len -= search_start - em->start;
2627 if (em_len > search_len)
2628 em_len = search_len;
2629
2630 ret = set_extent_bit(&inode->io_tree, search_start,
2631 search_start + em_len - 1,
2632 EXTENT_DELALLOC_NEW, 0, NULL, cached_state,
2633 GFP_NOFS, NULL);
2634next:
2635 search_start = extent_map_end(em);
2636 free_extent_map(em);
2637 if (ret)
2638 return ret;
2639 }
2640 return 0;
2641}
2642
2643int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
2644 unsigned int extra_bits,
2645 struct extent_state **cached_state)
2646{
2647 WARN_ON(PAGE_ALIGNED(end));
2648
2649 if (start >= i_size_read(&inode->vfs_inode) &&
2650 !(inode->flags & BTRFS_INODE_PREALLOC)) {
2651
2652
2653
2654
2655 extra_bits |= EXTENT_DELALLOC_NEW;
2656 } else {
2657 int ret;
2658
2659 ret = btrfs_find_new_delalloc_bytes(inode, start,
2660 end + 1 - start,
2661 cached_state);
2662 if (ret)
2663 return ret;
2664 }
2665
2666 return set_extent_delalloc(&inode->io_tree, start, end, extra_bits,
2667 cached_state);
2668}
2669
2670
/*
 * Deferred fixup request for one page, queued by btrfs_writepage_cow_fixup()
 * and processed (and freed) by btrfs_writepage_fixup_worker().
 */
struct btrfs_writepage_fixup {
	/* Page to fix up; a reference is taken with get_page() when queued. */
	struct page *page;
	/* Owning inode; a reference is taken with ihold() when queued. */
	struct inode *inode;
	/* Work item queued on fs_info->fixup_workers. */
	struct btrfs_work work;
};
2676
/*
 * Worker for fixup requests queued by btrfs_writepage_cow_fixup().
 *
 * Reserves delalloc space for the page, and - if the page is still mapped,
 * dirty and flagged Checked, and not covered by an ordered extent - marks its
 * range as delalloc.  In every case the Checked flag is cleared, the page is
 * unlocked, and the page reference, the inode reference and the fixup
 * structure taken/allocated at queue time are released.
 */
static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
{
	struct btrfs_writepage_fixup *fixup;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	struct extent_changeset *data_reserved = NULL;
	struct page *page;
	struct btrfs_inode *inode;
	u64 page_start;
	u64 page_end;
	int ret = 0;
	/* Cleared once the reservation has been consumed by the delalloc range. */
	bool free_delalloc_space = true;

	fixup = container_of(work, struct btrfs_writepage_fixup, work);
	page = fixup->page;
	inode = BTRFS_I(fixup->inode);
	page_start = page_offset(page);
	page_end = page_offset(page) + PAGE_SIZE - 1;

	/*
	 * The space is reserved before the page lock is taken; the result is
	 * only evaluated once the page state has been checked below.
	 */
	ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
					   PAGE_SIZE);
again:
	lock_page(page);

	/*
	 * The page was truncated (no mapping), cleaned, or already had its
	 * Checked flag cleared while this work was pending.
	 */
	if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
		/*
		 * Nothing to fix up any more: give back the reservation if it
		 * succeeded, and swallow a reservation failure (ret = 0) so
		 * that no error is recorded against the page.
		 */
		if (!ret) {
			btrfs_delalloc_release_extents(inode, PAGE_SIZE);
			btrfs_delalloc_release_space(inode, data_reserved,
						     page_start, PAGE_SIZE,
						     true);
		}
		ret = 0;
		goto out_page;
	}

	/*
	 * The page still needs the fixup but the reservation failed: report
	 * the error on the page.
	 */
	if (ret)
		goto out_page;

	lock_extent_bits(&inode->io_tree, page_start, page_end, &cached_state);

	/* Already covered by an ordered extent: just release the reservation. */
	if (PageOrdered(page))
		goto out_reserved;

	ordered = btrfs_lookup_ordered_range(inode, page_start, PAGE_SIZE);
	if (ordered) {
		/*
		 * Drop both locks, start the ordered extent (second argument
		 * 1 - presumably "wait for completion", confirm), then retry
		 * from the page lock.
		 */
		unlock_extent_cached(&inode->io_tree, page_start, page_end,
				     &cached_state);
		unlock_page(page);
		btrfs_start_ordered_extent(ordered, 1);
		btrfs_put_ordered_extent(ordered);
		goto again;
	}

	ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0,
					&cached_state);
	if (ret)
		goto out_reserved;

	/*
	 * The dirty state was verified above under the page lock, which is
	 * still held, so the page must still be dirty here.  From now on the
	 * reserved data space belongs to the delalloc range.
	 */
	BUG_ON(!PageDirty(page));
	free_delalloc_space = false;
out_reserved:
	btrfs_delalloc_release_extents(inode, PAGE_SIZE);
	if (free_delalloc_space)
		btrfs_delalloc_release_space(inode, data_reserved, page_start,
					     PAGE_SIZE, true);
	unlock_extent_cached(&inode->io_tree, page_start, page_end,
			     &cached_state);
out_page:
	if (ret) {
		/*
		 * Record the failure on the mapping and on the page, and end
		 * writeback handling for it with the error.
		 */
		mapping_set_error(page->mapping, ret);
		end_extent_writepage(page, ret, page_start, page_end);
		clear_page_dirty_for_io(page);
		SetPageError(page);
	}
	btrfs_page_clear_checked(inode->root->fs_info, page, page_start, PAGE_SIZE);
	unlock_page(page);
	put_page(page);
	kfree(fixup);
	extent_changeset_free(data_reserved);

	/*
	 * Drop the inode reference taken at queue time through the delayed
	 * iput helper rather than a direct iput().
	 */
	btrfs_add_delayed_iput(&inode->vfs_inode);
}
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817int btrfs_writepage_cow_fixup(struct page *page)
2818{
2819 struct inode *inode = page->mapping->host;
2820 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2821 struct btrfs_writepage_fixup *fixup;
2822
2823
2824 if (PageOrdered(page))
2825 return 0;
2826
2827
2828
2829
2830
2831
2832
2833
2834 if (PageChecked(page))
2835 return -EAGAIN;
2836
2837 fixup = kzalloc(sizeof(*fixup), GFP_NOFS);
2838 if (!fixup)
2839 return -EAGAIN;
2840
2841
2842
2843
2844
2845
2846
2847 ihold(inode);
2848 btrfs_page_set_checked(fs_info, page, page_offset(page), PAGE_SIZE);
2849 get_page(page);
2850 btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL);
2851 fixup->page = page;
2852 fixup->inode = inode;
2853 btrfs_queue_work(fs_info->fixup_workers, &fixup->work);
2854
2855 return -EAGAIN;
2856}
2857
/*
 * Insert the file extent item described by @stack_fi at @file_pos of @inode
 * and add the reference for the reserved data extent it points to.
 *
 * Any file extents currently covering [file_pos, file_pos + num_bytes) are
 * dropped first.  When @update_inode_bytes is true the inode's byte count is
 * adjusted here.  @qgroup_reserved is passed through to
 * btrfs_alloc_reserved_file_extent().
 *
 * Returns 0 on success or a negative errno.
 */
static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
				       struct btrfs_inode *inode, u64 file_pos,
				       struct btrfs_file_extent_item *stack_fi,
				       const bool update_inode_bytes,
				       u64 qgroup_reserved)
{
	struct btrfs_root *root = inode->root;
	const u64 sectorsize = root->fs_info->sectorsize;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key ins;
	u64 disk_num_bytes = btrfs_stack_file_extent_disk_num_bytes(stack_fi);
	u64 disk_bytenr = btrfs_stack_file_extent_disk_bytenr(stack_fi);
	u64 offset = btrfs_stack_file_extent_offset(stack_fi);
	u64 num_bytes = btrfs_stack_file_extent_num_bytes(stack_fi);
	u64 ram_bytes = btrfs_stack_file_extent_ram_bytes(stack_fi);
	struct btrfs_drop_extents_args drop_args = { 0 };
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/*
	 * Drop the extents in the target range.  replace_extent together with
	 * extent_item_size asks btrfs_drop_extents() to make room for the new
	 * item itself; it reports whether it did so via extent_inserted.
	 */
	drop_args.path = path;
	drop_args.start = file_pos;
	drop_args.end = file_pos + num_bytes;
	drop_args.replace_extent = true;
	drop_args.extent_item_size = sizeof(*stack_fi);
	ret = btrfs_drop_extents(trans, root, inode, &drop_args);
	if (ret)
		goto out;

	/* No slot was prepared by the drop: insert an empty item ourselves. */
	if (!drop_args.extent_inserted) {
		ins.objectid = btrfs_ino(inode);
		ins.offset = file_pos;
		ins.type = BTRFS_EXTENT_DATA_KEY;

		ret = btrfs_insert_empty_item(trans, root, path, &ins,
					      sizeof(*stack_fi));
		if (ret)
			goto out;
	}
	/* Stamp the generation and copy the prepared item into the leaf. */
	leaf = path->nodes[0];
	btrfs_set_stack_file_extent_generation(stack_fi, trans->transid);
	write_extent_buffer(leaf, stack_fi,
			btrfs_item_ptr_offset(leaf, path->slots[0]),
			sizeof(struct btrfs_file_extent_item));

	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	/*
	 * The drop found a byte count that is not sector aligned, starting at
	 * file offset 0.
	 * NOTE(review): presumably this means an inline extent was dropped -
	 * confirm.  The unaligned tail is accounted on its own against one
	 * sector, and both bytes_found and num_bytes are reduced so that the
	 * generic update below does not count that part again.
	 */
	if (file_pos == 0 && !IS_ALIGNED(drop_args.bytes_found, sectorsize)) {
		u64 inline_size = round_down(drop_args.bytes_found, sectorsize);

		inline_size = drop_args.bytes_found - inline_size;
		btrfs_update_inode_bytes(inode, sectorsize, inline_size);
		drop_args.bytes_found -= inline_size;
		num_bytes -= sectorsize;
	}

	if (update_inode_bytes)
		btrfs_update_inode_bytes(inode, num_bytes, drop_args.bytes_found);

	/* Reuse 'ins' as the key of the data extent being referenced. */
	ins.objectid = disk_bytenr;
	ins.offset = disk_num_bytes;
	ins.type = BTRFS_EXTENT_ITEM_KEY;

	ret = btrfs_inode_set_file_extent_range(inode, file_pos, ram_bytes);
	if (ret)
		goto out;

	ret = btrfs_alloc_reserved_file_extent(trans, root, btrfs_ino(inode),
					       file_pos - offset,
					       qgroup_reserved, &ins);
out:
	btrfs_free_path(path);

	return ret;
}
2953
2954static void btrfs_release_delalloc_bytes(struct btrfs_fs_info *fs_info,
2955 u64 start, u64 len)
2956{
2957 struct btrfs_block_group *cache;
2958
2959 cache = btrfs_lookup_block_group(fs_info, start);
2960 ASSERT(cache);
2961
2962 spin_lock(&cache->lock);
2963 cache->delalloc_bytes -= len;
2964 spin_unlock(&cache->lock);
2965
2966 btrfs_put_block_group(cache);
2967}
2968
2969static int insert_ordered_extent_file_extent(struct btrfs_trans_handle *trans,
2970 struct btrfs_ordered_extent *oe)
2971{
2972 struct btrfs_file_extent_item stack_fi;
2973 bool update_inode_bytes;
2974 u64 num_bytes = oe->num_bytes;
2975 u64 ram_bytes = oe->ram_bytes;
2976
2977 memset(&stack_fi, 0, sizeof(stack_fi));
2978 btrfs_set_stack_file_extent_type(&stack_fi, BTRFS_FILE_EXTENT_REG);
2979 btrfs_set_stack_file_extent_disk_bytenr(&stack_fi, oe->disk_bytenr);
2980 btrfs_set_stack_file_extent_disk_num_bytes(&stack_fi,
2981 oe->disk_num_bytes);
2982 btrfs_set_stack_file_extent_offset(&stack_fi, oe->offset);
2983 if (test_bit(BTRFS_ORDERED_TRUNCATED, &oe->flags))
2984 num_bytes = ram_bytes = oe->truncated_len;
2985 btrfs_set_stack_file_extent_num_bytes(&stack_fi, num_bytes);
2986 btrfs_set_stack_file_extent_ram_bytes(&stack_fi, ram_bytes);
2987 btrfs_set_stack_file_extent_compression(&stack_fi, oe->compress_type);
2988
2989
2990
2991
2992
2993
2994
2995
2996 update_inode_bytes = test_bit(BTRFS_ORDERED_DIRECT, &oe->flags) ||
2997 test_bit(BTRFS_ORDERED_ENCODED, &oe->flags) ||
2998 test_bit(BTRFS_ORDERED_TRUNCATED, &oe->flags);
2999
3000 return insert_reserved_file_extent(trans, BTRFS_I(oe->inode),
3001 oe->file_offset, &stack_fi,
3002 update_inode_bytes, oe->qgroup_rsv);
3003}
3004
3005
3006
3007
3008
3009
/*
 * Complete an ordered extent after its IO has finished.
 *
 * On the normal path this inserts (or, for PREALLOC, marks written) the file
 * extent item, adds the pending checksums and updates the inode item inside a
 * joined transaction.  On every path it clears the relevant io-tree bits,
 * performs error/truncation cleanup, removes the ordered extent and drops two
 * references on it.
 *
 * Returns 0 on success or a negative errno.
 */
static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
{
	struct btrfs_inode *inode = BTRFS_I(ordered_extent->inode);
	struct btrfs_root *root = inode->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_trans_handle *trans = NULL;
	struct extent_io_tree *io_tree = &inode->io_tree;
	struct extent_state *cached_state = NULL;
	u64 start, end;
	int compress_type = 0;
	int ret = 0;
	u64 logical_len = ordered_extent->num_bytes;
	bool freespace_inode;
	bool truncated = false;
	bool clear_reserved_extent = true;
	unsigned int clear_bits = EXTENT_DEFRAG;

	start = ordered_extent->file_offset;
	end = start + ordered_extent->num_bytes - 1;

	/*
	 * EXTENT_DELALLOC_NEW is cleared only for ordered extents that are
	 * none of NOCOW, PREALLOC, DIRECT or ENCODED.
	 */
	if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
	    !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags) &&
	    !test_bit(BTRFS_ORDERED_DIRECT, &ordered_extent->flags) &&
	    !test_bit(BTRFS_ORDERED_ENCODED, &ordered_extent->flags))
		clear_bits |= EXTENT_DELALLOC_NEW;

	freespace_inode = btrfs_is_free_space_inode(inode);

	/* The write already failed: go straight to cleanup. */
	if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) {
		ret = -EIO;
		goto out;
	}

	/* NOTE(review): a set bdev presumably marks a zoned write - confirm. */
	if (ordered_extent->bdev) {
		btrfs_rewrite_logical_zoned(ordered_extent);
		btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
					ordered_extent->disk_num_bytes);
	}

	btrfs_free_io_failure_record(inode, start, end);

	if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
		truncated = true;
		logical_len = ordered_extent->truncated_len;
		/* Nothing of the extent is left: no item to add. */
		if (!logical_len)
			goto out;
	}

	if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
		/*
		 * ->list is what add_pending_csums() consumes further down; a
		 * NOCOW ordered extent must not carry any entries.
		 */
		BUG_ON(!list_empty(&ordered_extent->list));

		/* NOCOW: only the inode item is updated, then we are done. */
		btrfs_inode_safe_disk_i_size_write(inode, 0);
		if (freespace_inode)
			trans = btrfs_join_transaction_spacecache(root);
		else
			trans = btrfs_join_transaction(root);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			trans = NULL;
			goto out;
		}
		trans->block_rsv = &inode->block_rsv;
		ret = btrfs_update_inode_fallback(trans, root, inode);
		if (ret)
			btrfs_abort_transaction(trans, ret);
		goto out;
	}

	/*
	 * From here on the range is locked, so the final clear_extent_bit()
	 * at 'out' must also clear EXTENT_LOCKED.
	 */
	clear_bits |= EXTENT_LOCKED;
	lock_extent_bits(io_tree, start, end, &cached_state);

	if (freespace_inode)
		trans = btrfs_join_transaction_spacecache(root);
	else
		trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		trans = NULL;
		goto out;
	}

	trans->block_rsv = &inode->block_rsv;

	if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
		compress_type = ordered_extent->compress_type;
	if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
		/* Preallocated extent: mark the written part as written. */
		BUG_ON(compress_type);
		ret = btrfs_mark_extent_written(trans, inode,
						ordered_extent->file_offset,
						ordered_extent->file_offset +
						logical_len);
	} else {
		BUG_ON(root == fs_info->tree_root);
		ret = insert_ordered_extent_file_extent(trans, ordered_extent);
		if (!ret) {
			/*
			 * The item is in the tree; the reserved extent must no
			 * longer be freed by the cleanup below.
			 */
			clear_reserved_extent = false;
			btrfs_release_delalloc_bytes(fs_info,
						ordered_extent->disk_bytenr,
						ordered_extent->disk_num_bytes);
		}
	}
	/* Done regardless of 'ret', which is only checked afterwards. */
	unpin_extent_cache(&inode->extent_tree, ordered_extent->file_offset,
			   ordered_extent->num_bytes, trans->transid);
	if (ret < 0) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	ret = add_pending_csums(trans, &ordered_extent->list);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	/*
	 * For a non-truncated range that carries EXTENT_DELALLOC_NEW, clear
	 * that bit (with EXTENT_ADD_INODE_BYTES) before the inode item is
	 * updated below.
	 * NOTE(review): presumably clearing the bit is what updates the
	 * inode's byte count - confirm.
	 */
	if ((clear_bits & EXTENT_DELALLOC_NEW) &&
	    !test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags))
		clear_extent_bit(&inode->io_tree, start, end,
				 EXTENT_DELALLOC_NEW | EXTENT_ADD_INODE_BYTES,
				 0, 0, &cached_state);

	btrfs_inode_safe_disk_i_size_write(inode, 0);
	ret = btrfs_update_inode_fallback(trans, root, inode);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}
	ret = 0;
out:
	/* Clear the collected bits; wake waiters only if we held the lock. */
	clear_extent_bit(&inode->io_tree, start, end, clear_bits,
			 (clear_bits & EXTENT_LOCKED) ? 1 : 0, 0,
			 &cached_state);

	if (trans)
		btrfs_end_transaction(trans);

	if (ret || truncated) {
		u64 unwritten_start = start;

		/*
		 * On failure make sure BTRFS_ORDERED_IOERR is set on the
		 * ordered extent.  If this call is the one that sets it, also
		 * record -EIO on the inode's mapping.
		 */
		if (ret && !test_and_set_bit(BTRFS_ORDERED_IOERR,
					     &ordered_extent->flags))
			mapping_set_error(ordered_extent->inode->i_mapping, -EIO);

		/* For a truncated extent only the tail was not written. */
		if (truncated)
			unwritten_start += logical_len;
		clear_extent_uptodate(io_tree, unwritten_start, end, NULL);

		/* Drop cached extent maps for the unwritten part. */
		btrfs_drop_extent_cache(inode, unwritten_start, end, 0);

		/*
		 * Give the reserved extent back to the allocator when an error
		 * occurred, or when truncation left nothing of the extent.
		 * This is skipped if the file extent item was already
		 * inserted (clear_reserved_extent == false) and for NOCOW and
		 * PREALLOC ordered extents.
		 */
		if ((ret || !logical_len) &&
		    clear_reserved_extent &&
		    !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
		    !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
			/*
			 * With DISCARD_SYNC, discard the range on error before
			 * it is freed.
			 */
			if (ret && btrfs_test_opt(fs_info, DISCARD_SYNC))
				btrfs_discard_extent(fs_info,
						ordered_extent->disk_bytenr,
						ordered_extent->disk_num_bytes,
						NULL);
			btrfs_free_reserved_extent(fs_info,
					ordered_extent->disk_bytenr,
					ordered_extent->disk_num_bytes, 1);
		}
	}

	/*
	 * Remove the ordered extent only after all of the updates above are
	 * done.
	 */
	btrfs_remove_ordered_extent(inode, ordered_extent);

	/*
	 * Two references are dropped here.
	 * NOTE(review): presumably one held by this function's caller chain
	 * and one held on behalf of the ordered tree - confirm.
	 */
	btrfs_put_ordered_extent(ordered_extent);

	btrfs_put_ordered_extent(ordered_extent);

	return ret;
}
3216
3217static void finish_ordered_fn(struct btrfs_work *work)
3218{
3219 struct btrfs_ordered_extent *ordered_extent;
3220 ordered_extent = container_of(work, struct btrfs_ordered_extent, work);
3221 btrfs_finish_ordered_io(ordered_extent);
3222}
3223
3224void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
3225 struct page *page, u64 start,
3226 u64 end, bool uptodate)
3227{
3228 trace_btrfs_writepage_end_io_hook(inode, start, end, uptodate);
3229
3230 btrfs_mark_ordered_io_finished(inode, page, start, end + 1 - start,
3231 finish_ordered_fn, uptodate);
3232}
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245static int check_data_csum(struct inode *inode, struct btrfs_bio *bbio,
3246 u32 bio_offset, struct page *page, u32 pgoff,
3247 u64 start)
3248{
3249 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
3250 SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
3251 char *kaddr;
3252 u32 len = fs_info->sectorsize;
3253 const u32 csum_size = fs_info->csum_size;
3254 unsigned int offset_sectors;
3255 u8 *csum_expected;
3256 u8 csum[BTRFS_CSUM_SIZE];
3257
3258 ASSERT(pgoff + len <= PAGE_SIZE);
3259
3260 offset_sectors = bio_offset >> fs_info->sectorsize_bits;
3261 csum_expected = ((u8 *)bbio->csum) + offset_sectors * csum_size;
3262
3263 kaddr = kmap_atomic(page);
3264 shash->tfm = fs_info->csum_shash;
3265
3266 crypto_shash_digest(shash, kaddr + pgoff, len, csum);
3267
3268 if (memcmp(csum, csum_expected, csum_size))
3269 goto zeroit;
3270
3271 kunmap_atomic(kaddr);
3272 return 0;
3273zeroit:
3274 btrfs_print_data_csum_error(BTRFS_I(inode), start, csum, csum_expected,
3275 bbio->mirror_num);
3276 if (bbio->device)
3277 btrfs_dev_stat_inc_and_print(bbio->device,
3278 BTRFS_DEV_STAT_CORRUPTION_ERRS);
3279 memset(kaddr + pgoff, 1, len);
3280 flush_dcache_page(page);
3281 kunmap_atomic(kaddr);
3282 return -EIO;
3283}
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio,
3298 u32 bio_offset, struct page *page,
3299 u64 start, u64 end)
3300{
3301 struct inode *inode = page->mapping->host;
3302 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
3303 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3304 struct btrfs_root *root = BTRFS_I(inode)->root;
3305 const u32 sectorsize = root->fs_info->sectorsize;
3306 u32 pg_off;
3307 unsigned int result = 0;
3308
3309 if (btrfs_page_test_checked(fs_info, page, start, end + 1 - start)) {
3310 btrfs_page_clear_checked(fs_info, page, start, end + 1 - start);
3311 return 0;
3312 }
3313
3314
3315
3316
3317
3318
3319 if (bbio->csum == NULL)
3320 return 0;
3321
3322 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
3323 return 0;
3324
3325 if (unlikely(test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state)))
3326 return 0;
3327
3328 ASSERT(page_offset(page) <= start &&
3329 end <= page_offset(page) + PAGE_SIZE - 1);
3330 for (pg_off = offset_in_page(start);
3331 pg_off < offset_in_page(end);
3332 pg_off += sectorsize, bio_offset += sectorsize) {
3333 u64 file_offset = pg_off + page_offset(page);
3334 int ret;
3335
3336 if (btrfs_is_data_reloc_root(root) &&
3337 test_range_bit(io_tree, file_offset,
3338 file_offset + sectorsize - 1,
3339 EXTENT_NODATASUM, 1, NULL)) {
3340
3341 clear_extent_bits(io_tree, file_offset,
3342 file_offset + sectorsize - 1,
3343 EXTENT_NODATASUM);
3344 continue;
3345 }
3346 ret = check_data_csum(inode, bbio, bio_offset, page, pg_off,
3347 page_offset(page) + pg_off);
3348 if (ret < 0) {
3349 const int nr_bit = (pg_off - offset_in_page(start)) >>
3350 root->fs_info->sectorsize_bits;
3351
3352 result |= (1U << nr_bit);
3353 }
3354 }
3355 return result;
3356}
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368void btrfs_add_delayed_iput(struct inode *inode)
3369{
3370 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
3371 struct btrfs_inode *binode = BTRFS_I(inode);
3372
3373 if (atomic_add_unless(&inode->i_count, -1, 1))
3374 return;
3375
3376 atomic_inc(&fs_info->nr_delayed_iputs);
3377 spin_lock(&fs_info->delayed_iput_lock);
3378 ASSERT(list_empty(&binode->delayed_iput));
3379 list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs);
3380 spin_unlock(&fs_info->delayed_iput_lock);
3381 if (!test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags))
3382 wake_up_process(fs_info->cleaner_kthread);
3383}
3384
/*
 * Run one queued delayed iput.
 *
 * Must be called with fs_info->delayed_iput_lock held.  The lock is dropped
 * around iput() and taken again before returning, so the delayed_iputs list
 * may have changed by the time this function returns.  Waiters on
 * delayed_iputs_wait are woken when the pending counter drops to zero.
 */
static void run_delayed_iput_locked(struct btrfs_fs_info *fs_info,
				    struct btrfs_inode *inode)
{
	/* Unqueue while still holding the lock, then drop it for iput(). */
	list_del_init(&inode->delayed_iput);
	spin_unlock(&fs_info->delayed_iput_lock);
	iput(&inode->vfs_inode);
	if (atomic_dec_and_test(&fs_info->nr_delayed_iputs))
		wake_up(&fs_info->delayed_iputs_wait);
	/* Return with the lock held, as the caller expects. */
	spin_lock(&fs_info->delayed_iput_lock);
}
3395
3396static void btrfs_run_delayed_iput(struct btrfs_fs_info *fs_info,
3397 struct btrfs_inode *inode)
3398{
3399 if (!list_empty(&inode->delayed_iput)) {
3400 spin_lock(&fs_info->delayed_iput_lock);
3401 if (!list_empty(&inode->delayed_iput))
3402 run_delayed_iput_locked(fs_info, inode);
3403 spin_unlock(&fs_info->delayed_iput_lock);
3404 }
3405}
3406
3407void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
3408{
3409
3410 spin_lock(&fs_info->delayed_iput_lock);
3411 while (!list_empty(&fs_info->delayed_iputs)) {
3412 struct btrfs_inode *inode;
3413
3414 inode = list_first_entry(&fs_info->delayed_iputs,
3415 struct btrfs_inode, delayed_iput);
3416 run_delayed_iput_locked(fs_info, inode);
3417 cond_resched_lock(&fs_info->delayed_iput_lock);
3418 }
3419 spin_unlock(&fs_info->delayed_iput_lock);
3420}
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434int btrfs_wait_on_delayed_iputs(struct btrfs_fs_info *fs_info)
3435{
3436 int ret = wait_event_killable(fs_info->delayed_iputs_wait,
3437 atomic_read(&fs_info->nr_delayed_iputs) == 0);
3438 if (ret)
3439 return -EINTR;
3440 return 0;
3441}
3442
3443
3444
3445
3446
3447int btrfs_orphan_add(struct btrfs_trans_handle *trans,
3448 struct btrfs_inode *inode)
3449{
3450 int ret;
3451
3452 ret = btrfs_insert_orphan_item(trans, inode->root, btrfs_ino(inode));
3453 if (ret && ret != -EEXIST) {
3454 btrfs_abort_transaction(trans, ret);
3455 return ret;
3456 }
3457
3458 return 0;
3459}
3460
3461
3462
3463
3464
3465static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
3466 struct btrfs_inode *inode)
3467{
3468 return btrfs_del_orphan_item(trans, inode->root, btrfs_ino(inode));
3469}
3470
3471
3472
3473
3474
/*
 * Process the orphan items of @root.
 *
 * Orphan items are visited from the highest offset downwards.  For each one
 * the inode it names is looked up: if the inode no longer exists or still
 * has links, the orphan item is deleted; otherwise the inode is released
 * with iput().
 *
 * Only the first call for a given root does any work; later calls see
 * BTRFS_ROOT_ORPHAN_CLEANUP already set and return 0.
 *
 * Returns 0 on success or a negative errno.
 */
int btrfs_orphan_cleanup(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key, found_key;
	struct btrfs_trans_handle *trans;
	struct inode *inode;
	u64 last_objectid = 0;
	int ret = 0, nr_unlink = 0;

	if (test_and_set_bit(BTRFS_ROOT_ORPHAN_CLEANUP, &root->state))
		return 0;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}
	path->reada = READA_BACK;

	/* Start the search at the largest possible orphan key. */
	key.objectid = BTRFS_ORPHAN_OBJECTID;
	key.type = BTRFS_ORPHAN_ITEM_KEY;
	key.offset = (u64)-1;

	while (1) {
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			goto out;

		/*
		 * ret > 0: no exact match for the search key.  Step back one
		 * slot to look at the item just before the search position;
		 * if the position is already slot 0 the loop ends.  On an
		 * exact match (ret == 0) the slot is used as is.
		 */
		if (ret > 0) {
			ret = 0;
			if (path->slots[0] == 0)
				break;
			path->slots[0]--;
		}

		/* Read the key of the candidate item. */
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		/* Stop as soon as the item is not an orphan item. */
		if (found_key.objectid != BTRFS_ORPHAN_OBJECTID)
			break;
		if (found_key.type != BTRFS_ORPHAN_ITEM_KEY)
			break;

		/* The key was copied out; the path is not needed anymore. */
		btrfs_release_path(path);

		/*
		 * The offset of an orphan item is used below as the inode
		 * number.  Seeing the same offset as in the previous iteration
		 * means that iteration did not get rid of the item; give up
		 * instead of looping forever.
		 */
		if (found_key.offset == last_objectid) {
			btrfs_err(fs_info,
				  "Error removing orphan entry, stopping orphan cleanup");
			ret = -EINVAL;
			goto out;
		}

		last_objectid = found_key.offset;

		found_key.objectid = found_key.offset;
		found_key.type = BTRFS_INODE_ITEM_KEY;
		found_key.offset = 0;
		inode = btrfs_iget(fs_info->sb, last_objectid, root);
		ret = PTR_ERR_OR_ZERO(inode);
		if (ret && ret != -ENOENT)
			goto out;

		if (ret == -ENOENT && root == fs_info->tree_root) {
			struct btrfs_root *dead_root;
			int is_dead_root = 0;

			/*
			 * In the tree root an orphan item without a matching
			 * inode may name a root instead: look the id up in
			 * fs_roots_radix.  If such a root exists and has zero
			 * references, the orphan item is left in place and
			 * the search continues below it.
			 * NOTE(review): presumably that root is being deleted
			 * elsewhere and its orphan item is removed there -
			 * confirm.
			 */
			spin_lock(&fs_info->fs_roots_radix_lock);
			dead_root = radix_tree_lookup(&fs_info->fs_roots_radix,
							 (unsigned long)found_key.objectid);
			if (dead_root && btrfs_root_refs(&dead_root->root_item) == 0)
				is_dead_root = 1;
			spin_unlock(&fs_info->fs_roots_radix_lock);

			if (is_dead_root) {
				/* Make the next search start below this item. */
				key.offset = found_key.objectid - 1;
				continue;
			}

		}

		/*
		 * The inode is gone (-ENOENT) or it still has links: either
		 * way the orphan item is deleted.  When the inode exists, its
		 * verity items are dropped and the inode reference released
		 * first.
		 * NOTE(review): the reason an inode with links can still have
		 * an orphan item is not visible here - confirm against the
		 * callers of btrfs_orphan_add().
		 */
		if (ret == -ENOENT || inode->i_nlink) {
			if (!ret) {
				ret = btrfs_drop_verity_items(BTRFS_I(inode));
				iput(inode);
				if (ret)
					goto out;
			}
			trans = btrfs_start_transaction(root, 1);
			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
				goto out;
			}
			btrfs_debug(fs_info, "auto deleting %Lu",
				    found_key.objectid);
			ret = btrfs_del_orphan_item(trans, root,
						    found_key.objectid);
			btrfs_end_transaction(trans);
			if (ret)
				goto out;
			continue;
		}

		nr_unlink++;

		/*
		 * The inode exists and has no links; release our reference.
		 * NOTE(review): presumably this final iput evicts the inode
		 * and removes its orphan item - confirm.
		 */
		iput(inode);
	}

	btrfs_release_path(path);

	/*
	 * NOTE(review): joining and immediately ending a transaction when
	 * orphan items were inserted presumably releases resources tied to
	 * them - confirm.
	 */
	if (test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state)) {
		trans = btrfs_join_transaction(root);
		if (!IS_ERR(trans))
			btrfs_end_transaction(trans);
	}

	if (nr_unlink)
		btrfs_debug(fs_info, "unlinked %d orphans", nr_unlink);

out:
	if (ret)
		btrfs_err(fs_info, "could not do orphan cleanup %d", ret);
	btrfs_free_path(path);
	return ret;
}
3659
3660
3661
3662
3663
3664
3665
3666static noinline int acls_after_inode_item(struct extent_buffer *leaf,
3667 int slot, u64 objectid,
3668 int *first_xattr_slot)
3669{
3670 u32 nritems = btrfs_header_nritems(leaf);
3671 struct btrfs_key found_key;
3672 static u64 xattr_access = 0;
3673 static u64 xattr_default = 0;
3674 int scanned = 0;
3675
3676 if (!xattr_access) {
3677 xattr_access = btrfs_name_hash(XATTR_NAME_POSIX_ACL_ACCESS,
3678 strlen(XATTR_NAME_POSIX_ACL_ACCESS));
3679 xattr_default = btrfs_name_hash(XATTR_NAME_POSIX_ACL_DEFAULT,
3680 strlen(XATTR_NAME_POSIX_ACL_DEFAULT));
3681 }
3682
3683 slot++;
3684 *first_xattr_slot = -1;
3685 while (slot < nritems) {
3686 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3687
3688
3689 if (found_key.objectid != objectid)
3690 return 0;
3691
3692
3693 if (found_key.type == BTRFS_XATTR_ITEM_KEY) {
3694 if (*first_xattr_slot == -1)
3695 *first_xattr_slot = slot;
3696 if (found_key.offset == xattr_access ||
3697 found_key.offset == xattr_default)
3698 return 1;
3699 }
3700
3701
3702
3703
3704
3705 if (found_key.type > BTRFS_XATTR_ITEM_KEY)
3706 return 0;
3707
3708 slot++;
3709 scanned++;
3710
3711
3712
3713
3714
3715
3716
3717 if (scanned >= 8)
3718 break;
3719 }
3720
3721
3722
3723
3724 if (*first_xattr_slot == -1)
3725 *first_xattr_slot = slot;
3726 return 1;
3727}
3728
3729
3730
3731
/*
 * Fill the in-memory @inode from its on-disk inode item and install the
 * operation tables matching its file type.
 *
 * @inode:   inode to fill; its BTRFS_I(inode)->location and ->root must
 *           already be set
 * @in_path: optional path to use for the lookup.  If NULL, a path is
 *           allocated and freed here; a caller-provided path is not freed.
 *
 * Returns 0 on success, -ENOMEM if the path cannot be allocated, or the
 * non-zero value returned by btrfs_lookup_inode().
 */
static int btrfs_read_locked_inode(struct inode *inode,
				   struct btrfs_path *in_path)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_path *path = in_path;
	struct extent_buffer *leaf;
	struct btrfs_inode_item *inode_item;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_key location;
	unsigned long ptr;
	int maybe_acls;
	u32 rdev;
	int ret;
	bool filled = false;
	int first_xattr_slot;

	/*
	 * If btrfs_fill_inode() succeeds, the inode fields (and rdev) are
	 * already set and parsing the on-disk item below is skipped.
	 * NOTE(review): presumably it fills from the delayed inode - confirm.
	 */
	ret = btrfs_fill_inode(inode, &rdev);
	if (!ret)
		filled = true;

	if (!path) {
		path = btrfs_alloc_path();
		if (!path)
			return -ENOMEM;
	}

	memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));

	/* The leaf is needed even when 'filled', for the scans further down. */
	ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
	if (ret) {
		if (path != in_path)
			btrfs_free_path(path);
		return ret;
	}

	leaf = path->nodes[0];

	if (filled)
		goto cache_index;

	/* Copy every field from the on-disk inode item. */
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	inode->i_mode = btrfs_inode_mode(leaf, inode_item);
	set_nlink(inode, btrfs_inode_nlink(leaf, inode_item));
	i_uid_write(inode, btrfs_inode_uid(leaf, inode_item));
	i_gid_write(inode, btrfs_inode_gid(leaf, inode_item));
	btrfs_i_size_write(BTRFS_I(inode), btrfs_inode_size(leaf, inode_item));
	btrfs_inode_set_file_extent_range(BTRFS_I(inode), 0,
			round_up(i_size_read(inode), fs_info->sectorsize));

	inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime);
	inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->atime);

	inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->mtime);
	inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->mtime);

	inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->ctime);
	inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->ctime);

	BTRFS_I(inode)->i_otime.tv_sec =
		btrfs_timespec_sec(leaf, &inode_item->otime);
	BTRFS_I(inode)->i_otime.tv_nsec =
		btrfs_timespec_nsec(leaf, &inode_item->otime);

	inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item));
	BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
	BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item);

	inode_set_iversion_queried(inode,
				   btrfs_inode_sequence(leaf, inode_item));
	inode->i_generation = BTRFS_I(inode)->generation;
	/* i_rdev is set by init_special_inode() below when applicable. */
	inode->i_rdev = 0;
	rdev = btrfs_inode_rdev(leaf, inode_item);

	BTRFS_I(inode)->index_cnt = (u64)-1;
	btrfs_inode_split_flags(btrfs_inode_flags(leaf, inode_item),
				&BTRFS_I(inode)->flags, &BTRFS_I(inode)->ro_flags);

cache_index:
	/*
	 * The inode was last modified in the filesystem's current generation:
	 * flag it as needing a full sync.
	 * NOTE(review): presumably because the in-memory state needed for a
	 * faster fsync was lost when the inode left memory - confirm.
	 */
	if (BTRFS_I(inode)->last_trans == fs_info->generation)
		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
			&BTRFS_I(inode)->runtime_flags);

	/*
	 * last_unlink_trans is not read from disk, so it is initialized from
	 * last_trans.
	 * NOTE(review): presumably a conservative value for the logging code
	 * - confirm.
	 */
	BTRFS_I(inode)->last_unlink_trans = BTRFS_I(inode)->last_trans;

	/* Same treatment for last_reflink_trans. */
	BTRFS_I(inode)->last_reflink_trans = BTRFS_I(inode)->last_trans;

	/*
	 * For an inode with exactly one link, peek at the next item in the
	 * leaf: if it is this inode's INODE_REF or INODE_EXTREF, cache the
	 * directory index stored in it.
	 */
	path->slots[0]++;
	if (inode->i_nlink != 1 ||
	    path->slots[0] >= btrfs_header_nritems(leaf))
		goto cache_acl;

	btrfs_item_key_to_cpu(leaf, &location, path->slots[0]);
	if (location.objectid != btrfs_ino(BTRFS_I(inode)))
		goto cache_acl;

	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
	if (location.type == BTRFS_INODE_REF_KEY) {
		struct btrfs_inode_ref *ref;

		ref = (struct btrfs_inode_ref *)ptr;
		BTRFS_I(inode)->dir_index = btrfs_inode_ref_index(leaf, ref);
	} else if (location.type == BTRFS_INODE_EXTREF_KEY) {
		struct btrfs_inode_extref *extref;

		extref = (struct btrfs_inode_extref *)ptr;
		BTRFS_I(inode)->dir_index = btrfs_inode_extref_index(leaf,
								     extref);
	}
cache_acl:
	/*
	 * Scan the following items to decide whether the inode may have ACLs,
	 * and load inode properties starting at the slot the scan reports.
	 */
	maybe_acls = acls_after_inode_item(leaf, path->slots[0],
			btrfs_ino(BTRFS_I(inode)), &first_xattr_slot);
	if (first_xattr_slot != -1) {
		path->slots[0] = first_xattr_slot;
		ret = btrfs_load_inode_props(inode, path);
		/* A failure to load props is logged but not returned. */
		if (ret)
			btrfs_err(fs_info,
				  "error loading props for ino %llu (root %llu): %d",
				  btrfs_ino(BTRFS_I(inode)),
				  root->root_key.objectid, ret);
	}
	if (path != in_path)
		btrfs_free_path(path);

	if (!maybe_acls)
		cache_no_acl(inode);

	/* Install the operation tables for the file type. */
	switch (inode->i_mode & S_IFMT) {
	case S_IFREG:
		inode->i_mapping->a_ops = &btrfs_aops;
		inode->i_fop = &btrfs_file_operations;
		inode->i_op = &btrfs_file_inode_operations;
		break;
	case S_IFDIR:
		inode->i_fop = &btrfs_dir_file_operations;
		inode->i_op = &btrfs_dir_inode_operations;
		break;
	case S_IFLNK:
		inode->i_op = &btrfs_symlink_inode_operations;
		inode_nohighmem(inode);
		inode->i_mapping->a_ops = &btrfs_aops;
		break;
	default:
		inode->i_op = &btrfs_special_inode_operations;
		init_special_inode(inode, inode->i_mode, rdev);
		break;
	}

	btrfs_sync_inode_flags_to_i_flags(inode);
	return 0;
}
3929
3930
3931
3932
3933static void fill_inode_item(struct btrfs_trans_handle *trans,
3934 struct extent_buffer *leaf,
3935 struct btrfs_inode_item *item,
3936 struct inode *inode)
3937{
3938 struct btrfs_map_token token;
3939 u64 flags;
3940
3941 btrfs_init_map_token(&token, leaf);
3942
3943 btrfs_set_token_inode_uid(&token, item, i_uid_read(inode));
3944 btrfs_set_token_inode_gid(&token, item, i_gid_read(inode));
3945 btrfs_set_token_inode_size(&token, item, BTRFS_I(inode)->disk_i_size);
3946 btrfs_set_token_inode_mode(&token, item, inode->i_mode);
3947 btrfs_set_token_inode_nlink(&token, item, inode->i_nlink);
3948
3949 btrfs_set_token_timespec_sec(&token, &item->atime,
3950 inode->i_atime.tv_sec);
3951 btrfs_set_token_timespec_nsec(&token, &item->atime,
3952 inode->i_atime.tv_nsec);
3953
3954 btrfs_set_token_timespec_sec(&token, &item->mtime,
3955 inode->i_mtime.tv_sec);
3956 btrfs_set_token_timespec_nsec(&token, &item->mtime,
3957 inode->i_mtime.tv_nsec);
3958
3959 btrfs_set_token_timespec_sec(&token, &item->ctime,
3960 inode->i_ctime.tv_sec);
3961 btrfs_set_token_timespec_nsec(&token, &item->ctime,
3962 inode->i_ctime.tv_nsec);
3963
3964 btrfs_set_token_timespec_sec(&token, &item->otime,
3965 BTRFS_I(inode)->i_otime.tv_sec);
3966 btrfs_set_token_timespec_nsec(&token, &item->otime,
3967 BTRFS_I(inode)->i_otime.tv_nsec);
3968
3969 btrfs_set_token_inode_nbytes(&token, item, inode_get_bytes(inode));
3970 btrfs_set_token_inode_generation(&token, item,
3971 BTRFS_I(inode)->generation);
3972 btrfs_set_token_inode_sequence(&token, item, inode_peek_iversion(inode));
3973 btrfs_set_token_inode_transid(&token, item, trans->transid);
3974 btrfs_set_token_inode_rdev(&token, item, inode->i_rdev);
3975 flags = btrfs_inode_combine_flags(BTRFS_I(inode)->flags,
3976 BTRFS_I(inode)->ro_flags);
3977 btrfs_set_token_inode_flags(&token, item, flags);
3978 btrfs_set_token_inode_block_group(&token, item, 0);
3979}
3980
3981
3982
3983
3984static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
3985 struct btrfs_root *root,
3986 struct btrfs_inode *inode)
3987{
3988 struct btrfs_inode_item *inode_item;
3989 struct btrfs_path *path;
3990 struct extent_buffer *leaf;
3991 int ret;
3992
3993 path = btrfs_alloc_path();
3994 if (!path)
3995 return -ENOMEM;
3996
3997 ret = btrfs_lookup_inode(trans, root, path, &inode->location, 1);
3998 if (ret) {
3999 if (ret > 0)
4000 ret = -ENOENT;
4001 goto failed;
4002 }
4003
4004 leaf = path->nodes[0];
4005 inode_item = btrfs_item_ptr(leaf, path->slots[0],
4006 struct btrfs_inode_item);
4007
4008 fill_inode_item(trans, leaf, inode_item, &inode->vfs_inode);
4009 btrfs_mark_buffer_dirty(leaf);
4010 btrfs_set_inode_last_trans(trans, inode);
4011 ret = 0;
4012failed:
4013 btrfs_free_path(path);
4014 return ret;
4015}
4016
4017
4018
4019
4020noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
4021 struct btrfs_root *root,
4022 struct btrfs_inode *inode)
4023{
4024 struct btrfs_fs_info *fs_info = root->fs_info;
4025 int ret;
4026
4027
4028
4029
4030
4031
4032
4033
4034 if (!btrfs_is_free_space_inode(inode)
4035 && !btrfs_is_data_reloc_root(root)
4036 && !test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
4037 btrfs_update_root_times(trans, root);
4038
4039 ret = btrfs_delayed_update_inode(trans, root, inode);
4040 if (!ret)
4041 btrfs_set_inode_last_trans(trans, inode);
4042 return ret;
4043 }
4044
4045 return btrfs_update_inode_item(trans, root, inode);
4046}
4047
4048int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
4049 struct btrfs_root *root, struct btrfs_inode *inode)
4050{
4051 int ret;
4052
4053 ret = btrfs_update_inode(trans, root, inode);
4054 if (ret == -ENOSPC)
4055 return btrfs_update_inode_item(trans, root, inode);
4056 return ret;
4057}
4058
4059
4060
4061
4062
4063
/*
 * Remove the directory entry @name (of length @name_len) that links @inode
 * into @dir.
 *
 * This deletes the dir item, the inode back reference and the delayed dir
 * index, then shrinks the directory size and refreshes the timestamps of
 * both inodes.  It does not touch the link count of @inode; see
 * btrfs_unlink_inode() for that.
 *
 * @rename_ctx: optional.  When non-NULL the index of the removed entry is
 *              stored in it and the tree-log entries are left alone.
 *
 * Returns 0 on success or a negative errno.  Failures while deleting the
 * inode ref or the delayed dir index abort the transaction.
 */
static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
				struct btrfs_inode *dir,
				struct btrfs_inode *inode,
				const char *name, int name_len,
				struct btrfs_rename_ctx *rename_ctx)
{
	struct btrfs_root *root = dir->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_path *path;
	int ret = 0;
	struct btrfs_dir_item *di;
	u64 index;
	u64 ino = btrfs_ino(inode);
	u64 dir_ino = btrfs_ino(dir);

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	/* A NULL result (no such name) is reported as -ENOENT. */
	di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
				    name, name_len, -1);
	if (IS_ERR_OR_NULL(di)) {
		ret = di ? PTR_ERR(di) : -ENOENT;
		goto err;
	}
	ret = btrfs_delete_one_dir_name(trans, root, path, di);
	if (ret)
		goto err;
	btrfs_release_path(path);

	/*
	 * If the inode carries a cached dir index, try to delete the inode
	 * ref through the delayed inode path.  On success the cached index is
	 * used and the tree search done by btrfs_del_inode_ref() is skipped;
	 * on failure we fall through to the regular deletion below.
	 */
	if (inode->dir_index) {
		ret = btrfs_delayed_delete_inode_ref(inode);
		if (!ret) {
			index = inode->dir_index;
			goto skip_backref;
		}
	}

	ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
				  dir_ino, &index);
	if (ret) {
		btrfs_info(fs_info,
			"failed to delete reference to %.*s, inode %llu parent %llu",
			name_len, name, ino, dir_ino);
		btrfs_abort_transaction(trans, ret);
		goto err;
	}
skip_backref:
	/* Report the index of the removed entry back to a rename caller. */
	if (rename_ctx)
		rename_ctx->index = index;

	ret = btrfs_delete_delayed_dir_index(trans, dir, index);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto err;
	}

	/*
	 * Only a plain unlink removes the entries from the log here.
	 * NOTE(review): presumably the rename path updates the log itself
	 * using the index stored in rename_ctx - confirm against the caller.
	 */
	if (!rename_ctx) {
		btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode,
					   dir_ino);
		btrfs_del_dir_entries_in_log(trans, root, name, name_len, dir,
					     index);
	}

	btrfs_run_delayed_iput(fs_info, inode);
err:
	/* Reached on success as well; ret decides whether we continue. */
	btrfs_free_path(path);
	if (ret)
		goto out;

	/*
	 * The directory size shrinks by twice the name length.
	 * NOTE(review): presumably the name is accounted once for the dir
	 * item and once for the dir index - confirm.
	 */
	btrfs_i_size_write(dir, dir->vfs_inode.i_size - name_len * 2);
	inode_inc_iversion(&inode->vfs_inode);
	inode_inc_iversion(&dir->vfs_inode);
	inode->vfs_inode.i_ctime = dir->vfs_inode.i_mtime =
		dir->vfs_inode.i_ctime = current_time(&inode->vfs_inode);
	ret = btrfs_update_inode(trans, root, dir);
out:
	return ret;
}
4170
4171int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
4172 struct btrfs_inode *dir, struct btrfs_inode *inode,
4173 const char *name, int name_len)
4174{
4175 int ret;
4176 ret = __btrfs_unlink_inode(trans, dir, inode, name, name_len, NULL);
4177 if (!ret) {
4178 drop_nlink(&inode->vfs_inode);
4179 ret = btrfs_update_inode(trans, inode->root, inode);
4180 }
4181 return ret;
4182}
4183
4184
4185
4186
4187
4188
4189
4190
4191
4192static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir)
4193{
4194 struct btrfs_root *root = BTRFS_I(dir)->root;
4195
4196
4197
4198
4199
4200
4201
4202
4203 return btrfs_start_transaction_fallback_global_rsv(root, 5);
4204}
4205
4206static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
4207{
4208 struct btrfs_trans_handle *trans;
4209 struct inode *inode = d_inode(dentry);
4210 int ret;
4211
4212 trans = __unlink_start_trans(dir);
4213 if (IS_ERR(trans))
4214 return PTR_ERR(trans);
4215
4216 btrfs_record_unlink_dir(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
4217 0);
4218
4219 ret = btrfs_unlink_inode(trans, BTRFS_I(dir),
4220 BTRFS_I(d_inode(dentry)), dentry->d_name.name,
4221 dentry->d_name.len);
4222 if (ret)
4223 goto out;
4224
4225 if (inode->i_nlink == 0) {
4226 ret = btrfs_orphan_add(trans, BTRFS_I(inode));
4227 if (ret)
4228 goto out;
4229 }
4230
4231out:
4232 btrfs_end_transaction(trans);
4233 btrfs_btree_balance_dirty(BTRFS_I(dir)->root->fs_info);
4234 return ret;
4235}
4236
/*
 * Remove the directory entry @dentry in @dir that points at a subvolume
 * (or at an empty-subvolume placeholder directory).
 *
 * The dir item and the delayed dir index are deleted, and for a real
 * subvolume the root ref is deleted as well.  Afterwards the size, version
 * and timestamps of @dir are updated.
 *
 * Returns 0 on success or a negative errno.  Every failure after the
 * initial dir item lookup aborts the transaction.
 */
static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
			       struct inode *dir, struct dentry *dentry)
{
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct btrfs_inode *inode = BTRFS_I(d_inode(dentry));
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_dir_item *di;
	struct btrfs_key key;
	const char *name = dentry->d_name.name;
	int name_len = dentry->d_name.len;
	u64 index;
	int ret;
	u64 objectid;
	u64 dir_ino = btrfs_ino(BTRFS_I(dir));

	/*
	 * Work out the id of the subvolume the entry refers to: the root of
	 * the inode for a real subvolume directory, or the objectid stored in
	 * the inode location for a placeholder directory.  Any other inode is
	 * rejected.
	 */
	if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) {
		objectid = inode->root->root_key.objectid;
	} else if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) {
		objectid = inode->location.objectid;
	} else {
		WARN_ON(1);
		return -EINVAL;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
				   name, name_len, -1);
	if (IS_ERR_OR_NULL(di)) {
		ret = di ? PTR_ERR(di) : -ENOENT;
		goto out;
	}

	/* Sanity check: the dir item must point at the expected root item. */
	leaf = path->nodes[0];
	btrfs_dir_item_key_to_cpu(leaf, di, &key);
	WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
	ret = btrfs_delete_one_dir_name(trans, root, path, di);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}
	btrfs_release_path(path);

	/*
	 * For a placeholder directory the dir index is found by searching the
	 * dir index items of the parent by name; no root ref is deleted.
	 * NOTE(review): presumably because no root ref exists for such a
	 * placeholder - confirm.  For a real subvolume the root ref is
	 * deleted, which also yields the dir index.
	 */
	if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) {
		di = btrfs_search_dir_index_item(root, path, dir_ino,
						 name, name_len);
		if (IS_ERR_OR_NULL(di)) {
			if (!di)
				ret = -ENOENT;
			else
				ret = PTR_ERR(di);
			btrfs_abort_transaction(trans, ret);
			goto out;
		}

		/* The dir index is the offset of the key of the found item. */
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		index = key.offset;
		btrfs_release_path(path);
	} else {
		ret = btrfs_del_root_ref(trans, objectid,
					 root->root_key.objectid, dir_ino,
					 &index, name, name_len);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
	}

	ret = btrfs_delete_delayed_dir_index(trans, BTRFS_I(dir), index);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	/* Same directory size accounting as a regular unlink: 2 * name_len. */
	btrfs_i_size_write(BTRFS_I(dir), dir->i_size - name_len * 2);
	inode_inc_iversion(dir);
	dir->i_mtime = dir->i_ctime = current_time(dir);
	ret = btrfs_update_inode_fallback(trans, root, BTRFS_I(dir));
	if (ret)
		btrfs_abort_transaction(trans, ret);
out:
	btrfs_free_path(path);
	return ret;
}
4334
4335
4336
4337
4338
4339static noinline int may_destroy_subvol(struct btrfs_root *root)
4340{
4341 struct btrfs_fs_info *fs_info = root->fs_info;
4342 struct btrfs_path *path;
4343 struct btrfs_dir_item *di;
4344 struct btrfs_key key;
4345 u64 dir_id;
4346 int ret;
4347
4348 path = btrfs_alloc_path();
4349 if (!path)
4350 return -ENOMEM;
4351
4352
4353 dir_id = btrfs_super_root_dir(fs_info->super_copy);
4354 di = btrfs_lookup_dir_item(NULL, fs_info->tree_root, path,
4355 dir_id, "default", 7, 0);
4356 if (di && !IS_ERR(di)) {
4357 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
4358 if (key.objectid == root->root_key.objectid) {
4359 ret = -EPERM;
4360 btrfs_err(fs_info,
4361 "deleting default subvolume %llu is not allowed",
4362 key.objectid);
4363 goto out;
4364 }
4365 btrfs_release_path(path);
4366 }
4367
4368 key.objectid = root->root_key.objectid;
4369 key.type = BTRFS_ROOT_REF_KEY;
4370 key.offset = (u64)-1;
4371
4372 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
4373 if (ret < 0)
4374 goto out;
4375 BUG_ON(ret == 0);
4376
4377 ret = 0;
4378 if (path->slots[0] > 0) {
4379 path->slots[0]--;
4380 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
4381 if (key.objectid == root->root_key.objectid &&
4382 key.type == BTRFS_ROOT_REF_KEY)
4383 ret = -ENOTEMPTY;
4384 }
4385out:
4386 btrfs_free_path(path);
4387 return ret;
4388}
4389
4390
/*
 * Walk all in-memory inodes of @root in increasing inode number order and
 * prune the dentry aliases of every inode that is referenced by someone
 * other than us.
 *
 * A warning is emitted if the root still has references, unless the
 * filesystem is in an error state.
 */
static void btrfs_prune_dentries(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct rb_node *node;
	struct rb_node *prev;
	struct btrfs_inode *entry;
	struct inode *inode;
	u64 objectid = 0;

	if (!BTRFS_FS_ERROR(fs_info))
		WARN_ON(btrfs_root_refs(&root->root_item) != 0);

	spin_lock(&root->inode_lock);
again:
	/*
	 * Find the first inode whose number is >= objectid.  We restart from
	 * here every time the lock was dropped, because the tree may have
	 * changed in the meantime.
	 */
	node = root->inode_tree.rb_node;
	prev = NULL;
	while (node) {
		prev = node;
		entry = rb_entry(node, struct btrfs_inode, rb_node);

		if (objectid < btrfs_ino(entry))
			node = node->rb_left;
		else if (objectid > btrfs_ino(entry))
			node = node->rb_right;
		else
			break;
	}
	/* No exact match: advance from the last visited node to the successor. */
	if (!node) {
		while (prev) {
			entry = rb_entry(prev, struct btrfs_inode, rb_node);
			if (objectid <= btrfs_ino(entry)) {
				node = prev;
				break;
			}
			prev = rb_next(prev);
		}
	}
	while (node) {
		entry = rb_entry(node, struct btrfs_inode, rb_node);
		/* Remember where to resume after a restart. */
		objectid = btrfs_ino(entry) + 1;
		inode = igrab(&entry->vfs_inode);
		if (inode) {
			spin_unlock(&root->inode_lock);
			/* More than our own reference: drop the dentry aliases. */
			if (atomic_read(&inode->i_count) > 1)
				d_prune_aliases(inode);
			/*
			 * Drop the reference taken by igrab() above.
			 * NOTE(review): presumably this can evict the inode
			 * when ours was the last reference - confirm.
			 */
			iput(inode);
			cond_resched();
			spin_lock(&root->inode_lock);
			goto again;
		}

		/* If the lock was dropped to reschedule, the walk must restart. */
		if (cond_resched_lock(&root->inode_lock))
			goto again;

		node = rb_next(node);
	}
	spin_unlock(&root->inode_lock);
}
4453
4454int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
4455{
4456 struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
4457 struct btrfs_root *root = BTRFS_I(dir)->root;
4458 struct inode *inode = d_inode(dentry);
4459 struct btrfs_root *dest = BTRFS_I(inode)->root;
4460 struct btrfs_trans_handle *trans;
4461 struct btrfs_block_rsv block_rsv;
4462 u64 root_flags;
4463 int ret;
4464
4465
4466
4467
4468
4469
4470 spin_lock(&dest->root_item_lock);
4471 if (dest->send_in_progress) {
4472 spin_unlock(&dest->root_item_lock);
4473 btrfs_warn(fs_info,
4474 "attempt to delete subvolume %llu during send",
4475 dest->root_key.objectid);
4476 return -EPERM;
4477 }
4478 if (atomic_read(&dest->nr_swapfiles)) {
4479 spin_unlock(&dest->root_item_lock);
4480 btrfs_warn(fs_info,
4481 "attempt to delete subvolume %llu with active swapfile",
4482 root->root_key.objectid);
4483 return -EPERM;
4484 }
4485 root_flags = btrfs_root_flags(&dest->root_item);
4486 btrfs_set_root_flags(&dest->root_item,
4487 root_flags | BTRFS_ROOT_SUBVOL_DEAD);
4488 spin_unlock(&dest->root_item_lock);
4489
4490 down_write(&fs_info->subvol_sem);
4491
4492 ret = may_destroy_subvol(dest);
4493 if (ret)
4494 goto out_up_write;
4495
4496 btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
4497
4498
4499
4500
4501
4502 ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
4503 if (ret)
4504 goto out_up_write;
4505
4506 trans = btrfs_start_transaction(root, 0);
4507 if (IS_ERR(trans)) {
4508 ret = PTR_ERR(trans);
4509 goto out_release;
4510 }
4511 trans->block_rsv = &block_rsv;
4512 trans->bytes_reserved = block_rsv.size;
4513
4514 btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));
4515
4516 ret = btrfs_unlink_subvol(trans, dir, dentry);
4517 if (ret) {
4518 btrfs_abort_transaction(trans, ret);
4519 goto out_end_trans;
4520 }
4521
4522 ret = btrfs_record_root_in_trans(trans, dest);
4523 if (ret) {
4524 btrfs_abort_transaction(trans, ret);
4525 goto out_end_trans;
4526 }
4527
4528 memset(&dest->root_item.drop_progress, 0,
4529 sizeof(dest->root_item.drop_progress));
4530 btrfs_set_root_drop_level(&dest->root_item, 0);
4531 btrfs_set_root_refs(&dest->root_item, 0);
4532
4533 if (!test_and_set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &dest->state)) {
4534 ret = btrfs_insert_orphan_item(trans,
4535 fs_info->tree_root,
4536 dest->root_key.objectid);
4537 if (ret) {
4538 btrfs_abort_transaction(trans, ret);
4539 goto out_end_trans;
4540 }
4541 }
4542
4543 ret = btrfs_uuid_tree_remove(trans, dest->root_item.uuid,
4544 BTRFS_UUID_KEY_SUBVOL,
4545 dest->root_key.objectid);
4546 if (ret && ret != -ENOENT) {
4547 btrfs_abort_transaction(trans, ret);
4548 goto out_end_trans;
4549 }
4550 if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) {
4551 ret = btrfs_uuid_tree_remove(trans,
4552 dest->root_item.received_uuid,
4553 BTRFS_UUID_KEY_RECEIVED_SUBVOL,
4554 dest->root_key.objectid);
4555 if (ret && ret != -ENOENT) {
4556 btrfs_abort_transaction(trans, ret);
4557 goto out_end_trans;
4558 }
4559 }
4560
4561 free_anon_bdev(dest->anon_dev);
4562 dest->anon_dev = 0;
4563out_end_trans:
4564 trans->block_rsv = NULL;
4565 trans->bytes_reserved = 0;
4566 ret = btrfs_end_transaction(trans);
4567 inode->i_flags |= S_DEAD;
4568out_release:
4569 btrfs_subvolume_release_metadata(root, &block_rsv);
4570out_up_write:
4571 up_write(&fs_info->subvol_sem);
4572 if (ret) {
4573 spin_lock(&dest->root_item_lock);
4574 root_flags = btrfs_root_flags(&dest->root_item);
4575 btrfs_set_root_flags(&dest->root_item,
4576 root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
4577 spin_unlock(&dest->root_item_lock);
4578 } else {
4579 d_invalidate(dentry);
4580 btrfs_prune_dentries(dest);
4581 ASSERT(dest->send_in_progress == 0);
4582 }
4583
4584 return ret;
4585}
4586
4587static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
4588{
4589 struct inode *inode = d_inode(dentry);
4590 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
4591 int err = 0;
4592 struct btrfs_trans_handle *trans;
4593 u64 last_unlink_trans;
4594
4595 if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
4596 return -ENOTEMPTY;
4597 if (btrfs_ino(BTRFS_I(inode)) == BTRFS_FIRST_FREE_OBJECTID) {
4598 if (unlikely(btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))) {
4599 btrfs_err(fs_info,
4600 "extent tree v2 doesn't support snapshot deletion yet");
4601 return -EOPNOTSUPP;
4602 }
4603 return btrfs_delete_subvolume(dir, dentry);
4604 }
4605
4606 trans = __unlink_start_trans(dir);
4607 if (IS_ERR(trans))
4608 return PTR_ERR(trans);
4609
4610 if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
4611 err = btrfs_unlink_subvol(trans, dir, dentry);
4612 goto out;
4613 }
4614
4615 err = btrfs_orphan_add(trans, BTRFS_I(inode));
4616 if (err)
4617 goto out;
4618
4619 last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
4620
4621
4622 err = btrfs_unlink_inode(trans, BTRFS_I(dir),
4623 BTRFS_I(d_inode(dentry)), dentry->d_name.name,
4624 dentry->d_name.len);
4625 if (!err) {
4626 btrfs_i_size_write(BTRFS_I(inode), 0);
4627
4628
4629
4630
4631
4632
4633
4634
4635
4636
4637
4638 if (last_unlink_trans >= trans->transid)
4639 BTRFS_I(dir)->last_unlink_trans = last_unlink_trans;
4640 }
4641out:
4642 btrfs_end_transaction(trans);
4643 btrfs_btree_balance_dirty(fs_info);
4644
4645 return err;
4646}
4647
4648
4649
4650
4651
4652
4653
4654
4655
4656
4657
4658
4659int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
4660 int front)
4661{
4662 struct btrfs_fs_info *fs_info = inode->root->fs_info;
4663 struct address_space *mapping = inode->vfs_inode.i_mapping;
4664 struct extent_io_tree *io_tree = &inode->io_tree;
4665 struct btrfs_ordered_extent *ordered;
4666 struct extent_state *cached_state = NULL;
4667 struct extent_changeset *data_reserved = NULL;
4668 bool only_release_metadata = false;
4669 u32 blocksize = fs_info->sectorsize;
4670 pgoff_t index = from >> PAGE_SHIFT;
4671 unsigned offset = from & (blocksize - 1);
4672 struct page *page;
4673 gfp_t mask = btrfs_alloc_write_mask(mapping);
4674 size_t write_bytes = blocksize;
4675 int ret = 0;
4676 u64 block_start;
4677 u64 block_end;
4678
4679 if (IS_ALIGNED(offset, blocksize) &&
4680 (!len || IS_ALIGNED(len, blocksize)))
4681 goto out;
4682
4683 block_start = round_down(from, blocksize);
4684 block_end = block_start + blocksize - 1;
4685
4686 ret = btrfs_check_data_free_space(inode, &data_reserved, block_start,
4687 blocksize);
4688 if (ret < 0) {
4689 if (btrfs_check_nocow_lock(inode, block_start, &write_bytes) > 0) {
4690
4691 only_release_metadata = true;
4692 } else {
4693 goto out;
4694 }
4695 }
4696 ret = btrfs_delalloc_reserve_metadata(inode, blocksize, blocksize);
4697 if (ret < 0) {
4698 if (!only_release_metadata)
4699 btrfs_free_reserved_data_space(inode, data_reserved,
4700 block_start, blocksize);
4701 goto out;
4702 }
4703again:
4704 page = find_or_create_page(mapping, index, mask);
4705 if (!page) {
4706 btrfs_delalloc_release_space(inode, data_reserved, block_start,
4707 blocksize, true);
4708 btrfs_delalloc_release_extents(inode, blocksize);
4709 ret = -ENOMEM;
4710 goto out;
4711 }
4712 ret = set_page_extent_mapped(page);
4713 if (ret < 0)
4714 goto out_unlock;
4715
4716 if (!PageUptodate(page)) {
4717 ret = btrfs_readpage(NULL, page);
4718 lock_page(page);
4719 if (page->mapping != mapping) {
4720 unlock_page(page);
4721 put_page(page);
4722 goto again;
4723 }
4724 if (!PageUptodate(page)) {
4725 ret = -EIO;
4726 goto out_unlock;
4727 }
4728 }
4729 wait_on_page_writeback(page);
4730
4731 lock_extent_bits(io_tree, block_start, block_end, &cached_state);
4732
4733 ordered = btrfs_lookup_ordered_extent(inode, block_start);
4734 if (ordered) {
4735 unlock_extent_cached(io_tree, block_start, block_end,
4736 &cached_state);
4737 unlock_page(page);
4738 put_page(page);
4739 btrfs_start_ordered_extent(ordered, 1);
4740 btrfs_put_ordered_extent(ordered);
4741 goto again;
4742 }
4743
4744 clear_extent_bit(&inode->io_tree, block_start, block_end,
4745 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
4746 0, 0, &cached_state);
4747
4748 ret = btrfs_set_extent_delalloc(inode, block_start, block_end, 0,
4749 &cached_state);
4750 if (ret) {
4751 unlock_extent_cached(io_tree, block_start, block_end,
4752 &cached_state);
4753 goto out_unlock;
4754 }
4755
4756 if (offset != blocksize) {
4757 if (!len)
4758 len = blocksize - offset;
4759 if (front)
4760 memzero_page(page, (block_start - page_offset(page)),
4761 offset);
4762 else
4763 memzero_page(page, (block_start - page_offset(page)) + offset,
4764 len);
4765 flush_dcache_page(page);
4766 }
4767 btrfs_page_clear_checked(fs_info, page, block_start,
4768 block_end + 1 - block_start);
4769 btrfs_page_set_dirty(fs_info, page, block_start, block_end + 1 - block_start);
4770 unlock_extent_cached(io_tree, block_start, block_end, &cached_state);
4771
4772 if (only_release_metadata)
4773 set_extent_bit(&inode->io_tree, block_start, block_end,
4774 EXTENT_NORESERVE, 0, NULL, NULL, GFP_NOFS, NULL);
4775
4776out_unlock:
4777 if (ret) {
4778 if (only_release_metadata)
4779 btrfs_delalloc_release_metadata(inode, blocksize, true);
4780 else
4781 btrfs_delalloc_release_space(inode, data_reserved,
4782 block_start, blocksize, true);
4783 }
4784 btrfs_delalloc_release_extents(inode, blocksize);
4785 unlock_page(page);
4786 put_page(page);
4787out:
4788 if (only_release_metadata)
4789 btrfs_check_nocow_unlock(inode);
4790 extent_changeset_free(data_reserved);
4791 return ret;
4792}
4793
4794static int maybe_insert_hole(struct btrfs_root *root, struct btrfs_inode *inode,
4795 u64 offset, u64 len)
4796{
4797 struct btrfs_fs_info *fs_info = root->fs_info;
4798 struct btrfs_trans_handle *trans;
4799 struct btrfs_drop_extents_args drop_args = { 0 };
4800 int ret;
4801
4802
4803
4804
4805
4806
4807
4808 if (btrfs_fs_incompat(fs_info, NO_HOLES))
4809 return 0;
4810
4811
4812
4813
4814
4815
4816 trans = btrfs_start_transaction(root, 3);
4817 if (IS_ERR(trans))
4818 return PTR_ERR(trans);
4819
4820 drop_args.start = offset;
4821 drop_args.end = offset + len;
4822 drop_args.drop_cache = true;
4823
4824 ret = btrfs_drop_extents(trans, root, inode, &drop_args);
4825 if (ret) {
4826 btrfs_abort_transaction(trans, ret);
4827 btrfs_end_transaction(trans);
4828 return ret;
4829 }
4830
4831 ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode),
4832 offset, 0, 0, len, 0, len, 0, 0, 0);
4833 if (ret) {
4834 btrfs_abort_transaction(trans, ret);
4835 } else {
4836 btrfs_update_inode_bytes(inode, 0, drop_args.bytes_found);
4837 btrfs_update_inode(trans, root, inode);
4838 }
4839 btrfs_end_transaction(trans);
4840 return ret;
4841}
4842
4843
4844
4845
4846
4847
4848
/*
 * Prepare @inode for growing from @oldsize to @size: zero the tail of the
 * block containing @oldsize and make sure the range between the sector
 * aligned old size and the sector aligned new size is described by hole
 * extents (on disk where needed, and in the extent map cache).
 *
 * The caller is expected to update i_size itself; this function does not.
 *
 * Returns 0 on success or a negative errno.
 */
int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size)
{
	struct btrfs_root *root = inode->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_io_tree *io_tree = &inode->io_tree;
	struct extent_map *em = NULL;
	struct extent_state *cached_state = NULL;
	struct extent_map_tree *em_tree = &inode->extent_tree;
	u64 hole_start = ALIGN(oldsize, fs_info->sectorsize);
	u64 block_end = ALIGN(size, fs_info->sectorsize);
	u64 last_byte;
	u64 cur_offset;
	u64 hole_size;
	int err = 0;

	/*
	 * Zero from the old size to the end of its block, so that stale data
	 * beyond the old EOF does not become visible once the file grows.
	 */
	err = btrfs_truncate_block(inode, oldsize, 0, 0);
	if (err)
		return err;

	/* The new size still ends inside the old last block: nothing to add. */
	if (size <= hole_start)
		return 0;

	btrfs_lock_and_flush_ordered_range(inode, hole_start, block_end - 1,
					   &cached_state);
	cur_offset = hole_start;
	while (1) {
		em = btrfs_get_extent(inode, NULL, 0, cur_offset,
				      block_end - cur_offset);
		if (IS_ERR(em)) {
			err = PTR_ERR(em);
			em = NULL;
			break;
		}
		/* Handle at most up to the end of this extent map per pass. */
		last_byte = min(extent_map_end(em), block_end);
		last_byte = ALIGN(last_byte, fs_info->sectorsize);
		hole_size = last_byte - cur_offset;

		if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
			struct extent_map *hole_em;

			err = maybe_insert_hole(root, inode, cur_offset,
						hole_size);
			if (err)
				break;

			err = btrfs_inode_set_file_extent_range(inode,
							cur_offset, hole_size);
			if (err)
				break;

			btrfs_drop_extent_cache(inode, cur_offset,
						cur_offset + hole_size - 1, 0);
			hole_em = alloc_extent_map();
			if (!hole_em) {
				/*
				 * Could not cache the hole; flag the inode for
				 * a full sync instead and move on.
				 */
				btrfs_set_inode_full_sync(inode);
				goto next;
			}
			hole_em->start = cur_offset;
			hole_em->len = hole_size;
			hole_em->orig_start = cur_offset;

			hole_em->block_start = EXTENT_MAP_HOLE;
			hole_em->block_len = 0;
			hole_em->orig_block_len = 0;
			hole_em->ram_bytes = hole_size;
			hole_em->compress_type = BTRFS_COMPRESS_NONE;
			hole_em->generation = fs_info->generation;

			/*
			 * Insert the hole mapping, dropping whatever is cached
			 * for the range for as long as the insert reports
			 * -EEXIST.
			 */
			while (1) {
				write_lock(&em_tree->lock);
				err = add_extent_mapping(em_tree, hole_em, 1);
				write_unlock(&em_tree->lock);
				if (err != -EEXIST)
					break;
				btrfs_drop_extent_cache(inode, cur_offset,
							cur_offset +
							hole_size - 1, 0);
			}
			free_extent_map(hole_em);
		} else {
			/* Preallocated range: only record the file extent range. */
			err = btrfs_inode_set_file_extent_range(inode,
							cur_offset, hole_size);
			if (err)
				break;
		}
next:
		free_extent_map(em);
		em = NULL;
		cur_offset = last_byte;
		if (cur_offset >= block_end)
			break;
	}
	/* em is NULL here unless the loop was left early with a map held. */
	free_extent_map(em);
	unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state);
	return err;
}
4950
/*
 * Change the size of @inode to attr->ia_size.
 *
 * Growing fills the new range with holes via btrfs_cont_expand() and then
 * updates i_size inside a transaction.  Shrinking (or a same-size request)
 * goes through truncate_setsize() and btrfs_truncate().
 *
 * Returns 0 on success or a negative errno.
 */
static int btrfs_setsize(struct inode *inode, struct iattr *attr)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_trans_handle *trans;
	loff_t oldsize = i_size_read(inode);
	loff_t newsize = attr->ia_size;
	int mask = attr->ia_valid;
	int ret;

	/*
	 * On a real size change bump the inode version, and set ctime/mtime
	 * to the current time unless the caller supplies either of them
	 * explicitly.
	 */
	if (newsize != oldsize) {
		inode_inc_iversion(inode);
		if (!(mask & (ATTR_CTIME | ATTR_MTIME)))
			inode->i_ctime = inode->i_mtime =
				current_time(inode);
	}

	if (newsize > oldsize) {
		/*
		 * The snapshot lock is held across the expansion and the
		 * i_size update.
		 * NOTE(review): presumably so that a concurrent snapshot
		 * cannot observe a half-finished expansion - confirm.
		 */
		btrfs_drew_write_lock(&root->snapshot_lock);
		ret = btrfs_cont_expand(BTRFS_I(inode), oldsize, newsize);
		if (ret) {
			btrfs_drew_write_unlock(&root->snapshot_lock);
			return ret;
		}

		trans = btrfs_start_transaction(root, 1);
		if (IS_ERR(trans)) {
			btrfs_drew_write_unlock(&root->snapshot_lock);
			return PTR_ERR(trans);
		}

		i_size_write(inode, newsize);
		btrfs_inode_safe_disk_i_size_write(BTRFS_I(inode), 0);
		pagecache_isize_extended(inode, oldsize, newsize);
		ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
		btrfs_drew_write_unlock(&root->snapshot_lock);
		btrfs_end_transaction(trans);
	} else {
		struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);

		/* On zoned filesystems wait for ordered I/O past the new EOF. */
		if (btrfs_is_zoned(fs_info)) {
			ret = btrfs_wait_ordered_range(inode,
					ALIGN(newsize, fs_info->sectorsize),
					(u64)-1);
			if (ret)
				return ret;
		}

		/*
		 * Truncating to zero marks the inode to be flushed on close.
		 * NOTE(review): presumably to protect the truncate-then-
		 * rewrite pattern against data loss on a crash - confirm.
		 */
		if (newsize == 0)
			set_bit(BTRFS_INODE_FLUSH_ON_CLOSE,
				&BTRFS_I(inode)->runtime_flags);

		truncate_setsize(inode, newsize);

		inode_dio_wait(inode);

		ret = btrfs_truncate(inode, newsize == oldsize);
		if (ret && inode->i_nlink) {
			int err;

			/*
			 * The truncate failed on an inode that is still
			 * linked: wait for all ordered I/O and reset the
			 * in-memory size to the size recorded as on disk.
			 */
			err = btrfs_wait_ordered_range(inode, 0, (u64)-1);
			if (err)
				return err;
			i_size_write(inode, BTRFS_I(inode)->disk_i_size);
		}
	}

	return ret;
}
5043
5044static int btrfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
5045 struct iattr *attr)
5046{
5047 struct inode *inode = d_inode(dentry);
5048 struct btrfs_root *root = BTRFS_I(inode)->root;
5049 int err;
5050
5051 if (btrfs_root_readonly(root))
5052 return -EROFS;
5053
5054 err = setattr_prepare(mnt_userns, dentry, attr);
5055 if (err)
5056 return err;
5057
5058 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
5059 err = btrfs_setsize(inode, attr);
5060 if (err)
5061 return err;
5062 }
5063
5064 if (attr->ia_valid) {
5065 setattr_copy(mnt_userns, inode, attr);
5066 inode_inc_iversion(inode);
5067 err = btrfs_dirty_inode(inode);
5068
5069 if (!err && attr->ia_valid & ATTR_MODE)
5070 err = posix_acl_chmod(mnt_userns, inode, inode->i_mode);
5071 }
5072
5073 return err;
5074}
5075
5076
5077
5078
5079
5080
5081
5082
5083
5084
5085
5086
5087
5088
/*
 * Tear down the cached state of an inode that is being freed: drop all of
 * its pages, remove every extent map from its extent map tree and clear
 * every extent state record from its io tree.
 *
 * Must only be called on an inode with I_FREEING set.
 */
static void evict_inode_truncate_pages(struct inode *inode)
{
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_map_tree *map_tree = &BTRFS_I(inode)->extent_tree;
	struct rb_node *node;

	ASSERT(inode->i_state & I_FREEING);
	truncate_inode_pages_final(&inode->i_data);

	/* Remove and free all extent maps, yielding the CPU when needed. */
	write_lock(&map_tree->lock);
	while (!RB_EMPTY_ROOT(&map_tree->map.rb_root)) {
		struct extent_map *em;

		node = rb_first_cached(&map_tree->map);
		em = rb_entry(node, struct extent_map, rb_node);
		/* Clear these flags before the map is removed from the tree. */
		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
		clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
		remove_extent_mapping(map_tree, em);
		free_extent_map(em);
		if (need_resched()) {
			write_unlock(&map_tree->lock);
			cond_resched();
			write_lock(&map_tree->lock);
		}
	}
	write_unlock(&map_tree->lock);

	/*
	 * Clear the io tree one record at a time.  The range of the first
	 * record is sampled under the tree spinlock, which is then dropped
	 * before the range itself is locked via lock_extent_bits().
	 * NOTE(review): presumably taking the range lock also waits for
	 * anyone still holding that range (e.g. in-flight I/O completion) -
	 * confirm.
	 */
	spin_lock(&io_tree->lock);
	while (!RB_EMPTY_ROOT(&io_tree->state)) {
		struct extent_state *state;
		struct extent_state *cached_state = NULL;
		u64 start;
		u64 end;
		unsigned state_flags;

		node = rb_first(&io_tree->state);
		state = rb_entry(node, struct extent_state, rb_node);
		start = state->start;
		end = state->end;
		state_flags = state->state;
		spin_unlock(&io_tree->lock);

		lock_extent_bits(io_tree, start, end, &cached_state);

		/*
		 * A range that was still marked delalloc has its qgroup data
		 * reservation released here.
		 */
		if (state_flags & EXTENT_DELALLOC)
			btrfs_qgroup_free_data(BTRFS_I(inode), NULL, start,
					       end - start + 1);

		clear_extent_bit(io_tree, start, end,
				 EXTENT_LOCKED | EXTENT_DELALLOC |
				 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
				 &cached_state);

		cond_resched();
		spin_lock(&io_tree->lock);
	}
	spin_unlock(&io_tree->lock);
}
5171
5172static struct btrfs_trans_handle *evict_refill_and_join(struct btrfs_root *root,
5173 struct btrfs_block_rsv *rsv)
5174{
5175 struct btrfs_fs_info *fs_info = root->fs_info;
5176 struct btrfs_trans_handle *trans;
5177 u64 delayed_refs_extra = btrfs_calc_insert_metadata_size(fs_info, 1);
5178 int ret;
5179
5180
5181
5182
5183
5184
5185
5186
5187
5188
5189
5190
5191
5192
5193
5194 ret = btrfs_block_rsv_refill(fs_info, rsv, rsv->size + delayed_refs_extra,
5195 BTRFS_RESERVE_FLUSH_EVICT);
5196 if (ret) {
5197 ret = btrfs_block_rsv_refill(fs_info, rsv, rsv->size,
5198 BTRFS_RESERVE_FLUSH_EVICT);
5199 if (ret) {
5200 btrfs_warn(fs_info,
5201 "could not allocate space for delete; will truncate on mount");
5202 return ERR_PTR(-ENOSPC);
5203 }
5204 delayed_refs_extra = 0;
5205 }
5206
5207 trans = btrfs_join_transaction(root);
5208 if (IS_ERR(trans))
5209 return trans;
5210
5211 if (delayed_refs_extra) {
5212 trans->block_rsv = &fs_info->trans_block_rsv;
5213 trans->bytes_reserved = delayed_refs_extra;
5214 btrfs_block_rsv_migrate(rsv, trans->block_rsv,
5215 delayed_refs_extra, 1);
5216 }
5217 return trans;
5218}
5219
/*
 * Evict @inode from memory.
 *
 * Cached pages, extent maps and extent state are always dropped.  If the
 * inode has no links left (and none of the bail-out conditions below apply)
 * its items are also truncated away and its orphan item is removed.
 */
void btrfs_evict_inode(struct inode *inode)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_block_rsv *rsv;
	int ret;

	trace_btrfs_inode_evict(inode);

	/* An inode without a root has nothing on the btrfs side to clean up. */
	if (!root) {
		fsverity_cleanup_inode(inode);
		clear_inode(inode);
		return;
	}

	evict_inode_truncate_pages(inode);

	/*
	 * Still linked: keep the on-disk items if the root is a live
	 * non-tree-root root, or if this is a free space inode.
	 */
	if (inode->i_nlink &&
	    ((btrfs_root_refs(&root->root_item) != 0 &&
	      root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID) ||
	     btrfs_is_free_space_inode(BTRFS_I(inode))))
		goto no_delete;

	if (is_bad_inode(inode))
		goto no_delete;

	btrfs_free_io_failure_record(BTRFS_I(inode), 0, (u64)-1);

	/* Nothing is deleted while log recovery is in progress. */
	if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
		goto no_delete;

	if (inode->i_nlink > 0) {
		BUG_ON(btrfs_root_refs(&root->root_item) != 0 &&
		       root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID);
		goto no_delete;
	}

	/* Commit the pending delayed inode update before deleting items. */
	ret = btrfs_commit_inode_delayed_inode(BTRFS_I(inode));
	if (ret)
		goto no_delete;

	/*
	 * Discard the remaining delayed items of this inode.
	 * NOTE(review): presumably they need not be run because the whole
	 * inode is about to be deleted - confirm.
	 */
	btrfs_kill_delayed_inode_items(BTRFS_I(inode));

	rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
	if (!rsv)
		goto no_delete;
	rsv->size = btrfs_calc_metadata_size(fs_info, 1);
	rsv->failfast = 1;

	btrfs_i_size_write(BTRFS_I(inode), 0);

	/*
	 * Truncate all items of the inode.  Each pass refills the reserve
	 * and joins a transaction; -ENOSPC and -EAGAIN from the truncation
	 * lead to another pass, any other error gives up.
	 */
	while (1) {
		struct btrfs_truncate_control control = {
			.inode = BTRFS_I(inode),
			.ino = btrfs_ino(BTRFS_I(inode)),
			.new_size = 0,
			.min_type = 0,
		};

		trans = evict_refill_and_join(root, rsv);
		if (IS_ERR(trans))
			goto free_rsv;

		/* Charge the truncation to our temporary reserve. */
		trans->block_rsv = rsv;

		ret = btrfs_truncate_inode_items(trans, root, &control);
		trans->block_rsv = &fs_info->trans_block_rsv;
		btrfs_end_transaction(trans);
		btrfs_btree_balance_dirty(fs_info);
		if (ret && ret != -ENOSPC && ret != -EAGAIN)
			goto free_rsv;
		else if (!ret)
			break;
	}

	/*
	 * All items are gone; remove the orphan item as well.  A failure to
	 * get a transaction here is not treated as an error.
	 * NOTE(review): presumably a leftover orphan item is cleaned up on a
	 * later mount - confirm.
	 */
	trans = evict_refill_and_join(root, rsv);
	if (!IS_ERR(trans)) {
		trans->block_rsv = rsv;
		btrfs_orphan_del(trans, BTRFS_I(inode));
		trans->block_rsv = &fs_info->trans_block_rsv;
		btrfs_end_transaction(trans);
	}

free_rsv:
	btrfs_free_block_rsv(fs_info, rsv);
no_delete:
	/* Common teardown for both the delete and the no-delete paths. */
	btrfs_remove_delayed_node(BTRFS_I(inode));
	fsverity_cleanup_inode(inode);
	clear_inode(inode);
}
5335
5336
5337
5338
5339
5340
5341
5342
5343static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
5344 struct btrfs_key *location, u8 *type)
5345{
5346 const char *name = dentry->d_name.name;
5347 int namelen = dentry->d_name.len;
5348 struct btrfs_dir_item *di;
5349 struct btrfs_path *path;
5350 struct btrfs_root *root = BTRFS_I(dir)->root;
5351 int ret = 0;
5352
5353 path = btrfs_alloc_path();
5354 if (!path)
5355 return -ENOMEM;
5356
5357 di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(BTRFS_I(dir)),
5358 name, namelen, 0);
5359 if (IS_ERR_OR_NULL(di)) {
5360 ret = di ? PTR_ERR(di) : -ENOENT;
5361 goto out;
5362 }
5363
5364 btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
5365 if (location->type != BTRFS_INODE_ITEM_KEY &&
5366 location->type != BTRFS_ROOT_ITEM_KEY) {
5367 ret = -EUCLEAN;
5368 btrfs_warn(root->fs_info,
5369"%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))",
5370 __func__, name, btrfs_ino(BTRFS_I(dir)),
5371 location->objectid, location->type, location->offset);
5372 }
5373 if (!ret)
5374 *type = btrfs_dir_type(path->nodes[0], di);
5375out:
5376 btrfs_free_path(path);
5377 return ret;
5378}
5379
5380
5381
5382
5383
5384
5385static int fixup_tree_root_location(struct btrfs_fs_info *fs_info,
5386 struct inode *dir,
5387 struct dentry *dentry,
5388 struct btrfs_key *location,
5389 struct btrfs_root **sub_root)
5390{
5391 struct btrfs_path *path;
5392 struct btrfs_root *new_root;
5393 struct btrfs_root_ref *ref;
5394 struct extent_buffer *leaf;
5395 struct btrfs_key key;
5396 int ret;
5397 int err = 0;
5398
5399 path = btrfs_alloc_path();
5400 if (!path) {
5401 err = -ENOMEM;
5402 goto out;
5403 }
5404
5405 err = -ENOENT;
5406 key.objectid = BTRFS_I(dir)->root->root_key.objectid;
5407 key.type = BTRFS_ROOT_REF_KEY;
5408 key.offset = location->objectid;
5409
5410 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
5411 if (ret) {
5412 if (ret < 0)
5413 err = ret;
5414 goto out;
5415 }
5416
5417 leaf = path->nodes[0];
5418 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
5419 if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(BTRFS_I(dir)) ||
5420 btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len)
5421 goto out;
5422
5423 ret = memcmp_extent_buffer(leaf, dentry->d_name.name,
5424 (unsigned long)(ref + 1),
5425 dentry->d_name.len);
5426 if (ret)
5427 goto out;
5428
5429 btrfs_release_path(path);
5430
5431 new_root = btrfs_get_fs_root(fs_info, location->objectid, true);
5432 if (IS_ERR(new_root)) {
5433 err = PTR_ERR(new_root);
5434 goto out;
5435 }
5436
5437 *sub_root = new_root;
5438 location->objectid = btrfs_root_dirid(&new_root->root_item);
5439 location->type = BTRFS_INODE_ITEM_KEY;
5440 location->offset = 0;
5441 err = 0;
5442out:
5443 btrfs_free_path(path);
5444 return err;
5445}
5446
5447static void inode_tree_add(struct inode *inode)
5448{
5449 struct btrfs_root *root = BTRFS_I(inode)->root;
5450 struct btrfs_inode *entry;
5451 struct rb_node **p;
5452 struct rb_node *parent;
5453 struct rb_node *new = &BTRFS_I(inode)->rb_node;
5454 u64 ino = btrfs_ino(BTRFS_I(inode));
5455
5456 if (inode_unhashed(inode))
5457 return;
5458 parent = NULL;
5459 spin_lock(&root->inode_lock);
5460 p = &root->inode_tree.rb_node;
5461 while (*p) {
5462 parent = *p;
5463 entry = rb_entry(parent, struct btrfs_inode, rb_node);
5464
5465 if (ino < btrfs_ino(entry))
5466 p = &parent->rb_left;
5467 else if (ino > btrfs_ino(entry))
5468 p = &parent->rb_right;
5469 else {
5470 WARN_ON(!(entry->vfs_inode.i_state &
5471 (I_WILL_FREE | I_FREEING)));
5472 rb_replace_node(parent, new, &root->inode_tree);
5473 RB_CLEAR_NODE(parent);
5474 spin_unlock(&root->inode_lock);
5475 return;
5476 }
5477 }
5478 rb_link_node(new, parent, p);
5479 rb_insert_color(new, &root->inode_tree);
5480 spin_unlock(&root->inode_lock);
5481}
5482
5483static void inode_tree_del(struct btrfs_inode *inode)
5484{
5485 struct btrfs_root *root = inode->root;
5486 int empty = 0;
5487
5488 spin_lock(&root->inode_lock);
5489 if (!RB_EMPTY_NODE(&inode->rb_node)) {
5490 rb_erase(&inode->rb_node, &root->inode_tree);
5491 RB_CLEAR_NODE(&inode->rb_node);
5492 empty = RB_EMPTY_ROOT(&root->inode_tree);
5493 }
5494 spin_unlock(&root->inode_lock);
5495
5496 if (empty && btrfs_root_refs(&root->root_item) == 0) {
5497 spin_lock(&root->inode_lock);
5498 empty = RB_EMPTY_ROOT(&root->inode_tree);
5499 spin_unlock(&root->inode_lock);
5500 if (empty)
5501 btrfs_add_dead_root(root);
5502 }
5503}
5504
5505
5506static int btrfs_init_locked_inode(struct inode *inode, void *p)
5507{
5508 struct btrfs_iget_args *args = p;
5509
5510 inode->i_ino = args->ino;
5511 BTRFS_I(inode)->location.objectid = args->ino;
5512 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
5513 BTRFS_I(inode)->location.offset = 0;
5514 BTRFS_I(inode)->root = btrfs_grab_root(args->root);
5515 BUG_ON(args->root && !BTRFS_I(inode)->root);
5516 return 0;
5517}
5518
5519static int btrfs_find_actor(struct inode *inode, void *opaque)
5520{
5521 struct btrfs_iget_args *args = opaque;
5522
5523 return args->ino == BTRFS_I(inode)->location.objectid &&
5524 args->root == BTRFS_I(inode)->root;
5525}
5526
5527static struct inode *btrfs_iget_locked(struct super_block *s, u64 ino,
5528 struct btrfs_root *root)
5529{
5530 struct inode *inode;
5531 struct btrfs_iget_args args;
5532 unsigned long hashval = btrfs_inode_hash(ino, root);
5533
5534 args.ino = ino;
5535 args.root = root;
5536
5537 inode = iget5_locked(s, hashval, btrfs_find_actor,
5538 btrfs_init_locked_inode,
5539 (void *)&args);
5540 return inode;
5541}
5542
5543
5544
5545
5546
5547
5548
5549struct inode *btrfs_iget_path(struct super_block *s, u64 ino,
5550 struct btrfs_root *root, struct btrfs_path *path)
5551{
5552 struct inode *inode;
5553
5554 inode = btrfs_iget_locked(s, ino, root);
5555 if (!inode)
5556 return ERR_PTR(-ENOMEM);
5557
5558 if (inode->i_state & I_NEW) {
5559 int ret;
5560
5561 ret = btrfs_read_locked_inode(inode, path);
5562 if (!ret) {
5563 inode_tree_add(inode);
5564 unlock_new_inode(inode);
5565 } else {
5566 iget_failed(inode);
5567
5568
5569
5570
5571
5572 if (ret > 0)
5573 ret = -ENOENT;
5574 inode = ERR_PTR(ret);
5575 }
5576 }
5577
5578 return inode;
5579}
5580
5581struct inode *btrfs_iget(struct super_block *s, u64 ino, struct btrfs_root *root)
5582{
5583 return btrfs_iget_path(s, ino, root, NULL);
5584}
5585
5586static struct inode *new_simple_dir(struct super_block *s,
5587 struct btrfs_key *key,
5588 struct btrfs_root *root)
5589{
5590 struct inode *inode = new_inode(s);
5591
5592 if (!inode)
5593 return ERR_PTR(-ENOMEM);
5594
5595 BTRFS_I(inode)->root = btrfs_grab_root(root);
5596 memcpy(&BTRFS_I(inode)->location, key, sizeof(*key));
5597 set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags);
5598
5599 inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID;
5600
5601
5602
5603
5604 inode->i_op = &simple_dir_inode_operations;
5605 inode->i_opflags &= ~IOP_XATTR;
5606 inode->i_fop = &simple_dir_operations;
5607 inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
5608 inode->i_mtime = current_time(inode);
5609 inode->i_atime = inode->i_mtime;
5610 inode->i_ctime = inode->i_mtime;
5611 BTRFS_I(inode)->i_otime = inode->i_mtime;
5612
5613 return inode;
5614}
5615
5616static_assert(BTRFS_FT_UNKNOWN == FT_UNKNOWN);
5617static_assert(BTRFS_FT_REG_FILE == FT_REG_FILE);
5618static_assert(BTRFS_FT_DIR == FT_DIR);
5619static_assert(BTRFS_FT_CHRDEV == FT_CHRDEV);
5620static_assert(BTRFS_FT_BLKDEV == FT_BLKDEV);
5621static_assert(BTRFS_FT_FIFO == FT_FIFO);
5622static_assert(BTRFS_FT_SOCK == FT_SOCK);
5623static_assert(BTRFS_FT_SYMLINK == FT_SYMLINK);
5624
5625static inline u8 btrfs_inode_type(struct inode *inode)
5626{
5627 return fs_umode_to_ftype(inode->i_mode);
5628}
5629
5630struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
5631{
5632 struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
5633 struct inode *inode;
5634 struct btrfs_root *root = BTRFS_I(dir)->root;
5635 struct btrfs_root *sub_root = root;
5636 struct btrfs_key location;
5637 u8 di_type = 0;
5638 int ret = 0;
5639
5640 if (dentry->d_name.len > BTRFS_NAME_LEN)
5641 return ERR_PTR(-ENAMETOOLONG);
5642
5643 ret = btrfs_inode_by_name(dir, dentry, &location, &di_type);
5644 if (ret < 0)
5645 return ERR_PTR(ret);
5646
5647 if (location.type == BTRFS_INODE_ITEM_KEY) {
5648 inode = btrfs_iget(dir->i_sb, location.objectid, root);
5649 if (IS_ERR(inode))
5650 return inode;
5651
5652
5653 if (btrfs_inode_type(inode) != di_type) {
5654 btrfs_crit(fs_info,
5655"inode mode mismatch with dir: inode mode=0%o btrfs type=%u dir type=%u",
5656 inode->i_mode, btrfs_inode_type(inode),
5657 di_type);
5658 iput(inode);
5659 return ERR_PTR(-EUCLEAN);
5660 }
5661 return inode;
5662 }
5663
5664 ret = fixup_tree_root_location(fs_info, dir, dentry,
5665 &location, &sub_root);
5666 if (ret < 0) {
5667 if (ret != -ENOENT)
5668 inode = ERR_PTR(ret);
5669 else
5670 inode = new_simple_dir(dir->i_sb, &location, sub_root);
5671 } else {
5672 inode = btrfs_iget(dir->i_sb, location.objectid, sub_root);
5673 }
5674 if (root != sub_root)
5675 btrfs_put_root(sub_root);
5676
5677 if (!IS_ERR(inode) && root != sub_root) {
5678 down_read(&fs_info->cleanup_work_sem);
5679 if (!sb_rdonly(inode->i_sb))
5680 ret = btrfs_orphan_cleanup(sub_root);
5681 up_read(&fs_info->cleanup_work_sem);
5682 if (ret) {
5683 iput(inode);
5684 inode = ERR_PTR(ret);
5685 }
5686 }
5687
5688 return inode;
5689}
5690
5691static int btrfs_dentry_delete(const struct dentry *dentry)
5692{
5693 struct btrfs_root *root;
5694 struct inode *inode = d_inode(dentry);
5695
5696 if (!inode && !IS_ROOT(dentry))
5697 inode = d_inode(dentry->d_parent);
5698
5699 if (inode) {
5700 root = BTRFS_I(inode)->root;
5701 if (btrfs_root_refs(&root->root_item) == 0)
5702 return 1;
5703
5704 if (btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
5705 return 1;
5706 }
5707 return 0;
5708}
5709
5710static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
5711 unsigned int flags)
5712{
5713 struct inode *inode = btrfs_lookup_dentry(dir, dentry);
5714
5715 if (inode == ERR_PTR(-ENOENT))
5716 inode = NULL;
5717 return d_splice_alias(inode, dentry);
5718}
5719
5720
5721
5722
5723
5724
5725
5726
5727
5728
5729static int btrfs_opendir(struct inode *inode, struct file *file)
5730{
5731 struct btrfs_file_private *private;
5732
5733 private = kzalloc(sizeof(struct btrfs_file_private), GFP_KERNEL);
5734 if (!private)
5735 return -ENOMEM;
5736 private->filldir_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
5737 if (!private->filldir_buf) {
5738 kfree(private);
5739 return -ENOMEM;
5740 }
5741 file->private_data = private;
5742 return 0;
5743}
5744
5745struct dir_entry {
5746 u64 ino;
5747 u64 offset;
5748 unsigned type;
5749 int name_len;
5750};
5751
5752static int btrfs_filldir(void *addr, int entries, struct dir_context *ctx)
5753{
5754 while (entries--) {
5755 struct dir_entry *entry = addr;
5756 char *name = (char *)(entry + 1);
5757
5758 ctx->pos = get_unaligned(&entry->offset);
5759 if (!dir_emit(ctx, name, get_unaligned(&entry->name_len),
5760 get_unaligned(&entry->ino),
5761 get_unaligned(&entry->type)))
5762 return 1;
5763 addr += sizeof(struct dir_entry) +
5764 get_unaligned(&entry->name_len);
5765 ctx->pos++;
5766 }
5767 return 0;
5768}
5769
5770static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
5771{
5772 struct inode *inode = file_inode(file);
5773 struct btrfs_root *root = BTRFS_I(inode)->root;
5774 struct btrfs_file_private *private = file->private_data;
5775 struct btrfs_dir_item *di;
5776 struct btrfs_key key;
5777 struct btrfs_key found_key;
5778 struct btrfs_path *path;
5779 void *addr;
5780 struct list_head ins_list;
5781 struct list_head del_list;
5782 int ret;
5783 struct extent_buffer *leaf;
5784 int slot;
5785 char *name_ptr;
5786 int name_len;
5787 int entries = 0;
5788 int total_len = 0;
5789 bool put = false;
5790 struct btrfs_key location;
5791
5792 if (!dir_emit_dots(file, ctx))
5793 return 0;
5794
5795 path = btrfs_alloc_path();
5796 if (!path)
5797 return -ENOMEM;
5798
5799 addr = private->filldir_buf;
5800 path->reada = READA_FORWARD;
5801
5802 INIT_LIST_HEAD(&ins_list);
5803 INIT_LIST_HEAD(&del_list);
5804 put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list);
5805
5806again:
5807 key.type = BTRFS_DIR_INDEX_KEY;
5808 key.offset = ctx->pos;
5809 key.objectid = btrfs_ino(BTRFS_I(inode));
5810
5811 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5812 if (ret < 0)
5813 goto err;
5814
5815 while (1) {
5816 struct dir_entry *entry;
5817
5818 leaf = path->nodes[0];
5819 slot = path->slots[0];
5820 if (slot >= btrfs_header_nritems(leaf)) {
5821 ret = btrfs_next_leaf(root, path);
5822 if (ret < 0)
5823 goto err;
5824 else if (ret > 0)
5825 break;
5826 continue;
5827 }
5828
5829 btrfs_item_key_to_cpu(leaf, &found_key, slot);
5830
5831 if (found_key.objectid != key.objectid)
5832 break;
5833 if (found_key.type != BTRFS_DIR_INDEX_KEY)
5834 break;
5835 if (found_key.offset < ctx->pos)
5836 goto next;
5837 if (btrfs_should_delete_dir_index(&del_list, found_key.offset))
5838 goto next;
5839 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
5840 name_len = btrfs_dir_name_len(leaf, di);
5841 if ((total_len + sizeof(struct dir_entry) + name_len) >=
5842 PAGE_SIZE) {
5843 btrfs_release_path(path);
5844 ret = btrfs_filldir(private->filldir_buf, entries, ctx);
5845 if (ret)
5846 goto nopos;
5847 addr = private->filldir_buf;
5848 entries = 0;
5849 total_len = 0;
5850 goto again;
5851 }
5852
5853 entry = addr;
5854 put_unaligned(name_len, &entry->name_len);
5855 name_ptr = (char *)(entry + 1);
5856 read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1),
5857 name_len);
5858 put_unaligned(fs_ftype_to_dtype(btrfs_dir_type(leaf, di)),
5859 &entry->type);
5860 btrfs_dir_item_key_to_cpu(leaf, di, &location);
5861 put_unaligned(location.objectid, &entry->ino);
5862 put_unaligned(found_key.offset, &entry->offset);
5863 entries++;
5864 addr += sizeof(struct dir_entry) + name_len;
5865 total_len += sizeof(struct dir_entry) + name_len;
5866next:
5867 path->slots[0]++;
5868 }
5869 btrfs_release_path(path);
5870
5871 ret = btrfs_filldir(private->filldir_buf, entries, ctx);
5872 if (ret)
5873 goto nopos;
5874
5875 ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list);
5876 if (ret)
5877 goto nopos;
5878
5879
5880
5881
5882
5883
5884
5885
5886
5887
5888
5889
5890
5891
5892
5893
5894
5895
5896 if (ctx->pos >= INT_MAX)
5897 ctx->pos = LLONG_MAX;
5898 else
5899 ctx->pos = INT_MAX;
5900nopos:
5901 ret = 0;
5902err:
5903 if (put)
5904 btrfs_readdir_put_delayed_items(inode, &ins_list, &del_list);
5905 btrfs_free_path(path);
5906 return ret;
5907}
5908
5909
5910
5911
5912
5913
5914
5915static int btrfs_dirty_inode(struct inode *inode)
5916{
5917 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
5918 struct btrfs_root *root = BTRFS_I(inode)->root;
5919 struct btrfs_trans_handle *trans;
5920 int ret;
5921
5922 if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
5923 return 0;
5924
5925 trans = btrfs_join_transaction(root);
5926 if (IS_ERR(trans))
5927 return PTR_ERR(trans);
5928
5929 ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
5930 if (ret && (ret == -ENOSPC || ret == -EDQUOT)) {
5931
5932 btrfs_end_transaction(trans);
5933 trans = btrfs_start_transaction(root, 1);
5934 if (IS_ERR(trans))
5935 return PTR_ERR(trans);
5936
5937 ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
5938 }
5939 btrfs_end_transaction(trans);
5940 if (BTRFS_I(inode)->delayed_node)
5941 btrfs_balance_delayed_items(fs_info);
5942
5943 return ret;
5944}
5945
5946
5947
5948
5949
5950static int btrfs_update_time(struct inode *inode, struct timespec64 *now,
5951 int flags)
5952{
5953 struct btrfs_root *root = BTRFS_I(inode)->root;
5954 bool dirty = flags & ~S_VERSION;
5955
5956 if (btrfs_root_readonly(root))
5957 return -EROFS;
5958
5959 if (flags & S_VERSION)
5960 dirty |= inode_maybe_inc_iversion(inode, dirty);
5961 if (flags & S_CTIME)
5962 inode->i_ctime = *now;
5963 if (flags & S_MTIME)
5964 inode->i_mtime = *now;
5965 if (flags & S_ATIME)
5966 inode->i_atime = *now;
5967 return dirty ? btrfs_dirty_inode(inode) : 0;
5968}
5969
5970
5971
5972
5973
5974
5975static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
5976{
5977 struct btrfs_root *root = inode->root;
5978 struct btrfs_key key, found_key;
5979 struct btrfs_path *path;
5980 struct extent_buffer *leaf;
5981 int ret;
5982
5983 key.objectid = btrfs_ino(inode);
5984 key.type = BTRFS_DIR_INDEX_KEY;
5985 key.offset = (u64)-1;
5986
5987 path = btrfs_alloc_path();
5988 if (!path)
5989 return -ENOMEM;
5990
5991 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5992 if (ret < 0)
5993 goto out;
5994
5995 if (ret == 0)
5996 goto out;
5997 ret = 0;
5998
5999 if (path->slots[0] == 0) {
6000 inode->index_cnt = BTRFS_DIR_START_INDEX;
6001 goto out;
6002 }
6003
6004 path->slots[0]--;
6005
6006 leaf = path->nodes[0];
6007 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
6008
6009 if (found_key.objectid != btrfs_ino(inode) ||
6010 found_key.type != BTRFS_DIR_INDEX_KEY) {
6011 inode->index_cnt = BTRFS_DIR_START_INDEX;
6012 goto out;
6013 }
6014
6015 inode->index_cnt = found_key.offset + 1;
6016out:
6017 btrfs_free_path(path);
6018 return ret;
6019}
6020
6021
6022
6023
6024
6025int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index)
6026{
6027 int ret = 0;
6028
6029 if (dir->index_cnt == (u64)-1) {
6030 ret = btrfs_inode_delayed_dir_index_count(dir);
6031 if (ret) {
6032 ret = btrfs_set_inode_index_count(dir);
6033 if (ret)
6034 return ret;
6035 }
6036 }
6037
6038 *index = dir->index_cnt;
6039 dir->index_cnt++;
6040
6041 return ret;
6042}
6043
6044static int btrfs_insert_inode_locked(struct inode *inode)
6045{
6046 struct btrfs_iget_args args;
6047
6048 args.ino = BTRFS_I(inode)->location.objectid;
6049 args.root = BTRFS_I(inode)->root;
6050
6051 return insert_inode_locked4(inode,
6052 btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root),
6053 btrfs_find_actor, &args);
6054}
6055
6056
6057
6058
6059
6060
6061static void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
6062{
6063 unsigned int flags;
6064
6065 if (!dir)
6066 return;
6067
6068 flags = BTRFS_I(dir)->flags;
6069
6070 if (flags & BTRFS_INODE_NOCOMPRESS) {
6071 BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
6072 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
6073 } else if (flags & BTRFS_INODE_COMPRESS) {
6074 BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
6075 BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
6076 }
6077
6078 if (flags & BTRFS_INODE_NODATACOW) {
6079 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
6080 if (S_ISREG(inode->i_mode))
6081 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
6082 }
6083
6084 btrfs_sync_inode_flags_to_i_flags(inode);
6085}
6086
6087static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
6088 struct btrfs_root *root,
6089 struct user_namespace *mnt_userns,
6090 struct inode *dir,
6091 const char *name, int name_len,
6092 u64 ref_objectid, u64 objectid,
6093 umode_t mode, u64 *index)
6094{
6095 struct btrfs_fs_info *fs_info = root->fs_info;
6096 struct inode *inode;
6097 struct btrfs_inode_item *inode_item;
6098 struct btrfs_key *location;
6099 struct btrfs_path *path;
6100 struct btrfs_inode_ref *ref;
6101 struct btrfs_key key[2];
6102 u32 sizes[2];
6103 struct btrfs_item_batch batch;
6104 unsigned long ptr;
6105 unsigned int nofs_flag;
6106 int ret;
6107
6108 path = btrfs_alloc_path();
6109 if (!path)
6110 return ERR_PTR(-ENOMEM);
6111
6112 nofs_flag = memalloc_nofs_save();
6113 inode = new_inode(fs_info->sb);
6114 memalloc_nofs_restore(nofs_flag);
6115 if (!inode) {
6116 btrfs_free_path(path);
6117 return ERR_PTR(-ENOMEM);
6118 }
6119
6120
6121
6122
6123
6124 if (!name)
6125 set_nlink(inode, 0);
6126
6127
6128
6129
6130
6131 inode->i_ino = objectid;
6132
6133 if (dir && name) {
6134 trace_btrfs_inode_request(dir);
6135
6136 ret = btrfs_set_inode_index(BTRFS_I(dir), index);
6137 if (ret) {
6138 btrfs_free_path(path);
6139 iput(inode);
6140 return ERR_PTR(ret);
6141 }
6142 } else if (dir) {
6143 *index = 0;
6144 }
6145
6146
6147
6148
6149
6150 BTRFS_I(inode)->index_cnt = 2;
6151 BTRFS_I(inode)->dir_index = *index;
6152 BTRFS_I(inode)->root = btrfs_grab_root(root);
6153 BTRFS_I(inode)->generation = trans->transid;
6154 inode->i_generation = BTRFS_I(inode)->generation;
6155
6156
6157
6158
6159
6160
6161
6162 btrfs_set_inode_full_sync(BTRFS_I(inode));
6163
6164 key[0].objectid = objectid;
6165 key[0].type = BTRFS_INODE_ITEM_KEY;
6166 key[0].offset = 0;
6167
6168 sizes[0] = sizeof(struct btrfs_inode_item);
6169
6170 if (name) {
6171
6172
6173
6174
6175
6176
6177 key[1].objectid = objectid;
6178 key[1].type = BTRFS_INODE_REF_KEY;
6179 key[1].offset = ref_objectid;
6180
6181 sizes[1] = name_len + sizeof(*ref);
6182 }
6183
6184 location = &BTRFS_I(inode)->location;
6185 location->objectid = objectid;
6186 location->offset = 0;
6187 location->type = BTRFS_INODE_ITEM_KEY;
6188
6189 ret = btrfs_insert_inode_locked(inode);
6190 if (ret < 0) {
6191 iput(inode);
6192 goto fail;
6193 }
6194
6195 batch.keys = &key[0];
6196 batch.data_sizes = &sizes[0];
6197 batch.total_data_size = sizes[0] + (name ? sizes[1] : 0);
6198 batch.nr = name ? 2 : 1;
6199 ret = btrfs_insert_empty_items(trans, root, path, &batch);
6200 if (ret != 0)
6201 goto fail_unlock;
6202
6203 inode_init_owner(mnt_userns, inode, dir, mode);
6204 inode_set_bytes(inode, 0);
6205
6206 inode->i_mtime = current_time(inode);
6207 inode->i_atime = inode->i_mtime;
6208 inode->i_ctime = inode->i_mtime;
6209 BTRFS_I(inode)->i_otime = inode->i_mtime;
6210
6211 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
6212 struct btrfs_inode_item);
6213 memzero_extent_buffer(path->nodes[0], (unsigned long)inode_item,
6214 sizeof(*inode_item));
6215 fill_inode_item(trans, path->nodes[0], inode_item, inode);
6216
6217 if (name) {
6218 ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
6219 struct btrfs_inode_ref);
6220 btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
6221 btrfs_set_inode_ref_index(path->nodes[0], ref, *index);
6222 ptr = (unsigned long)(ref + 1);
6223 write_extent_buffer(path->nodes[0], name, ptr, name_len);
6224 }
6225
6226 btrfs_mark_buffer_dirty(path->nodes[0]);
6227 btrfs_free_path(path);
6228
6229 btrfs_inherit_iflags(inode, dir);
6230
6231 if (S_ISREG(mode)) {
6232 if (btrfs_test_opt(fs_info, NODATASUM))
6233 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
6234 if (btrfs_test_opt(fs_info, NODATACOW))
6235 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW |
6236 BTRFS_INODE_NODATASUM;
6237 }
6238
6239 inode_tree_add(inode);
6240
6241 trace_btrfs_inode_new(inode);
6242 btrfs_set_inode_last_trans(trans, BTRFS_I(inode));
6243
6244 btrfs_update_root_times(trans, root);
6245
6246 ret = btrfs_inode_inherit_props(trans, inode, dir);
6247 if (ret)
6248 btrfs_err(fs_info,
6249 "error inheriting props for ino %llu (root %llu): %d",
6250 btrfs_ino(BTRFS_I(inode)), root->root_key.objectid, ret);
6251
6252 return inode;
6253
6254fail_unlock:
6255 discard_new_inode(inode);
6256fail:
6257 if (dir && name)
6258 BTRFS_I(dir)->index_cnt--;
6259 btrfs_free_path(path);
6260 return ERR_PTR(ret);
6261}
6262
6263
6264
6265
6266
6267
6268
6269int btrfs_add_link(struct btrfs_trans_handle *trans,
6270 struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
6271 const char *name, int name_len, int add_backref, u64 index)
6272{
6273 int ret = 0;
6274 struct btrfs_key key;
6275 struct btrfs_root *root = parent_inode->root;
6276 u64 ino = btrfs_ino(inode);
6277 u64 parent_ino = btrfs_ino(parent_inode);
6278
6279 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
6280 memcpy(&key, &inode->root->root_key, sizeof(key));
6281 } else {
6282 key.objectid = ino;
6283 key.type = BTRFS_INODE_ITEM_KEY;
6284 key.offset = 0;
6285 }
6286
6287 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
6288 ret = btrfs_add_root_ref(trans, key.objectid,
6289 root->root_key.objectid, parent_ino,
6290 index, name, name_len);
6291 } else if (add_backref) {
6292 ret = btrfs_insert_inode_ref(trans, root, name, name_len, ino,
6293 parent_ino, index);
6294 }
6295
6296
6297 if (ret)
6298 return ret;
6299
6300 ret = btrfs_insert_dir_item(trans, name, name_len, parent_inode, &key,
6301 btrfs_inode_type(&inode->vfs_inode), index);
6302 if (ret == -EEXIST || ret == -EOVERFLOW)
6303 goto fail_dir_item;
6304 else if (ret) {
6305 btrfs_abort_transaction(trans, ret);
6306 return ret;
6307 }
6308
6309 btrfs_i_size_write(parent_inode, parent_inode->vfs_inode.i_size +
6310 name_len * 2);
6311 inode_inc_iversion(&parent_inode->vfs_inode);
6312
6313
6314
6315
6316
6317
6318 if (!test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags)) {
6319 struct timespec64 now = current_time(&parent_inode->vfs_inode);
6320
6321 parent_inode->vfs_inode.i_mtime = now;
6322 parent_inode->vfs_inode.i_ctime = now;
6323 }
6324 ret = btrfs_update_inode(trans, root, parent_inode);
6325 if (ret)
6326 btrfs_abort_transaction(trans, ret);
6327 return ret;
6328
6329fail_dir_item:
6330 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
6331 u64 local_index;
6332 int err;
6333 err = btrfs_del_root_ref(trans, key.objectid,
6334 root->root_key.objectid, parent_ino,
6335 &local_index, name, name_len);
6336 if (err)
6337 btrfs_abort_transaction(trans, err);
6338 } else if (add_backref) {
6339 u64 local_index;
6340 int err;
6341
6342 err = btrfs_del_inode_ref(trans, root, name, name_len,
6343 ino, parent_ino, &local_index);
6344 if (err)
6345 btrfs_abort_transaction(trans, err);
6346 }
6347
6348
6349 return ret;
6350}
6351
6352static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
6353 struct btrfs_inode *dir, struct dentry *dentry,
6354 struct btrfs_inode *inode, int backref, u64 index)
6355{
6356 int err = btrfs_add_link(trans, dir, inode,
6357 dentry->d_name.name, dentry->d_name.len,
6358 backref, index);
6359 if (err > 0)
6360 err = -EEXIST;
6361 return err;
6362}
6363
6364static int btrfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
6365 struct dentry *dentry, umode_t mode, dev_t rdev)
6366{
6367 struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
6368 struct btrfs_trans_handle *trans;
6369 struct btrfs_root *root = BTRFS_I(dir)->root;
6370 struct inode *inode = NULL;
6371 int err;
6372 u64 objectid;
6373 u64 index = 0;
6374
6375
6376
6377
6378
6379
6380 trans = btrfs_start_transaction(root, 5);
6381 if (IS_ERR(trans))
6382 return PTR_ERR(trans);
6383
6384 err = btrfs_get_free_objectid(root, &objectid);
6385 if (err)
6386 goto out_unlock;
6387
6388 inode = btrfs_new_inode(trans, root, mnt_userns, dir,
6389 dentry->d_name.name, dentry->d_name.len,
6390 btrfs_ino(BTRFS_I(dir)), objectid, mode, &index);
6391 if (IS_ERR(inode)) {
6392 err = PTR_ERR(inode);
6393 inode = NULL;
6394 goto out_unlock;
6395 }
6396
6397
6398
6399
6400
6401
6402
6403 inode->i_op = &btrfs_special_inode_operations;
6404 init_special_inode(inode, inode->i_mode, rdev);
6405
6406 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
6407 if (err)
6408 goto out_unlock;
6409
6410 err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode),
6411 0, index);
6412 if (err)
6413 goto out_unlock;
6414
6415 btrfs_update_inode(trans, root, BTRFS_I(inode));
6416 d_instantiate_new(dentry, inode);
6417
6418out_unlock:
6419 btrfs_end_transaction(trans);
6420 btrfs_btree_balance_dirty(fs_info);
6421 if (err && inode) {
6422 inode_dec_link_count(inode);
6423 discard_new_inode(inode);
6424 }
6425 return err;
6426}
6427
6428static int btrfs_create(struct user_namespace *mnt_userns, struct inode *dir,
6429 struct dentry *dentry, umode_t mode, bool excl)
6430{
6431 struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
6432 struct btrfs_trans_handle *trans;
6433 struct btrfs_root *root = BTRFS_I(dir)->root;
6434 struct inode *inode = NULL;
6435 int err;
6436 u64 objectid;
6437 u64 index = 0;
6438
6439
6440
6441
6442
6443
6444 trans = btrfs_start_transaction(root, 5);
6445 if (IS_ERR(trans))
6446 return PTR_ERR(trans);
6447
6448 err = btrfs_get_free_objectid(root, &objectid);
6449 if (err)
6450 goto out_unlock;
6451
6452 inode = btrfs_new_inode(trans, root, mnt_userns, dir,
6453 dentry->d_name.name, dentry->d_name.len,
6454 btrfs_ino(BTRFS_I(dir)), objectid, mode, &index);
6455 if (IS_ERR(inode)) {
6456 err = PTR_ERR(inode);
6457 inode = NULL;
6458 goto out_unlock;
6459 }
6460
6461
6462
6463
6464
6465
6466 inode->i_fop = &btrfs_file_operations;
6467 inode->i_op = &btrfs_file_inode_operations;
6468 inode->i_mapping->a_ops = &btrfs_aops;
6469
6470 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
6471 if (err)
6472 goto out_unlock;
6473
6474 err = btrfs_update_inode(trans, root, BTRFS_I(inode));
6475 if (err)
6476 goto out_unlock;
6477
6478 err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode),
6479 0, index);
6480 if (err)
6481 goto out_unlock;
6482
6483 d_instantiate_new(dentry, inode);
6484
6485out_unlock:
6486 btrfs_end_transaction(trans);
6487 if (err && inode) {
6488 inode_dec_link_count(inode);
6489 discard_new_inode(inode);
6490 }
6491 btrfs_btree_balance_dirty(fs_info);
6492 return err;
6493}
6494
6495static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
6496 struct dentry *dentry)
6497{
6498 struct btrfs_trans_handle *trans = NULL;
6499 struct btrfs_root *root = BTRFS_I(dir)->root;
6500 struct inode *inode = d_inode(old_dentry);
6501 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
6502 u64 index;
6503 int err;
6504 int drop_inode = 0;
6505
6506
6507 if (root->root_key.objectid != BTRFS_I(inode)->root->root_key.objectid)
6508 return -EXDEV;
6509
6510 if (inode->i_nlink >= BTRFS_LINK_MAX)
6511 return -EMLINK;
6512
6513 err = btrfs_set_inode_index(BTRFS_I(dir), &index);
6514 if (err)
6515 goto fail;
6516
6517
6518
6519
6520
6521
6522
6523 trans = btrfs_start_transaction(root, inode->i_nlink ? 5 : 6);
6524 if (IS_ERR(trans)) {
6525 err = PTR_ERR(trans);
6526 trans = NULL;
6527 goto fail;
6528 }
6529
6530
6531 BTRFS_I(inode)->dir_index = 0ULL;
6532 inc_nlink(inode);
6533 inode_inc_iversion(inode);
6534 inode->i_ctime = current_time(inode);
6535 ihold(inode);
6536 set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
6537
6538 err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode),
6539 1, index);
6540
6541 if (err) {
6542 drop_inode = 1;
6543 } else {
6544 struct dentry *parent = dentry->d_parent;
6545
6546 err = btrfs_update_inode(trans, root, BTRFS_I(inode));
6547 if (err)
6548 goto fail;
6549 if (inode->i_nlink == 1) {
6550
6551
6552
6553
6554 err = btrfs_orphan_del(trans, BTRFS_I(inode));
6555 if (err)
6556 goto fail;
6557 }
6558 d_instantiate(dentry, inode);
6559 btrfs_log_new_name(trans, old_dentry, NULL, 0, parent);
6560 }
6561
6562fail:
6563 if (trans)
6564 btrfs_end_transaction(trans);
6565 if (drop_inode) {
6566 inode_dec_link_count(inode);
6567 iput(inode);
6568 }
6569 btrfs_btree_balance_dirty(fs_info);
6570 return err;
6571}
6572
6573static int btrfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
6574 struct dentry *dentry, umode_t mode)
6575{
6576 struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
6577 struct inode *inode = NULL;
6578 struct btrfs_trans_handle *trans;
6579 struct btrfs_root *root = BTRFS_I(dir)->root;
6580 int err = 0;
6581 u64 objectid = 0;
6582 u64 index = 0;
6583
6584
6585
6586
6587
6588
6589 trans = btrfs_start_transaction(root, 5);
6590 if (IS_ERR(trans))
6591 return PTR_ERR(trans);
6592
6593 err = btrfs_get_free_objectid(root, &objectid);
6594 if (err)
6595 goto out_fail;
6596
6597 inode = btrfs_new_inode(trans, root, mnt_userns, dir,
6598 dentry->d_name.name, dentry->d_name.len,
6599 btrfs_ino(BTRFS_I(dir)), objectid,
6600 S_IFDIR | mode, &index);
6601 if (IS_ERR(inode)) {
6602 err = PTR_ERR(inode);
6603 inode = NULL;
6604 goto out_fail;
6605 }
6606
6607
6608 inode->i_op = &btrfs_dir_inode_operations;
6609 inode->i_fop = &btrfs_dir_file_operations;
6610
6611 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
6612 if (err)
6613 goto out_fail;
6614
6615 btrfs_i_size_write(BTRFS_I(inode), 0);
6616 err = btrfs_update_inode(trans, root, BTRFS_I(inode));
6617 if (err)
6618 goto out_fail;
6619
6620 err = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
6621 dentry->d_name.name,
6622 dentry->d_name.len, 0, index);
6623 if (err)
6624 goto out_fail;
6625
6626 d_instantiate_new(dentry, inode);
6627
6628out_fail:
6629 btrfs_end_transaction(trans);
6630 if (err && inode) {
6631 inode_dec_link_count(inode);
6632 discard_new_inode(inode);
6633 }
6634 btrfs_btree_balance_dirty(fs_info);
6635 return err;
6636}
6637
6638static noinline int uncompress_inline(struct btrfs_path *path,
6639 struct page *page,
6640 size_t pg_offset, u64 extent_offset,
6641 struct btrfs_file_extent_item *item)
6642{
6643 int ret;
6644 struct extent_buffer *leaf = path->nodes[0];
6645 char *tmp;
6646 size_t max_size;
6647 unsigned long inline_size;
6648 unsigned long ptr;
6649 int compress_type;
6650
6651 WARN_ON(pg_offset != 0);
6652 compress_type = btrfs_file_extent_compression(leaf, item);
6653 max_size = btrfs_file_extent_ram_bytes(leaf, item);
6654 inline_size = btrfs_file_extent_inline_item_len(leaf, path->slots[0]);
6655 tmp = kmalloc(inline_size, GFP_NOFS);
6656 if (!tmp)
6657 return -ENOMEM;
6658 ptr = btrfs_file_extent_inline_start(item);
6659
6660 read_extent_buffer(leaf, tmp, ptr, inline_size);
6661
6662 max_size = min_t(unsigned long, PAGE_SIZE, max_size);
6663 ret = btrfs_decompress(compress_type, tmp, page,
6664 extent_offset, inline_size, max_size);
6665
6666
6667
6668
6669
6670
6671
6672
6673
6674 if (max_size + pg_offset < PAGE_SIZE)
6675 memzero_page(page, pg_offset + max_size,
6676 PAGE_SIZE - max_size - pg_offset);
6677 kfree(tmp);
6678 return ret;
6679}
6680
6681
6682
6683
6684
6685
6686
6687
6688
6689
6690
6691
6692
6693
6694
6695
6696
6697
6698
/*
 * Return the extent map covering file offset @start of @inode.
 *
 * The in-memory extent map tree is consulted first.  On a miss (or when the
 * cached map cannot be used) the file extent item is looked up on disk and
 * turned into a new extent map, which is then added to the extent map tree.
 * When no extent item covers @start, a map describing a hole is built.
 *
 * @page, @pg_offset: when @page is non-NULL and the extent found is inline,
 *                    the inline data is copied (or decompressed) into the
 *                    page at @pg_offset.  When @page is NULL an inline
 *                    extent is returned without being inserted into the
 *                    extent map tree.
 * @start, @len:      file range being looked up.
 *
 * Returns the extent map, or an ERR_PTR() on failure.
 */
struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
				    struct page *page, size_t pg_offset,
				    u64 start, u64 len)
{
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	int ret = 0;
	u64 extent_start = 0;
	u64 extent_end = 0;
	u64 objectid = btrfs_ino(inode);
	int extent_type = -1;
	struct btrfs_path *path = NULL;
	struct btrfs_root *root = inode->root;
	struct btrfs_file_extent_item *item;
	struct extent_buffer *leaf;
	struct btrfs_key found_key;
	struct extent_map *em = NULL;
	struct extent_map_tree *em_tree = &inode->extent_tree;
	struct extent_io_tree *io_tree = &inode->io_tree;

	/* Fast path: try the cached extent maps first. */
	read_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, start, len);
	read_unlock(&em_tree->lock);

	if (em) {
		/* The cached map must actually contain @start to be usable. */
		if (em->start > start || em->start + em->len <= start)
			free_extent_map(em);
		/* An inline extent with a page supplied is re-read so the page gets filled. */
		else if (em->block_start == EXTENT_MAP_INLINE && page)
			free_extent_map(em);
		else
			goto out;
	}
	em = alloc_extent_map();
	if (!em) {
		ret = -ENOMEM;
		goto out;
	}
	/* Placeholder values until the real extent is known. */
	em->start = EXTENT_MAP_HOLE;
	em->orig_start = EXTENT_MAP_HOLE;
	em->len = (u64)-1;
	em->block_len = (u64)-1;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	/* Request forward readahead for the tree search. */
	path->reada = READA_FORWARD;

	/*
	 * Free space inodes are searched in the commit root without taking
	 * tree locks.
	 */
	if (btrfs_is_free_space_inode(inode)) {
		path->search_commit_root = 1;
		path->skip_locking = 1;
	}

	ret = btrfs_lookup_file_extent(NULL, root, path, objectid, start, 0);
	if (ret < 0) {
		goto out;
	} else if (ret > 0) {
		/* No exact match: step back to the previous item, if any. */
		if (path->slots[0] == 0)
			goto not_found;
		path->slots[0]--;
		ret = 0;
	}

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0],
			      struct btrfs_file_extent_item);
	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
	if (found_key.objectid != objectid ||
	    found_key.type != BTRFS_EXTENT_DATA_KEY) {
		/*
		 * The item we landed on is not a file extent of this inode.
		 * Setting extent_end to @start makes the "next" block below
		 * advance to the following item.
		 */
		extent_end = start;
		goto next;
	}

	extent_type = btrfs_file_extent_type(leaf, item);
	extent_start = found_key.offset;
	extent_end = btrfs_file_extent_end(path);
	if (extent_type == BTRFS_FILE_EXTENT_REG ||
	    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
		/* Regular/prealloc extents are only valid on regular files. */
		if (!S_ISREG(inode->vfs_inode.i_mode)) {
			ret = -EUCLEAN;
			btrfs_crit(fs_info,
		"regular/prealloc extent found for non-regular inode %llu",
				   btrfs_ino(inode));
			goto out;
		}
		trace_btrfs_get_extent_show_fi_regular(inode, leaf, item,
						       extent_start);
	} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
		trace_btrfs_get_extent_show_fi_inline(inode, leaf, item,
						      path->slots[0],
						      extent_start);
	}
next:
	if (start >= extent_end) {
		/* The item found ends at or before @start: look at the next one. */
		path->slots[0]++;
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out;
			else if (ret > 0)
				goto not_found;

			leaf = path->nodes[0];
		}
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != objectid ||
		    found_key.type != BTRFS_EXTENT_DATA_KEY)
			goto not_found;
		if (start + len <= found_key.offset)
			goto not_found;
		if (start > found_key.offset)
			goto next;

		/*
		 * The next extent begins inside the requested range: report
		 * a hole from @start up to where that extent begins.
		 */
		em->start = start;
		em->orig_start = start;
		em->len = found_key.offset - start;
		em->block_start = EXTENT_MAP_HOLE;
		goto insert;
	}

	btrfs_extent_item_to_extent_map(inode, path, item, !page, em);

	if (extent_type == BTRFS_FILE_EXTENT_REG ||
	    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
		goto insert;
	} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
		unsigned long ptr;
		char *map;
		size_t size;
		size_t extent_offset;
		size_t copy_size;

		/* Without a page there is nothing to fill; return the map as is. */
		if (!page)
			goto out;

		size = btrfs_file_extent_ram_bytes(leaf, item);
		extent_offset = page_offset(page) + pg_offset - extent_start;
		copy_size = min_t(u64, PAGE_SIZE - pg_offset,
				  size - extent_offset);
		em->start = extent_start + extent_offset;
		em->len = ALIGN(copy_size, fs_info->sectorsize);
		em->orig_block_len = em->len;
		em->orig_start = em->start;
		ptr = btrfs_file_extent_inline_start(item) + extent_offset;

		if (!PageUptodate(page)) {
			if (btrfs_file_extent_compression(leaf, item) !=
			    BTRFS_COMPRESS_NONE) {
				ret = uncompress_inline(path, page, pg_offset,
							extent_offset, item);
				if (ret)
					goto out;
			} else {
				/* Uncompressed: copy the data and zero the rest of the page. */
				map = kmap_local_page(page);
				read_extent_buffer(leaf, map + pg_offset, ptr,
						   copy_size);
				if (pg_offset + copy_size < PAGE_SIZE) {
					memset(map + pg_offset + copy_size, 0,
					       PAGE_SIZE - pg_offset -
					       copy_size);
				}
				kunmap_local(map);
			}
			flush_dcache_page(page);
		}
		set_extent_uptodate(io_tree, em->start,
				    extent_map_end(em) - 1, NULL, GFP_NOFS);
		goto insert;
	}
not_found:
	/* Nothing covers the range: describe it as a hole. */
	em->start = start;
	em->orig_start = start;
	em->len = len;
	em->block_start = EXTENT_MAP_HOLE;
insert:
	ret = 0;
	btrfs_release_path(path);
	/* Sanity check: the map we built must contain @start. */
	if (em->start > start || extent_map_end(em) <= start) {
		btrfs_err(fs_info,
			  "bad extent! em: [%llu %llu] passed [%llu %llu]",
			  em->start, em->len, start, len);
		ret = -EIO;
		goto out;
	}

	write_lock(&em_tree->lock);
	ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
	write_unlock(&em_tree->lock);
out:
	btrfs_free_path(path);

	trace_btrfs_get_extent(root, inode, em);

	if (ret) {
		free_extent_map(em);
		return ERR_PTR(ret);
	}
	return em;
}
6913
/*
 * Variant of btrfs_get_extent() that also takes EXTENT_DELALLOC ranges of
 * the inode's io_tree into account.
 *
 * When btrfs_get_extent() returns anything other than a hole or a prealloc
 * extent, that map is returned unchanged.  Otherwise the range is checked
 * for delalloc bits and, when some are found, a freshly allocated map is
 * returned that describes either the part of the hole/prealloc extent lying
 * before the delalloc range, or the delalloc range itself (block_start set
 * to EXTENT_MAP_DELALLOC).
 *
 * Returns an extent map or an ERR_PTR() on failure.
 */
struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
					   u64 start, u64 len)
{
	struct extent_map *em;
	struct extent_map *hole_em = NULL;
	u64 delalloc_start = start;
	u64 end;
	u64 delalloc_len;
	u64 delalloc_end;
	int err = 0;

	em = btrfs_get_extent(inode, NULL, 0, start, len);
	if (IS_ERR(em))
		return em;

	/*
	 * Only holes and prealloc extents need the delalloc check below;
	 * every other kind of extent is returned directly.
	 */
	if (em->block_start != EXTENT_MAP_HOLE &&
	    !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
		return em;
	else
		hole_em = em;

	/* Compute the inclusive end of the range, clamping on u64 overflow. */
	end = start + len;
	if (end < start)
		end = (u64)-1;
	else
		end -= 1;

	em = NULL;

	/*
	 * Look for delalloc bytes in the range.  count_range_bits() receives
	 * &delalloc_start and may update it (presumably to the start of the
	 * first delalloc range found - confirm against its definition).
	 */
	delalloc_len = count_range_bits(&inode->io_tree, &delalloc_start,
					end, len, EXTENT_DELALLOC, 1);
	delalloc_end = delalloc_start + delalloc_len;
	if (delalloc_end < delalloc_start)
		delalloc_end = (u64)-1;

	/*
	 * No delalloc range intersects [start, end]: hand back the hole or
	 * prealloc map obtained above.
	 */
	if (delalloc_start > end || delalloc_end <= start) {
		em = hole_em;
		hole_em = NULL;
		goto out;
	}

	/*
	 * Trim the delalloc range so that it does not begin before the
	 * requested start.
	 */
	delalloc_start = max(start, delalloc_start);
	delalloc_len = delalloc_end - delalloc_start;

	if (delalloc_len > 0) {
		u64 hole_start;
		u64 hole_len;
		const u64 hole_end = extent_map_end(hole_em);

		em = alloc_extent_map();
		if (!em) {
			err = -ENOMEM;
			goto out;
		}

		ASSERT(hole_em);
		/*
		 * Drop the hole map if it lies entirely outside the requested
		 * range; otherwise clip it to begin no earlier than @start.
		 * hole_start and hole_len are only assigned in the latter
		 * case and are only read below while hole_em is non-NULL.
		 */
		if (hole_end <= start || hole_em->start > end) {
		       free_extent_map(hole_em);
		       hole_em = NULL;
		} else {
		       hole_start = max(hole_em->start, start);
		       hole_len = hole_end - hole_start;
		}

		if (hole_em && delalloc_start > hole_start) {
			/*
			 * The hole/prealloc extent begins before the delalloc
			 * range: return only the part of it that precedes
			 * the delalloc bytes.
			 */
			em->len = min(hole_len, delalloc_start - hole_start);
			em->start = hole_start;
			em->orig_start = hole_start;
			/*
			 * Carry over block_start and the prealloc flag from
			 * the original hole/prealloc map.
			 */
			em->block_start = hole_em->block_start;
			em->block_len = hole_len;
			if (test_bit(EXTENT_FLAG_PREALLOC, &hole_em->flags))
				set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
		} else {
			/*
			 * The delalloc range begins at or before the hole (or
			 * there is no usable hole): report the delalloc range.
			 */
			em->start = delalloc_start;
			em->len = delalloc_len;
			em->orig_start = delalloc_start;
			em->block_start = EXTENT_MAP_DELALLOC;
			em->block_len = delalloc_len;
		}
	} else {
		return hole_em;
	}
out:

	/* hole_em is NULL here whenever it has been handed over or freed. */
	free_extent_map(hole_em);
	if (err) {
		free_extent_map(em);
		return ERR_PTR(err);
	}
	return em;
}
7040
7041static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode,
7042 const u64 start,
7043 const u64 len,
7044 const u64 orig_start,
7045 const u64 block_start,
7046 const u64 block_len,
7047 const u64 orig_block_len,
7048 const u64 ram_bytes,
7049 const int type)
7050{
7051 struct extent_map *em = NULL;
7052 int ret;
7053
7054 if (type != BTRFS_ORDERED_NOCOW) {
7055 em = create_io_em(inode, start, len, orig_start, block_start,
7056 block_len, orig_block_len, ram_bytes,
7057 BTRFS_COMPRESS_NONE,
7058 type);
7059 if (IS_ERR(em))
7060 goto out;
7061 }
7062 ret = btrfs_add_ordered_extent(inode, start, len, len, block_start,
7063 block_len, 0,
7064 (1 << type) |
7065 (1 << BTRFS_ORDERED_DIRECT),
7066 BTRFS_COMPRESS_NONE);
7067 if (ret) {
7068 if (em) {
7069 free_extent_map(em);
7070 btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
7071 }
7072 em = ERR_PTR(ret);
7073 }
7074 out:
7075
7076 return em;
7077}
7078
7079static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode,
7080 u64 start, u64 len)
7081{
7082 struct btrfs_root *root = inode->root;
7083 struct btrfs_fs_info *fs_info = root->fs_info;
7084 struct extent_map *em;
7085 struct btrfs_key ins;
7086 u64 alloc_hint;
7087 int ret;
7088
7089 alloc_hint = get_extent_allocation_hint(inode, start, len);
7090 ret = btrfs_reserve_extent(root, len, len, fs_info->sectorsize,
7091 0, alloc_hint, &ins, 1, 1);
7092 if (ret)
7093 return ERR_PTR(ret);
7094
7095 em = btrfs_create_dio_extent(inode, start, ins.offset, start,
7096 ins.objectid, ins.offset, ins.offset,
7097 ins.offset, BTRFS_ORDERED_REGULAR);
7098 btrfs_dec_block_group_reservations(fs_info, ins.objectid);
7099 if (IS_ERR(em))
7100 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset,
7101 1);
7102
7103 return em;
7104}
7105
7106static bool btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr)
7107{
7108 struct btrfs_block_group *block_group;
7109 bool readonly = false;
7110
7111 block_group = btrfs_lookup_block_group(fs_info, bytenr);
7112 if (!block_group || block_group->ro)
7113 readonly = true;
7114 if (block_group)
7115 btrfs_put_block_group(block_group);
7116 return readonly;
7117}
7118
7119
7120
7121
7122
7123
7124
7125
7126
7127
7128
7129
7130
7131
7132
7133
7134
7135
7136
7137
7138
/*
 * Check whether a write to @offset of @inode can be done in place (NOCOW)
 * on the extent that currently backs that offset.
 *
 * @offset:         file offset of the write.
 * @len:            in: length of the write; out (only when 1 is returned):
 *                  the number of bytes, starting at @offset, that can be
 *                  written in place.
 * @orig_start:     optional; when non-NULL it is filled with the file offset
 *                  of the extent item minus its backref offset, and
 *                  @orig_block_len / @ram_bytes are filled in as well (they
 *                  must then be non-NULL too).
 * @orig_block_len: receives the extent's disk_num_bytes.
 * @ram_bytes:      receives the extent's ram_bytes.
 * @strict:         when false, extents whose generation is not newer than
 *                  the root's last snapshot are rejected; the flag is also
 *                  forwarded to btrfs_cross_ref_exist().
 *
 * Returns 1 when the write can be done NOCOW, 0 when it cannot, and a
 * negative errno on failure (-ENOMEM, lookup errors, or -EAGAIN when a
 * prealloc range still has delalloc bits set).
 */
noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
			      u64 *orig_start, u64 *orig_block_len,
			      u64 *ram_bytes, bool strict)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_path *path;
	int ret;
	struct extent_buffer *leaf;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 disk_bytenr;
	u64 backref_offset;
	u64 extent_end;
	u64 num_bytes;
	int slot;
	int found_type;
	bool nocow = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_lookup_file_extent(NULL, root, path,
			btrfs_ino(BTRFS_I(inode)), offset, 0);
	if (ret < 0)
		goto out;

	slot = path->slots[0];
	if (ret == 1) {
		if (slot == 0) {
			/* No exact match and no previous item: cannot NOCOW. */
			ret = 0;
			goto out;
		}
		slot--;
	}
	ret = 0;
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, slot);
	if (key.objectid != btrfs_ino(BTRFS_I(inode)) ||
	    key.type != BTRFS_EXTENT_DATA_KEY) {
		/* Not a file extent item of this inode. */
		goto out;
	}

	if (key.offset > offset) {
		/* The extent found starts after @offset. */
		goto out;
	}

	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
	found_type = btrfs_file_extent_type(leaf, fi);
	if (found_type != BTRFS_FILE_EXTENT_REG &&
	    found_type != BTRFS_FILE_EXTENT_PREALLOC) {
		/* Only regular and prealloc extents qualify (e.g. not inline). */
		goto out;
	}

	/* Without NODATACOW on the inode, only prealloc extents qualify. */
	if (!nocow && found_type == BTRFS_FILE_EXTENT_REG)
		goto out;

	extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
	if (extent_end <= offset)
		goto out;

	/* A disk_bytenr of 0 is rejected: no block address to write to. */
	disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
	if (disk_bytenr == 0)
		goto out;

	/* Compressed, encrypted or otherwise encoded extents are rejected. */
	if (btrfs_file_extent_compression(leaf, fi) ||
	    btrfs_file_extent_encryption(leaf, fi) ||
	    btrfs_file_extent_other_encoding(leaf, fi))
		goto out;

	/*
	 * In non-strict mode, reject extents whose generation is not newer
	 * than the root's last snapshot.
	 */
	if (!strict &&
	    (btrfs_file_extent_generation(leaf, fi) <=
	     btrfs_root_last_snapshot(&root->root_item)))
		goto out;

	backref_offset = btrfs_file_extent_offset(leaf, fi);

	if (orig_start) {
		*orig_start = key.offset - backref_offset;
		*orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
		*ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
	}

	if (btrfs_extent_readonly(fs_info, disk_bytenr))
		goto out;

	/* Clamp the length to the end of the extent. */
	num_bytes = min(offset + *len, extent_end) - offset;
	if (!nocow && found_type == BTRFS_FILE_EXTENT_PREALLOC) {
		u64 range_end;

		/* Pending delalloc in the (sector aligned) range: ask to retry. */
		range_end = round_up(offset + num_bytes,
				     root->fs_info->sectorsize) - 1;
		ret = test_range_bit(io_tree, offset, range_end,
				     EXTENT_DELALLOC, 0, NULL);
		if (ret) {
			ret = -EAGAIN;
			goto out;
		}
	}

	/* The leaf is not accessed past this point. */
	btrfs_release_path(path);

	/*
	 * Any non-zero result from the cross reference check (including
	 * errors) is treated as "cannot NOCOW".
	 */

	ret = btrfs_cross_ref_exist(root, btrfs_ino(BTRFS_I(inode)),
				    key.offset - backref_offset, disk_bytenr,
				    strict);
	if (ret) {
		ret = 0;
		goto out;
	}

	/*
	 * Translate to the disk address that corresponds to @offset and
	 * refuse NOCOW if csum_exist_in_range() reports checksums for the
	 * range being written.
	 */

	disk_bytenr += backref_offset;
	disk_bytenr += offset - key.offset;
	if (csum_exist_in_range(fs_info, disk_bytenr, num_bytes))
		goto out;

	/* All checks passed: report how many bytes can be written in place. */
	*len = num_bytes;
	ret = 1;
out:
	btrfs_free_path(path);
	return ret;
}
7284
7285static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
7286 struct extent_state **cached_state, bool writing)
7287{
7288 struct btrfs_ordered_extent *ordered;
7289 int ret = 0;
7290
7291 while (1) {
7292 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
7293 cached_state);
7294
7295
7296
7297
7298
7299 ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), lockstart,
7300 lockend - lockstart + 1);
7301
7302
7303
7304
7305
7306
7307
7308
7309 if (!ordered &&
7310 (!writing || !filemap_range_has_page(inode->i_mapping,
7311 lockstart, lockend)))
7312 break;
7313
7314 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
7315 cached_state);
7316
7317 if (ordered) {
7318
7319
7320
7321
7322
7323
7324
7325
7326
7327
7328
7329
7330
7331
7332
7333 if (writing ||
7334 test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags))
7335 btrfs_start_ordered_extent(ordered, 1);
7336 else
7337 ret = -ENOTBLK;
7338 btrfs_put_ordered_extent(ordered);
7339 } else {
7340
7341
7342
7343
7344
7345
7346
7347
7348
7349
7350
7351
7352
7353 ret = -ENOTBLK;
7354 }
7355
7356 if (ret)
7357 break;
7358
7359 cond_resched();
7360 }
7361
7362 return ret;
7363}
7364
7365
7366static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
7367 u64 len, u64 orig_start, u64 block_start,
7368 u64 block_len, u64 orig_block_len,
7369 u64 ram_bytes, int compress_type,
7370 int type)
7371{
7372 struct extent_map_tree *em_tree;
7373 struct extent_map *em;
7374 int ret;
7375
7376 ASSERT(type == BTRFS_ORDERED_PREALLOC ||
7377 type == BTRFS_ORDERED_COMPRESSED ||
7378 type == BTRFS_ORDERED_NOCOW ||
7379 type == BTRFS_ORDERED_REGULAR);
7380
7381 em_tree = &inode->extent_tree;
7382 em = alloc_extent_map();
7383 if (!em)
7384 return ERR_PTR(-ENOMEM);
7385
7386 em->start = start;
7387 em->orig_start = orig_start;
7388 em->len = len;
7389 em->block_len = block_len;
7390 em->block_start = block_start;
7391 em->orig_block_len = orig_block_len;
7392 em->ram_bytes = ram_bytes;
7393 em->generation = -1;
7394 set_bit(EXTENT_FLAG_PINNED, &em->flags);
7395 if (type == BTRFS_ORDERED_PREALLOC) {
7396 set_bit(EXTENT_FLAG_FILLING, &em->flags);
7397 } else if (type == BTRFS_ORDERED_COMPRESSED) {
7398 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
7399 em->compress_type = compress_type;
7400 }
7401
7402 do {
7403 btrfs_drop_extent_cache(inode, em->start,
7404 em->start + em->len - 1, 0);
7405 write_lock(&em_tree->lock);
7406 ret = add_extent_mapping(em_tree, em, 1);
7407 write_unlock(&em_tree->lock);
7408
7409
7410
7411
7412 } while (ret == -EEXIST);
7413
7414 if (ret) {
7415 free_extent_map(em);
7416 return ERR_PTR(ret);
7417 }
7418
7419
7420 return em;
7421}
7422
7423
7424static int btrfs_get_blocks_direct_write(struct extent_map **map,
7425 struct inode *inode,
7426 struct btrfs_dio_data *dio_data,
7427 u64 start, u64 len)
7428{
7429 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
7430 struct extent_map *em = *map;
7431 int type;
7432 u64 block_start, orig_start, orig_block_len, ram_bytes;
7433 bool can_nocow = false;
7434 bool space_reserved = false;
7435 u64 prev_len;
7436 int ret = 0;
7437
7438
7439
7440
7441
7442
7443
7444
7445
7446
7447 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
7448 ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
7449 em->block_start != EXTENT_MAP_HOLE)) {
7450 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
7451 type = BTRFS_ORDERED_PREALLOC;
7452 else
7453 type = BTRFS_ORDERED_NOCOW;
7454 len = min(len, em->len - (start - em->start));
7455 block_start = em->block_start + (start - em->start);
7456
7457 if (can_nocow_extent(inode, start, &len, &orig_start,
7458 &orig_block_len, &ram_bytes, false) == 1 &&
7459 btrfs_inc_nocow_writers(fs_info, block_start))
7460 can_nocow = true;
7461 }
7462
7463 prev_len = len;
7464 if (can_nocow) {
7465 struct extent_map *em2;
7466
7467
7468 ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len, len);
7469 if (ret < 0) {
7470
7471 free_extent_map(em);
7472 *map = NULL;
7473 btrfs_dec_nocow_writers(fs_info, block_start);
7474 goto out;
7475 }
7476 space_reserved = true;
7477
7478 em2 = btrfs_create_dio_extent(BTRFS_I(inode), start, len,
7479 orig_start, block_start,
7480 len, orig_block_len,
7481 ram_bytes, type);
7482 btrfs_dec_nocow_writers(fs_info, block_start);
7483 if (type == BTRFS_ORDERED_PREALLOC) {
7484 free_extent_map(em);
7485 *map = em = em2;
7486 }
7487
7488 if (IS_ERR(em2)) {
7489 ret = PTR_ERR(em2);
7490 goto out;
7491 }
7492 } else {
7493
7494 free_extent_map(em);
7495 *map = NULL;
7496
7497
7498 ret = btrfs_delalloc_reserve_space(BTRFS_I(inode),
7499 &dio_data->data_reserved,
7500 start, len);
7501 if (ret < 0)
7502 goto out;
7503 space_reserved = true;
7504
7505 em = btrfs_new_extent_direct(BTRFS_I(inode), start, len);
7506 if (IS_ERR(em)) {
7507 ret = PTR_ERR(em);
7508 goto out;
7509 }
7510 *map = em;
7511 len = min(len, em->len - (start - em->start));
7512 if (len < prev_len)
7513 btrfs_delalloc_release_space(BTRFS_I(inode),
7514 dio_data->data_reserved,
7515 start + len, prev_len - len,
7516 true);
7517 }
7518
7519
7520
7521
7522
7523 btrfs_delalloc_release_extents(BTRFS_I(inode), prev_len);
7524
7525
7526
7527
7528
7529 if (start + len > i_size_read(inode))
7530 i_size_write(inode, start + len);
7531out:
7532 if (ret && space_reserved) {
7533 btrfs_delalloc_release_extents(BTRFS_I(inode), len);
7534 if (can_nocow) {
7535 btrfs_delalloc_release_metadata(BTRFS_I(inode), len, true);
7536 } else {
7537 btrfs_delalloc_release_space(BTRFS_I(inode),
7538 dio_data->data_reserved,
7539 start, len, true);
7540 extent_changeset_free(dio_data->data_reserved);
7541 dio_data->data_reserved = NULL;
7542 }
7543 }
7544 return ret;
7545}
7546
/*
 * iomap_begin callback for btrfs direct IO.
 *
 * Locks the file range in the inode's io_tree, looks up the extent map for
 * @start, (for writes) prepares the extent to write to, and fills @iomap
 * with the resulting mapping.  A btrfs_dio_data is allocated here and stored
 * in iomap->private.
 *
 * @inode:  inode the IO is for.
 * @start:  file offset of the IO.
 * @length: requested length.
 * @flags:  IOMAP_* flags; IOMAP_WRITE and IOMAP_NOWAIT are examined here.
 * @iomap:  mapping to fill in.
 * @srcmap: not used by this function.
 *
 * Returns 0 on success or a negative errno.  -ENOTBLK is returned when the
 * range cannot be locked for direct IO or the extent is compressed or
 * inline; -EAGAIN for IOMAP_NOWAIT requests that the extent found does not
 * fully cover.
 */
static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
		loff_t length, unsigned int flags, struct iomap *iomap,
		struct iomap *srcmap)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_map *em;
	struct extent_state *cached_state = NULL;
	struct btrfs_dio_data *dio_data = NULL;
	u64 lockstart, lockend;
	const bool write = !!(flags & IOMAP_WRITE);
	int ret = 0;
	u64 len = length;
	bool unlock_extents = false;

	/* Reads are mapped at most one sector at a time. */
	if (!write)
		len = min_t(u64, len, fs_info->sectorsize);

	lockstart = start;
	lockend = start + len - 1;

	/*
	 * If the inode has the BTRFS_INODE_HAS_ASYNC_EXTENT runtime flag set,
	 * start writeback on the whole requested range before going on.
	 */
	if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
		     &BTRFS_I(inode)->runtime_flags)) {
		ret = filemap_fdatawrite_range(inode->i_mapping, start,
					       start + length - 1);
		if (ret)
			return ret;
	}

	dio_data = kzalloc(sizeof(*dio_data), GFP_NOFS);
	if (!dio_data)
		return -ENOMEM;

	iomap->private = dio_data;


	/*
	 * Any failure to lock the range for direct IO is reported as
	 * -ENOTBLK.
	 */
	if (lock_extent_direct(inode, lockstart, lockend, &cached_state, write)) {
		ret = -ENOTBLK;
		goto err;
	}

	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len);
	if (IS_ERR(em)) {
		ret = PTR_ERR(em);
		goto unlock_err;
	}

	/*
	 * Compressed and inline extents are not handled by the direct IO
	 * path; report -ENOTBLK for them.
	 */
	if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
	    em->block_start == EXTENT_MAP_INLINE) {
		free_extent_map(em);
		ret = -ENOTBLK;
		goto unlock_err;
	}

	/* Do not map past the end of the extent found. */
	len = min(len, em->len - (start - em->start));

	/*
	 * For IOMAP_NOWAIT requests, fail with -EAGAIN when the extent does
	 * not cover the whole requested length instead of mapping only a
	 * part of it.
	 */
	if ((flags & IOMAP_NOWAIT) && len < length) {
		free_extent_map(em);
		ret = -EAGAIN;
		goto unlock_err;
	}

	if (write) {
		/* On failure the callee has already disposed of em. */
		ret = btrfs_get_blocks_direct_write(&em, inode, dio_data,
						    start, len);
		if (ret < 0)
			goto unlock_err;
		unlock_extents = true;
		/* The extent to write to may be shorter than requested. */
		len = min(len, em->len - (start - em->start));
	} else {
		/*
		 * For reads only the tail of the locked range that is not
		 * being mapped is unlocked here; the mapped part stays
		 * locked.
		 */
		lockstart = start + len;
		if (lockstart < lockend)
			unlock_extents = true;
	}

	if (unlock_extents)
		unlock_extent_cached(&BTRFS_I(inode)->io_tree,
				     lockstart, lockend, &cached_state);
	else
		free_extent_state(cached_state);

	/*
	 * Holes, and prealloc extents when reading, are reported as
	 * IOMAP_HOLE; everything else as a mapped range at its block address.
	 */
	if ((em->block_start == EXTENT_MAP_HOLE) ||
	    (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) && !write)) {
		iomap->addr = IOMAP_NULL_ADDR;
		iomap->type = IOMAP_HOLE;
	} else {
		iomap->addr = em->block_start + (start - em->start);
		iomap->type = IOMAP_MAPPED;
	}
	iomap->offset = start;
	iomap->bdev = fs_info->fs_devices->latest_dev->bdev;
	iomap->length = len;

	if (write && btrfs_use_zone_append(BTRFS_I(inode), em->block_start))
		iomap->flags |= IOMAP_F_ZONE_APPEND;

	free_extent_map(em);

	return 0;

unlock_err:
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			     &cached_state);
err:
	/* NOTE(review): iomap->private still points at the freed dio_data here. */
	kfree(dio_data);

	return ret;
}
7709
7710static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length,
7711 ssize_t written, unsigned int flags, struct iomap *iomap)
7712{
7713 int ret = 0;
7714 struct btrfs_dio_data *dio_data = iomap->private;
7715 size_t submitted = dio_data->submitted;
7716 const bool write = !!(flags & IOMAP_WRITE);
7717
7718 if (!write && (iomap->type == IOMAP_HOLE)) {
7719
7720 unlock_extent(&BTRFS_I(inode)->io_tree, pos, pos + length - 1);
7721 goto out;
7722 }
7723
7724 if (submitted < length) {
7725 pos += submitted;
7726 length -= submitted;
7727 if (write)
7728 __endio_write_update_ordered(BTRFS_I(inode), pos,
7729 length, false);
7730 else
7731 unlock_extent(&BTRFS_I(inode)->io_tree, pos,
7732 pos + length - 1);
7733 ret = -ENOTBLK;
7734 }
7735
7736 if (write)
7737 extent_changeset_free(dio_data->data_reserved);
7738out:
7739 kfree(dio_data);
7740 iomap->private = NULL;
7741
7742 return ret;
7743}
7744
7745static void btrfs_dio_private_put(struct btrfs_dio_private *dip)
7746{
7747
7748
7749
7750
7751 if (!refcount_dec_and_test(&dip->refs))
7752 return;
7753
7754 if (btrfs_op(dip->dio_bio) == BTRFS_MAP_WRITE) {
7755 __endio_write_update_ordered(BTRFS_I(dip->inode),
7756 dip->file_offset,
7757 dip->bytes,
7758 !dip->dio_bio->bi_status);
7759 } else {
7760 unlock_extent(&BTRFS_I(dip->inode)->io_tree,
7761 dip->file_offset,
7762 dip->file_offset + dip->bytes - 1);
7763 }
7764
7765 bio_endio(dip->dio_bio);
7766 kfree(dip);
7767}
7768
7769static blk_status_t submit_dio_repair_bio(struct inode *inode, struct bio *bio,
7770 int mirror_num,
7771 unsigned long bio_flags)
7772{
7773 struct btrfs_dio_private *dip = bio->bi_private;
7774 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
7775 blk_status_t ret;
7776
7777 BUG_ON(bio_op(bio) == REQ_OP_WRITE);
7778
7779 ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
7780 if (ret)
7781 return ret;
7782
7783 refcount_inc(&dip->refs);
7784 ret = btrfs_map_bio(fs_info, bio, mirror_num);
7785 if (ret)
7786 refcount_dec(&dip->refs);
7787 return ret;
7788}
7789
7790static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip,
7791 struct btrfs_bio *bbio,
7792 const bool uptodate)
7793{
7794 struct inode *inode = dip->inode;
7795 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
7796 const u32 sectorsize = fs_info->sectorsize;
7797 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
7798 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
7799 const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM);
7800 struct bio_vec bvec;
7801 struct bvec_iter iter;
7802 u32 bio_offset = 0;
7803 blk_status_t err = BLK_STS_OK;
7804
7805 __bio_for_each_segment(bvec, &bbio->bio, iter, bbio->iter) {
7806 unsigned int i, nr_sectors, pgoff;
7807
7808 nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len);
7809 pgoff = bvec.bv_offset;
7810 for (i = 0; i < nr_sectors; i++) {
7811 u64 start = bbio->file_offset + bio_offset;
7812
7813 ASSERT(pgoff < PAGE_SIZE);
7814 if (uptodate &&
7815 (!csum || !check_data_csum(inode, bbio,
7816 bio_offset, bvec.bv_page,
7817 pgoff, start))) {
7818 clean_io_failure(fs_info, failure_tree, io_tree,
7819 start, bvec.bv_page,
7820 btrfs_ino(BTRFS_I(inode)),
7821 pgoff);
7822 } else {
7823 int ret;
7824
7825 ret = btrfs_repair_one_sector(inode, &bbio->bio,
7826 bio_offset, bvec.bv_page, pgoff,
7827 start, bbio->mirror_num,
7828 submit_dio_repair_bio);
7829 if (ret)
7830 err = errno_to_blk_status(ret);
7831 }
7832 ASSERT(bio_offset + sectorsize > bio_offset);
7833 bio_offset += sectorsize;
7834 pgoff += sectorsize;
7835 }
7836 }
7837 return err;
7838}
7839
7840static void __endio_write_update_ordered(struct btrfs_inode *inode,
7841 const u64 offset, const u64 bytes,
7842 const bool uptodate)
7843{
7844 btrfs_mark_ordered_io_finished(inode, NULL, offset, bytes,
7845 finish_ordered_fn, uptodate);
7846}
7847
7848static blk_status_t btrfs_submit_bio_start_direct_io(struct inode *inode,
7849 struct bio *bio,
7850 u64 dio_file_offset)
7851{
7852 return btrfs_csum_one_bio(BTRFS_I(inode), bio, dio_file_offset, false);
7853}
7854
7855static void btrfs_end_dio_bio(struct bio *bio)
7856{
7857 struct btrfs_dio_private *dip = bio->bi_private;
7858 struct btrfs_bio *bbio = btrfs_bio(bio);
7859 blk_status_t err = bio->bi_status;
7860
7861 if (err)
7862 btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
7863 "direct IO failed ino %llu rw %d,%u sector %#Lx len %u err no %d",
7864 btrfs_ino(BTRFS_I(dip->inode)), bio_op(bio),
7865 bio->bi_opf, bio->bi_iter.bi_sector,
7866 bio->bi_iter.bi_size, err);
7867
7868 if (bio_op(bio) == REQ_OP_READ)
7869 err = btrfs_check_read_dio_bio(dip, bbio, !err);
7870
7871 if (err)
7872 dip->dio_bio->bi_status = err;
7873
7874 btrfs_record_physical_zoned(dip->inode, bbio->file_offset, bio);
7875
7876 bio_put(bio);
7877 btrfs_dio_private_put(dip);
7878}
7879
7880static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
7881 struct inode *inode, u64 file_offset, int async_submit)
7882{
7883 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
7884 struct btrfs_dio_private *dip = bio->bi_private;
7885 bool write = btrfs_op(bio) == BTRFS_MAP_WRITE;
7886 blk_status_t ret;
7887
7888
7889 if (async_submit)
7890 async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);
7891
7892 if (!write) {
7893 ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
7894 if (ret)
7895 goto err;
7896 }
7897
7898 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
7899 goto map;
7900
7901 if (write && async_submit) {
7902 ret = btrfs_wq_submit_bio(inode, bio, 0, 0, file_offset,
7903 btrfs_submit_bio_start_direct_io);
7904 goto err;
7905 } else if (write) {
7906
7907
7908
7909
7910 ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, file_offset, false);
7911 if (ret)
7912 goto err;
7913 } else {
7914 u64 csum_offset;
7915
7916 csum_offset = file_offset - dip->file_offset;
7917 csum_offset >>= fs_info->sectorsize_bits;
7918 csum_offset *= fs_info->csum_size;
7919 btrfs_bio(bio)->csum = dip->csums + csum_offset;
7920 }
7921map:
7922 ret = btrfs_map_bio(fs_info, bio, 0);
7923err:
7924 return ret;
7925}
7926
7927
7928
7929
7930
7931static struct btrfs_dio_private *btrfs_create_dio_private(struct bio *dio_bio,
7932 struct inode *inode,
7933 loff_t file_offset)
7934{
7935 const bool write = (btrfs_op(dio_bio) == BTRFS_MAP_WRITE);
7936 const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM);
7937 size_t dip_size;
7938 struct btrfs_dio_private *dip;
7939
7940 dip_size = sizeof(*dip);
7941 if (!write && csum) {
7942 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
7943 size_t nblocks;
7944
7945 nblocks = dio_bio->bi_iter.bi_size >> fs_info->sectorsize_bits;
7946 dip_size += fs_info->csum_size * nblocks;
7947 }
7948
7949 dip = kzalloc(dip_size, GFP_NOFS);
7950 if (!dip)
7951 return NULL;
7952
7953 dip->inode = inode;
7954 dip->file_offset = file_offset;
7955 dip->bytes = dio_bio->bi_iter.bi_size;
7956 dip->disk_bytenr = dio_bio->bi_iter.bi_sector << 9;
7957 dip->dio_bio = dio_bio;
7958 refcount_set(&dip->refs, 1);
7959 return dip;
7960}
7961
7962static void btrfs_submit_direct(const struct iomap_iter *iter,
7963 struct bio *dio_bio, loff_t file_offset)
7964{
7965 struct inode *inode = iter->inode;
7966 const bool write = (btrfs_op(dio_bio) == BTRFS_MAP_WRITE);
7967 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
7968 const bool raid56 = (btrfs_data_alloc_profile(fs_info) &
7969 BTRFS_BLOCK_GROUP_RAID56_MASK);
7970 struct btrfs_dio_private *dip;
7971 struct bio *bio;
7972 u64 start_sector;
7973 int async_submit = 0;
7974 u64 submit_len;
7975 u64 clone_offset = 0;
7976 u64 clone_len;
7977 u64 logical;
7978 int ret;
7979 blk_status_t status;
7980 struct btrfs_io_geometry geom;
7981 struct btrfs_dio_data *dio_data = iter->iomap.private;
7982 struct extent_map *em = NULL;
7983
7984 dip = btrfs_create_dio_private(dio_bio, inode, file_offset);
7985 if (!dip) {
7986 if (!write) {
7987 unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
7988 file_offset + dio_bio->bi_iter.bi_size - 1);
7989 }
7990 dio_bio->bi_status = BLK_STS_RESOURCE;
7991 bio_endio(dio_bio);
7992 return;
7993 }
7994
7995 if (!write) {
7996
7997
7998
7999
8000
8001
8002 status = btrfs_lookup_bio_sums(inode, dio_bio, dip->csums);
8003 if (status != BLK_STS_OK)
8004 goto out_err;
8005 }
8006
8007 start_sector = dio_bio->bi_iter.bi_sector;
8008 submit_len = dio_bio->bi_iter.bi_size;
8009
8010 do {
8011 logical = start_sector << 9;
8012 em = btrfs_get_chunk_map(fs_info, logical, submit_len);
8013 if (IS_ERR(em)) {
8014 status = errno_to_blk_status(PTR_ERR(em));
8015 em = NULL;
8016 goto out_err_em;
8017 }
8018 ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(dio_bio),
8019 logical, &geom);
8020 if (ret) {
8021 status = errno_to_blk_status(ret);
8022 goto out_err_em;
8023 }
8024
8025 clone_len = min(submit_len, geom.len);
8026 ASSERT(clone_len <= UINT_MAX);
8027
8028
8029
8030
8031
8032 bio = btrfs_bio_clone_partial(dio_bio, clone_offset, clone_len);
8033 bio->bi_private = dip;
8034 bio->bi_end_io = btrfs_end_dio_bio;
8035 btrfs_bio(bio)->file_offset = file_offset;
8036
8037 if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
8038 status = extract_ordered_extent(BTRFS_I(inode), bio,
8039 file_offset);
8040 if (status) {
8041 bio_put(bio);
8042 goto out_err;
8043 }
8044 }
8045
8046 ASSERT(submit_len >= clone_len);
8047 submit_len -= clone_len;
8048
8049
8050
8051
8052
8053
8054
8055
8056
8057
8058 if (submit_len > 0) {
8059 refcount_inc(&dip->refs);
8060
8061
8062
8063
8064
8065
8066 if (!raid56)
8067 async_submit = 1;
8068 }
8069
8070 status = btrfs_submit_dio_bio(bio, inode, file_offset,
8071 async_submit);
8072 if (status) {
8073 bio_put(bio);
8074 if (submit_len > 0)
8075 refcount_dec(&dip->refs);
8076 goto out_err_em;
8077 }
8078
8079 dio_data->submitted += clone_len;
8080 clone_offset += clone_len;
8081 start_sector += clone_len >> 9;
8082 file_offset += clone_len;
8083
8084 free_extent_map(em);
8085 } while (submit_len > 0);
8086 return;
8087
8088out_err_em:
8089 free_extent_map(em);
8090out_err:
8091 dip->dio_bio->bi_status = status;
8092 btrfs_dio_private_put(dip);
8093}
8094
/* iomap begin/end callbacks used by btrfs for direct I/O. */
const struct iomap_ops btrfs_dio_iomap_ops = {
	.iomap_begin = btrfs_dio_iomap_begin,
	.iomap_end = btrfs_dio_iomap_end,
};
8099
/* iomap direct I/O operations: bios are submitted via btrfs_submit_direct(). */
const struct iomap_dio_ops btrfs_dio_ops = {
	.submit_io = btrfs_submit_direct,
};
8103
8104static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
8105 u64 start, u64 len)
8106{
8107 int ret;
8108
8109 ret = fiemap_prep(inode, fieinfo, start, &len, 0);
8110 if (ret)
8111 return ret;
8112
8113 return extent_fiemap(BTRFS_I(inode), fieinfo, start, len);
8114}
8115
8116int btrfs_readpage(struct file *file, struct page *page)
8117{
8118 struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
8119 u64 start = page_offset(page);
8120 u64 end = start + PAGE_SIZE - 1;
8121 struct btrfs_bio_ctrl bio_ctrl = { 0 };
8122 int ret;
8123
8124 btrfs_lock_and_flush_ordered_range(inode, start, end, NULL);
8125
8126 ret = btrfs_do_readpage(page, NULL, &bio_ctrl, 0, NULL);
8127 if (bio_ctrl.bio) {
8128 int ret2;
8129
8130 ret2 = submit_one_bio(bio_ctrl.bio, 0, bio_ctrl.bio_flags);
8131 if (ret == 0)
8132 ret = ret2;
8133 }
8134 return ret;
8135}
8136
8137static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
8138{
8139 struct inode *inode = page->mapping->host;
8140 int ret;
8141
8142 if (current->flags & PF_MEMALLOC) {
8143 redirty_page_for_writepage(wbc, page);
8144 unlock_page(page);
8145 return 0;
8146 }
8147
8148
8149
8150
8151
8152
8153 if (!igrab(inode)) {
8154 redirty_page_for_writepage(wbc, page);
8155 return AOP_WRITEPAGE_ACTIVATE;
8156 }
8157 ret = extent_write_full_page(page, wbc);
8158 btrfs_add_delayed_iput(inode);
8159 return ret;
8160}
8161
/* Write back dirty pages of @mapping; thin wrapper around extent_writepages(). */
static int btrfs_writepages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	return extent_writepages(mapping, wbc);
}
8167
/* Readahead entry point; thin wrapper around extent_readahead(). */
static void btrfs_readahead(struct readahead_control *rac)
{
	extent_readahead(rac);
}
8172
8173
8174
8175
8176
8177
8178
8179
8180static void wait_subpage_spinlock(struct page *page)
8181{
8182 struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
8183 struct btrfs_subpage *subpage;
8184
8185 if (fs_info->sectorsize == PAGE_SIZE)
8186 return;
8187
8188 ASSERT(PagePrivate(page) && page->private);
8189 subpage = (struct btrfs_subpage *)page->private;
8190
8191
8192
8193
8194
8195
8196
8197
8198
8199
8200
8201
8202 spin_lock_irq(&subpage->lock);
8203 spin_unlock_irq(&subpage->lock);
8204}
8205
8206static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
8207{
8208 int ret = try_release_extent_mapping(page, gfp_flags);
8209
8210 if (ret == 1) {
8211 wait_subpage_spinlock(page);
8212 clear_page_extent_mapped(page);
8213 }
8214 return ret;
8215}
8216
8217static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
8218{
8219 if (PageWriteback(page) || PageDirty(page))
8220 return 0;
8221 return __btrfs_releasepage(page, gfp_flags);
8222}
8223
#ifdef CONFIG_MIGRATION
/*
 * Migrate @page to @newpage within @mapping.
 *
 * After the mapping has been moved, the private data and the Ordered flag
 * are transferred to @newpage.  Depending on @mode either the full page
 * (contents and state) or only the page state is copied.
 *
 * Returns MIGRATEPAGE_SUCCESS, or the failure code from
 * migrate_page_move_mapping().
 */
static int btrfs_migratepage(struct address_space *mapping,
			     struct page *newpage, struct page *page,
			     enum migrate_mode mode)
{
	const int rc = migrate_page_move_mapping(mapping, newpage, page, 0);

	if (rc != MIGRATEPAGE_SUCCESS)
		return rc;

	if (page_has_private(page))
		attach_page_private(newpage, detach_page_private(page));

	/* Carry the Ordered flag over to the new page. */
	if (PageOrdered(page)) {
		ClearPageOrdered(page);
		SetPageOrdered(newpage);
	}

	if (mode == MIGRATE_SYNC_NO_COPY)
		migrate_page_states(newpage, page);
	else
		migrate_page_copy(newpage, page);

	return MIGRATEPAGE_SUCCESS;
}
#endif
8250
/*
 * Invalidate (part of) a folio.
 *
 * Writeback on the folio is waited for first.  A partial invalidation only
 * attempts to release the page.  A full invalidation walks the folio's file
 * range, handles every ordered extent overlapping it (marking it truncated
 * and accounting the range as finished), frees qgroup reserved data and
 * clears the extent state bits, then releases the page.
 *
 * @folio:  folio being invalidated
 * @offset: start of the invalidated range inside the folio
 * @length: length of the invalidated range
 */
static void btrfs_invalidate_folio(struct folio *folio, size_t offset,
				 size_t length)
{
	struct btrfs_inode *inode = BTRFS_I(folio->mapping->host);
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	struct extent_io_tree *tree = &inode->io_tree;
	struct extent_state *cached_state = NULL;
	u64 page_start = folio_pos(folio);
	u64 page_end = page_start + folio_size(folio) - 1;
	u64 cur;
	/*
	 * Non-zero while the VFS inode is being freed (I_FREEING).  In that
	 * case the io_tree is neither locked nor modified by this function.
	 */
	int inode_evicting = inode->vfs_inode.i_state & I_FREEING;

	/*
	 * Wait for writeback to finish and for any holder of the subpage
	 * spinlock before touching the folio's state.
	 */
	folio_wait_writeback(folio);
	wait_subpage_spinlock(&folio->page);

	/*
	 * Only part of the folio is invalidated: just try to release the page
	 * (which does nothing for dirty or writeback pages) and stop here.
	 */
	if (!(offset == 0 && length == folio_size(folio))) {
		btrfs_releasepage(&folio->page, GFP_NOFS);
		return;
	}

	if (!inode_evicting)
		lock_extent_bits(tree, page_start, page_end, &cached_state);

	/* Walk the folio range, one ordered / non-ordered sub-range at a time. */
	cur = page_start;
	while (cur < page_end) {
		struct btrfs_ordered_extent *ordered;
		bool delete_states;
		u64 range_end;
		u32 range_len;

		ordered = btrfs_lookup_first_ordered_range(inode, cur,
							   page_end + 1 - cur);
		if (!ordered) {
			range_end = page_end;
			/*
			 * No ordered extent in the rest of the folio: the
			 * whole remainder can have its states deleted.
			 */
			delete_states = true;
			goto next;
		}
		if (ordered->file_offset > cur) {
			/*
			 * There is a gap before the ordered extent.  Handle
			 * the gap [cur, ordered->file_offset - 1] now; the
			 * ordered extent is picked up by the next iteration.
			 */
			range_end = ordered->file_offset - 1;
			delete_states = true;
			goto next;
		}

		range_end = min(ordered->file_offset + ordered->num_bytes - 1,
				page_end);
		ASSERT(range_end + 1 - cur < U32_MAX);
		range_len = range_end + 1 - cur;
		if (!btrfs_page_test_ordered(fs_info, &folio->page, cur, range_len)) {
			/*
			 * The Ordered bit is not set for this range, so the
			 * ordered accounting below is skipped and the states
			 * are kept (delete_states == false).
			 */
			delete_states = false;
			goto next;
		}
		btrfs_page_clear_ordered(fs_info, &folio->page, cur, range_len);

		/*
		 * Clear the delalloc/locked/accounting/defrag bits for the
		 * range before the ordered extent is updated.  Skipped when
		 * the inode is being evicted.
		 */
		if (!inode_evicting)
			clear_extent_bit(tree, cur, range_end,
					 EXTENT_DELALLOC |
					 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
					 EXTENT_DEFRAG, 1, 0, &cached_state);

		/* Mark the ordered extent truncated at (or before) 'cur'. */
		spin_lock_irq(&inode->ordered_tree.lock);
		set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
		ordered->truncated_len = min(ordered->truncated_len,
					     cur - ordered->file_offset);
		spin_unlock_irq(&inode->ordered_tree.lock);

		if (btrfs_dec_test_ordered_pending(inode, &ordered,
						   cur, range_end + 1 - cur)) {
			btrfs_finish_ordered_io(ordered);
			/*
			 * The ordered extent was finished right here, so the
			 * extent states of the range can be deleted.
			 */
			delete_states = true;
		} else {
			/*
			 * The ordered extent is not complete yet; keep the
			 * extent states for the range.
			 */
			delete_states = false;
		}
next:
		if (ordered)
			btrfs_put_ordered_extent(ordered);
		/*
		 * Free the qgroup reserved data of the range and, unless the
		 * inode is being evicted, clear the remaining extent bits.
		 * delete_states (decided above) is passed to the final
		 * clear_extent_bit() call.
		 */
		btrfs_qgroup_free_data(inode, NULL, cur, range_end + 1 - cur);
		if (!inode_evicting) {
			clear_extent_bit(tree, cur, range_end, EXTENT_LOCKED |
				 EXTENT_DELALLOC | EXTENT_UPTODATE |
				 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1,
				 delete_states, &cached_state);
		}
		cur = range_end + 1;
	}
	/*
	 * Every ordered range was processed above, so the folio must no
	 * longer carry the Ordered flag.
	 */
	ASSERT(!folio_test_ordered(folio));
	btrfs_page_clear_checked(fs_info, &folio->page, folio_pos(folio), folio_size(folio));
	if (!inode_evicting)
		__btrfs_releasepage(&folio->page, GFP_NOFS);
	clear_page_extent_mapped(&folio->page);
}
8419
8420
8421
8422
8423
8424
8425
8426
8427
8428
8429
8430
8431
8432
8433
8434
8435vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
8436{
8437 struct page *page = vmf->page;
8438 struct inode *inode = file_inode(vmf->vma->vm_file);
8439 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
8440 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
8441 struct btrfs_ordered_extent *ordered;
8442 struct extent_state *cached_state = NULL;
8443 struct extent_changeset *data_reserved = NULL;
8444 unsigned long zero_start;
8445 loff_t size;
8446 vm_fault_t ret;
8447 int ret2;
8448 int reserved = 0;
8449 u64 reserved_space;
8450 u64 page_start;
8451 u64 page_end;
8452 u64 end;
8453
8454 reserved_space = PAGE_SIZE;
8455
8456 sb_start_pagefault(inode->i_sb);
8457 page_start = page_offset(page);
8458 page_end = page_start + PAGE_SIZE - 1;
8459 end = page_end;
8460
8461
8462
8463
8464
8465
8466
8467
8468
8469 ret2 = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
8470 page_start, reserved_space);
8471 if (!ret2) {
8472 ret2 = file_update_time(vmf->vma->vm_file);
8473 reserved = 1;
8474 }
8475 if (ret2) {
8476 ret = vmf_error(ret2);
8477 if (reserved)
8478 goto out;
8479 goto out_noreserve;
8480 }
8481
8482 ret = VM_FAULT_NOPAGE;
8483again:
8484 down_read(&BTRFS_I(inode)->i_mmap_lock);
8485 lock_page(page);
8486 size = i_size_read(inode);
8487
8488 if ((page->mapping != inode->i_mapping) ||
8489 (page_start >= size)) {
8490
8491 goto out_unlock;
8492 }
8493 wait_on_page_writeback(page);
8494
8495 lock_extent_bits(io_tree, page_start, page_end, &cached_state);
8496 ret2 = set_page_extent_mapped(page);
8497 if (ret2 < 0) {
8498 ret = vmf_error(ret2);
8499 unlock_extent_cached(io_tree, page_start, page_end, &cached_state);
8500 goto out_unlock;
8501 }
8502
8503
8504
8505
8506
8507 ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start,
8508 PAGE_SIZE);
8509 if (ordered) {
8510 unlock_extent_cached(io_tree, page_start, page_end,
8511 &cached_state);
8512 unlock_page(page);
8513 up_read(&BTRFS_I(inode)->i_mmap_lock);
8514 btrfs_start_ordered_extent(ordered, 1);
8515 btrfs_put_ordered_extent(ordered);
8516 goto again;
8517 }
8518
8519 if (page->index == ((size - 1) >> PAGE_SHIFT)) {
8520 reserved_space = round_up(size - page_start,
8521 fs_info->sectorsize);
8522 if (reserved_space < PAGE_SIZE) {
8523 end = page_start + reserved_space - 1;
8524 btrfs_delalloc_release_space(BTRFS_I(inode),
8525 data_reserved, page_start,
8526 PAGE_SIZE - reserved_space, true);
8527 }
8528 }
8529
8530
8531
8532
8533
8534
8535
8536
8537 clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end,
8538 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
8539 EXTENT_DEFRAG, 0, 0, &cached_state);
8540
8541 ret2 = btrfs_set_extent_delalloc(BTRFS_I(inode), page_start, end, 0,
8542 &cached_state);
8543 if (ret2) {
8544 unlock_extent_cached(io_tree, page_start, page_end,
8545 &cached_state);
8546 ret = VM_FAULT_SIGBUS;
8547 goto out_unlock;
8548 }
8549
8550
8551 if (page_start + PAGE_SIZE > size)
8552 zero_start = offset_in_page(size);
8553 else
8554 zero_start = PAGE_SIZE;
8555
8556 if (zero_start != PAGE_SIZE) {
8557 memzero_page(page, zero_start, PAGE_SIZE - zero_start);
8558 flush_dcache_page(page);
8559 }
8560 btrfs_page_clear_checked(fs_info, page, page_start, PAGE_SIZE);
8561 btrfs_page_set_dirty(fs_info, page, page_start, end + 1 - page_start);
8562 btrfs_page_set_uptodate(fs_info, page, page_start, end + 1 - page_start);
8563
8564 btrfs_set_inode_last_sub_trans(BTRFS_I(inode));
8565
8566 unlock_extent_cached(io_tree, page_start, page_end, &cached_state);
8567 up_read(&BTRFS_I(inode)->i_mmap_lock);
8568
8569 btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
8570 sb_end_pagefault(inode->i_sb);
8571 extent_changeset_free(data_reserved);
8572 return VM_FAULT_LOCKED;
8573
8574out_unlock:
8575 unlock_page(page);
8576 up_read(&BTRFS_I(inode)->i_mmap_lock);
8577out:
8578 btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
8579 btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, page_start,
8580 reserved_space, (ret != 0));
8581out_noreserve:
8582 sb_end_pagefault(inode->i_sb);
8583 extent_changeset_free(data_reserved);
8584 return ret;
8585}
8586
8587static int btrfs_truncate(struct inode *inode, bool skip_writeback)
8588{
8589 struct btrfs_truncate_control control = {
8590 .inode = BTRFS_I(inode),
8591 .ino = btrfs_ino(BTRFS_I(inode)),
8592 .min_type = BTRFS_EXTENT_DATA_KEY,
8593 .clear_extent_range = true,
8594 };
8595 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
8596 struct btrfs_root *root = BTRFS_I(inode)->root;
8597 struct btrfs_block_rsv *rsv;
8598 int ret;
8599 struct btrfs_trans_handle *trans;
8600 u64 mask = fs_info->sectorsize - 1;
8601 u64 min_size = btrfs_calc_metadata_size(fs_info, 1);
8602
8603 if (!skip_writeback) {
8604 ret = btrfs_wait_ordered_range(inode, inode->i_size & (~mask),
8605 (u64)-1);
8606 if (ret)
8607 return ret;
8608 }
8609
8610
8611
8612
8613
8614
8615
8616
8617
8618
8619
8620
8621
8622
8623
8624
8625
8626
8627
8628
8629
8630
8631
8632
8633
8634
8635
8636
8637
8638 rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
8639 if (!rsv)
8640 return -ENOMEM;
8641 rsv->size = min_size;
8642 rsv->failfast = 1;
8643
8644
8645
8646
8647
8648 trans = btrfs_start_transaction(root, 2);
8649 if (IS_ERR(trans)) {
8650 ret = PTR_ERR(trans);
8651 goto out;
8652 }
8653
8654
8655 ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv,
8656 min_size, false);
8657 BUG_ON(ret);
8658
8659 trans->block_rsv = rsv;
8660
8661 while (1) {
8662 struct extent_state *cached_state = NULL;
8663 const u64 new_size = inode->i_size;
8664 const u64 lock_start = ALIGN_DOWN(new_size, fs_info->sectorsize);
8665
8666 control.new_size = new_size;
8667 lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, (u64)-1,
8668 &cached_state);
8669
8670
8671
8672
8673
8674 btrfs_drop_extent_cache(BTRFS_I(inode),
8675 ALIGN(new_size, fs_info->sectorsize),
8676 (u64)-1, 0);
8677
8678 ret = btrfs_truncate_inode_items(trans, root, &control);
8679
8680 inode_sub_bytes(inode, control.sub_bytes);
8681 btrfs_inode_safe_disk_i_size_write(BTRFS_I(inode), control.last_size);
8682
8683 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start,
8684 (u64)-1, &cached_state);
8685
8686 trans->block_rsv = &fs_info->trans_block_rsv;
8687 if (ret != -ENOSPC && ret != -EAGAIN)
8688 break;
8689
8690 ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
8691 if (ret)
8692 break;
8693
8694 btrfs_end_transaction(trans);
8695 btrfs_btree_balance_dirty(fs_info);
8696
8697 trans = btrfs_start_transaction(root, 2);
8698 if (IS_ERR(trans)) {
8699 ret = PTR_ERR(trans);
8700 trans = NULL;
8701 break;
8702 }
8703
8704 btrfs_block_rsv_release(fs_info, rsv, -1, NULL);
8705 ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv,
8706 rsv, min_size, false);
8707 BUG_ON(ret);
8708 trans->block_rsv = rsv;
8709 }
8710
8711
8712
8713
8714
8715
8716
8717 if (ret == BTRFS_NEED_TRUNCATE_BLOCK) {
8718 btrfs_end_transaction(trans);
8719 btrfs_btree_balance_dirty(fs_info);
8720
8721 ret = btrfs_truncate_block(BTRFS_I(inode), inode->i_size, 0, 0);
8722 if (ret)
8723 goto out;
8724 trans = btrfs_start_transaction(root, 1);
8725 if (IS_ERR(trans)) {
8726 ret = PTR_ERR(trans);
8727 goto out;
8728 }
8729 btrfs_inode_safe_disk_i_size_write(BTRFS_I(inode), 0);
8730 }
8731
8732 if (trans) {
8733 int ret2;
8734
8735 trans->block_rsv = &fs_info->trans_block_rsv;
8736 ret2 = btrfs_update_inode(trans, root, BTRFS_I(inode));
8737 if (ret2 && !ret)
8738 ret = ret2;
8739
8740 ret2 = btrfs_end_transaction(trans);
8741 if (ret2 && !ret)
8742 ret = ret2;
8743 btrfs_btree_balance_dirty(fs_info);
8744 }
8745out:
8746 btrfs_free_block_rsv(fs_info, rsv);
8747
8748
8749
8750
8751
8752
8753
8754
8755
8756
8757
8758
8759
8760
8761 if (control.extents_found > 0)
8762 btrfs_set_inode_full_sync(BTRFS_I(inode));
8763
8764 return ret;
8765}
8766
8767
8768
8769
8770int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
8771 struct btrfs_root *new_root,
8772 struct btrfs_root *parent_root,
8773 struct user_namespace *mnt_userns)
8774{
8775 struct inode *inode;
8776 int err;
8777 u64 index = 0;
8778 u64 ino;
8779
8780 err = btrfs_get_free_objectid(new_root, &ino);
8781 if (err < 0)
8782 return err;
8783
8784 inode = btrfs_new_inode(trans, new_root, mnt_userns, NULL, "..", 2,
8785 ino, ino,
8786 S_IFDIR | (~current_umask() & S_IRWXUGO),
8787 &index);
8788 if (IS_ERR(inode))
8789 return PTR_ERR(inode);
8790 inode->i_op = &btrfs_dir_inode_operations;
8791 inode->i_fop = &btrfs_dir_file_operations;
8792
8793 set_nlink(inode, 1);
8794 btrfs_i_size_write(BTRFS_I(inode), 0);
8795 unlock_new_inode(inode);
8796
8797 err = btrfs_subvol_inherit_props(trans, new_root, parent_root);
8798 if (err)
8799 btrfs_err(new_root->fs_info,
8800 "error inheriting subvolume %llu properties: %d",
8801 new_root->root_key.objectid, err);
8802
8803 err = btrfs_update_inode(trans, new_root, BTRFS_I(inode));
8804
8805 iput(inode);
8806 return err;
8807}
8808
8809struct inode *btrfs_alloc_inode(struct super_block *sb)
8810{
8811 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
8812 struct btrfs_inode *ei;
8813 struct inode *inode;
8814
8815 ei = alloc_inode_sb(sb, btrfs_inode_cachep, GFP_KERNEL);
8816 if (!ei)
8817 return NULL;
8818
8819 ei->root = NULL;
8820 ei->generation = 0;
8821 ei->last_trans = 0;
8822 ei->last_sub_trans = 0;
8823 ei->logged_trans = 0;
8824 ei->delalloc_bytes = 0;
8825 ei->new_delalloc_bytes = 0;
8826 ei->defrag_bytes = 0;
8827 ei->disk_i_size = 0;
8828 ei->flags = 0;
8829 ei->ro_flags = 0;
8830 ei->csum_bytes = 0;
8831 ei->index_cnt = (u64)-1;
8832 ei->dir_index = 0;
8833 ei->last_unlink_trans = 0;
8834 ei->last_reflink_trans = 0;
8835 ei->last_log_commit = 0;
8836
8837 spin_lock_init(&ei->lock);
8838 ei->outstanding_extents = 0;
8839 if (sb->s_magic != BTRFS_TEST_MAGIC)
8840 btrfs_init_metadata_block_rsv(fs_info, &ei->block_rsv,
8841 BTRFS_BLOCK_RSV_DELALLOC);
8842 ei->runtime_flags = 0;
8843 ei->prop_compress = BTRFS_COMPRESS_NONE;
8844 ei->defrag_compress = BTRFS_COMPRESS_NONE;
8845
8846 ei->delayed_node = NULL;
8847
8848 ei->i_otime.tv_sec = 0;
8849 ei->i_otime.tv_nsec = 0;
8850
8851 inode = &ei->vfs_inode;
8852 extent_map_tree_init(&ei->extent_tree);
8853 extent_io_tree_init(fs_info, &ei->io_tree, IO_TREE_INODE_IO, inode);
8854 extent_io_tree_init(fs_info, &ei->io_failure_tree,
8855 IO_TREE_INODE_IO_FAILURE, inode);
8856 extent_io_tree_init(fs_info, &ei->file_extent_tree,
8857 IO_TREE_INODE_FILE_EXTENT, inode);
8858 ei->io_tree.track_uptodate = true;
8859 ei->io_failure_tree.track_uptodate = true;
8860 atomic_set(&ei->sync_writers, 0);
8861 mutex_init(&ei->log_mutex);
8862 btrfs_ordered_inode_tree_init(&ei->ordered_tree);
8863 INIT_LIST_HEAD(&ei->delalloc_inodes);
8864 INIT_LIST_HEAD(&ei->delayed_iput);
8865 RB_CLEAR_NODE(&ei->rb_node);
8866 init_rwsem(&ei->i_mmap_lock);
8867
8868 return inode;
8869}
8870
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/*
 * Sanity-test only inode destructor: drop all cached extent maps of the
 * inode and return it to the inode slab cache.
 */
void btrfs_test_destroy_inode(struct inode *inode)
{
	btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0);
	kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
}
#endif
8878
/* Return the btrfs inode containing @inode to the inode slab cache. */
void btrfs_free_inode(struct inode *inode)
{
	kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
}
8883
8884void btrfs_destroy_inode(struct inode *vfs_inode)
8885{
8886 struct btrfs_ordered_extent *ordered;
8887 struct btrfs_inode *inode = BTRFS_I(vfs_inode);
8888 struct btrfs_root *root = inode->root;
8889
8890 WARN_ON(!hlist_empty(&vfs_inode->i_dentry));
8891 WARN_ON(vfs_inode->i_data.nrpages);
8892 WARN_ON(inode->block_rsv.reserved);
8893 WARN_ON(inode->block_rsv.size);
8894 WARN_ON(inode->outstanding_extents);
8895 if (!S_ISDIR(vfs_inode->i_mode)) {
8896 WARN_ON(inode->delalloc_bytes);
8897 WARN_ON(inode->new_delalloc_bytes);
8898 }
8899 WARN_ON(inode->csum_bytes);
8900 WARN_ON(inode->defrag_bytes);
8901
8902
8903
8904
8905
8906
8907 if (!root)
8908 return;
8909
8910 while (1) {
8911 ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
8912 if (!ordered)
8913 break;
8914 else {
8915 btrfs_err(root->fs_info,
8916 "found ordered extent %llu %llu on inode cleanup",
8917 ordered->file_offset, ordered->num_bytes);
8918 btrfs_remove_ordered_extent(inode, ordered);
8919 btrfs_put_ordered_extent(ordered);
8920 btrfs_put_ordered_extent(ordered);
8921 }
8922 }
8923 btrfs_qgroup_check_reserved_leak(inode);
8924 inode_tree_del(inode);
8925 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
8926 btrfs_inode_clear_file_extent_range(inode, 0, (u64)-1);
8927 btrfs_put_root(inode->root);
8928}
8929
8930int btrfs_drop_inode(struct inode *inode)
8931{
8932 struct btrfs_root *root = BTRFS_I(inode)->root;
8933
8934 if (root == NULL)
8935 return 1;
8936
8937
8938 if (btrfs_root_refs(&root->root_item) == 0)
8939 return 1;
8940 else
8941 return generic_drop_inode(inode);
8942}
8943
8944static void init_once(void *foo)
8945{
8946 struct btrfs_inode *ei = (struct btrfs_inode *) foo;
8947
8948 inode_init_once(&ei->vfs_inode);
8949}
8950
/*
 * Destroy the slab caches created by btrfs_init_cachep().
 *
 * Also used as the error path of btrfs_init_cachep(), so some of the cache
 * pointers may still be NULL when this runs.
 */
void __cold btrfs_destroy_cachep(void)
{
	/*
	 * Wait for all outstanding RCU callbacks to complete before the
	 * caches go away (presumably so that RCU-delayed inode frees have
	 * finished).
	 */
	rcu_barrier();
	kmem_cache_destroy(btrfs_inode_cachep);
	kmem_cache_destroy(btrfs_trans_handle_cachep);
	kmem_cache_destroy(btrfs_path_cachep);
	kmem_cache_destroy(btrfs_free_space_cachep);
	kmem_cache_destroy(btrfs_free_space_bitmap_cachep);
}
8964
8965int __init btrfs_init_cachep(void)
8966{
8967 btrfs_inode_cachep = kmem_cache_create("btrfs_inode",
8968 sizeof(struct btrfs_inode), 0,
8969 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT,
8970 init_once);
8971 if (!btrfs_inode_cachep)
8972 goto fail;
8973
8974 btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle",
8975 sizeof(struct btrfs_trans_handle), 0,
8976 SLAB_TEMPORARY | SLAB_MEM_SPREAD, NULL);
8977 if (!btrfs_trans_handle_cachep)
8978 goto fail;
8979
8980 btrfs_path_cachep = kmem_cache_create("btrfs_path",
8981 sizeof(struct btrfs_path), 0,
8982 SLAB_MEM_SPREAD, NULL);
8983 if (!btrfs_path_cachep)
8984 goto fail;
8985
8986 btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space",
8987 sizeof(struct btrfs_free_space), 0,
8988 SLAB_MEM_SPREAD, NULL);
8989 if (!btrfs_free_space_cachep)
8990 goto fail;
8991
8992 btrfs_free_space_bitmap_cachep = kmem_cache_create("btrfs_free_space_bitmap",
8993 PAGE_SIZE, PAGE_SIZE,
8994 SLAB_MEM_SPREAD, NULL);
8995 if (!btrfs_free_space_bitmap_cachep)
8996 goto fail;
8997
8998 return 0;
8999fail:
9000 btrfs_destroy_cachep();
9001 return -ENOMEM;
9002}
9003
9004static int btrfs_getattr(struct user_namespace *mnt_userns,
9005 const struct path *path, struct kstat *stat,
9006 u32 request_mask, unsigned int flags)
9007{
9008 u64 delalloc_bytes;
9009 u64 inode_bytes;
9010 struct inode *inode = d_inode(path->dentry);
9011 u32 blocksize = inode->i_sb->s_blocksize;
9012 u32 bi_flags = BTRFS_I(inode)->flags;
9013 u32 bi_ro_flags = BTRFS_I(inode)->ro_flags;
9014
9015 stat->result_mask |= STATX_BTIME;
9016 stat->btime.tv_sec = BTRFS_I(inode)->i_otime.tv_sec;
9017 stat->btime.tv_nsec = BTRFS_I(inode)->i_otime.tv_nsec;
9018 if (bi_flags & BTRFS_INODE_APPEND)
9019 stat->attributes |= STATX_ATTR_APPEND;
9020 if (bi_flags & BTRFS_INODE_COMPRESS)
9021 stat->attributes |= STATX_ATTR_COMPRESSED;
9022 if (bi_flags & BTRFS_INODE_IMMUTABLE)
9023 stat->attributes |= STATX_ATTR_IMMUTABLE;
9024 if (bi_flags & BTRFS_INODE_NODUMP)
9025 stat->attributes |= STATX_ATTR_NODUMP;
9026 if (bi_ro_flags & BTRFS_INODE_RO_VERITY)
9027 stat->attributes |= STATX_ATTR_VERITY;
9028
9029 stat->attributes_mask |= (STATX_ATTR_APPEND |
9030 STATX_ATTR_COMPRESSED |
9031 STATX_ATTR_IMMUTABLE |
9032 STATX_ATTR_NODUMP);
9033
9034 generic_fillattr(mnt_userns, inode, stat);
9035 stat->dev = BTRFS_I(inode)->root->anon_dev;
9036
9037 spin_lock(&BTRFS_I(inode)->lock);
9038 delalloc_bytes = BTRFS_I(inode)->new_delalloc_bytes;
9039 inode_bytes = inode_get_bytes(inode);
9040 spin_unlock(&BTRFS_I(inode)->lock);
9041 stat->blocks = (ALIGN(inode_bytes, blocksize) +
9042 ALIGN(delalloc_bytes, blocksize)) >> 9;
9043 return 0;
9044}
9045
/*
 * Atomically exchange two directory entries (old_dir/old_dentry and
 * new_dir/new_dentry).
 *
 * Both dentries must refer to existing inodes.  The exchange is done inside
 * a single transaction: new inode refs are inserted, both old names are
 * unlinked, and both inodes are linked again under the other name.
 *
 * Returns 0 on success, -EXDEV for a cross-root exchange that does not
 * involve two subvolume roots, or another negative errno.
 */
static int btrfs_rename_exchange(struct inode *old_dir,
			      struct dentry *old_dentry,
			      struct inode *new_dir,
			      struct dentry *new_dentry)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(old_dir->i_sb);
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(old_dir)->root;
	struct btrfs_root *dest = BTRFS_I(new_dir)->root;
	struct inode *new_inode = new_dentry->d_inode;
	struct inode *old_inode = old_dentry->d_inode;
	struct timespec64 ctime = current_time(old_inode);
	struct btrfs_rename_ctx old_rename_ctx;
	struct btrfs_rename_ctx new_rename_ctx;
	u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
	u64 new_ino = btrfs_ino(BTRFS_I(new_inode));
	u64 old_idx = 0;
	u64 new_idx = 0;
	int ret;
	int ret2;
	bool need_abort = false;

	/*
	 * When the two directories live in different roots, the exchange is
	 * only allowed if both inodes have the object id
	 * BTRFS_FIRST_FREE_OBJECTID (i.e. both are subvolume root
	 * directories); anything else is a cross-device rename.
	 */
	if (root != dest &&
	    (old_ino != BTRFS_FIRST_FREE_OBJECTID ||
	     new_ino != BTRFS_FIRST_FREE_OBJECTID))
		return -EXDEV;

	/* Hold subvol_sem for reading whenever a subvolume root is involved. */
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID ||
	    new_ino == BTRFS_FIRST_FREE_OBJECTID)
		down_read(&fs_info->subvol_sem);

	/*
	 * Reserve 12 items for the whole exchange.
	 * NOTE(review): presumably covers both directories, both inodes and
	 * the ref/dir items removed and added for each name - confirm.
	 */
	trans = btrfs_start_transaction(root, 12);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_notrans;
	}

	if (dest != root) {
		ret = btrfs_record_root_in_trans(trans, dest);
		if (ret)
			goto out_fail;
	}

	/*
	 * Allocate the directory indexes up front: old_idx is the index the
	 * old inode gets in new_dir, new_idx the one the new inode gets in
	 * old_dir.
	 */
	ret = btrfs_set_inode_index(BTRFS_I(new_dir), &old_idx);
	if (ret)
		goto out_fail;
	ret = btrfs_set_inode_index(BTRFS_I(old_dir), &new_idx);
	if (ret)
		goto out_fail;

	BTRFS_I(old_inode)->dir_index = 0ULL;
	BTRFS_I(new_inode)->dir_index = 0ULL;

	/* Reference for the old inode under its new name in new_dir. */
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
		/* A subvolume is involved: force a full log commit. */
		btrfs_set_log_full_commit(trans);
	} else {
		ret = btrfs_insert_inode_ref(trans, dest,
					     new_dentry->d_name.name,
					     new_dentry->d_name.len,
					     old_ino,
					     btrfs_ino(BTRFS_I(new_dir)),
					     old_idx);
		if (ret)
			goto out_fail;
		/* From here on a failure must abort the transaction. */
		need_abort = true;
	}

	/* Reference for the new inode under its new name in old_dir. */
	if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
		/* A subvolume is involved: force a full log commit. */
		btrfs_set_log_full_commit(trans);
	} else {
		ret = btrfs_insert_inode_ref(trans, root,
					     old_dentry->d_name.name,
					     old_dentry->d_name.len,
					     new_ino,
					     btrfs_ino(BTRFS_I(old_dir)),
					     new_idx);
		if (ret) {
			if (need_abort)
				btrfs_abort_transaction(trans, ret);
			goto out_fail;
		}
	}

	/* Bump versions and timestamps of both directories and both inodes. */
	inode_inc_iversion(old_dir);
	inode_inc_iversion(new_dir);
	inode_inc_iversion(old_inode);
	inode_inc_iversion(new_inode);
	old_dir->i_ctime = old_dir->i_mtime = ctime;
	new_dir->i_ctime = new_dir->i_mtime = ctime;
	old_inode->i_ctime = ctime;
	new_inode->i_ctime = ctime;

	if (old_dentry->d_parent != new_dentry->d_parent) {
		btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
				BTRFS_I(old_inode), 1);
		btrfs_record_unlink_dir(trans, BTRFS_I(new_dir),
				BTRFS_I(new_inode), 1);
	}

	/* Remove the old name of the old inode from old_dir. */
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
		ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
	} else {
		ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir),
					   BTRFS_I(old_dentry->d_inode),
					   old_dentry->d_name.name,
					   old_dentry->d_name.len,
					   &old_rename_ctx);
		if (!ret)
			ret = btrfs_update_inode(trans, root, BTRFS_I(old_inode));
	}
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	/* Remove the old name of the new inode from new_dir. */
	if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
		ret = btrfs_unlink_subvol(trans, new_dir, new_dentry);
	} else {
		ret = __btrfs_unlink_inode(trans, BTRFS_I(new_dir),
					   BTRFS_I(new_dentry->d_inode),
					   new_dentry->d_name.name,
					   new_dentry->d_name.len,
					   &new_rename_ctx);
		if (!ret)
			ret = btrfs_update_inode(trans, dest, BTRFS_I(new_inode));
	}
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	/* Link both inodes under their exchanged names. */
	ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode),
			     new_dentry->d_name.name,
			     new_dentry->d_name.len, 0, old_idx);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	ret = btrfs_add_link(trans, BTRFS_I(old_dir), BTRFS_I(new_inode),
			     old_dentry->d_name.name,
			     old_dentry->d_name.len, 0, new_idx);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	/* dir_index is only recorded for inodes with a single link. */
	if (old_inode->i_nlink == 1)
		BTRFS_I(old_inode)->dir_index = old_idx;
	if (new_inode->i_nlink == 1)
		BTRFS_I(new_inode)->dir_index = new_idx;

	/*
	 * Pin the logs of the roots while the new names are logged, and
	 * unpin them right after.  Subvolume roots are skipped because a full
	 * log commit was already forced for them above.
	 * NOTE(review): presumably pinning keeps other tasks from syncing
	 * the log in the middle of the rename - confirm against tree-log.
	 */
	if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
		btrfs_pin_log_trans(root);
	if (new_ino != BTRFS_FIRST_FREE_OBJECTID)
		btrfs_pin_log_trans(dest);

	/* Log the new names, using the indexes recorded by the unlinks. */
	if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
		btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir),
				   old_rename_ctx.index, new_dentry->d_parent);
	if (new_ino != BTRFS_FIRST_FREE_OBJECTID)
		btrfs_log_new_name(trans, new_dentry, BTRFS_I(new_dir),
				   new_rename_ctx.index, old_dentry->d_parent);

	/* Unpin the logs again. */
	if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
		btrfs_end_log_trans(root);
	if (new_ino != BTRFS_FIRST_FREE_OBJECTID)
		btrfs_end_log_trans(dest);
out_fail:
	/* The first error wins over an error from ending the transaction. */
	ret2 = btrfs_end_transaction(trans);
	ret = ret ? ret : ret2;
out_notrans:
	if (new_ino == BTRFS_FIRST_FREE_OBJECTID ||
	    old_ino == BTRFS_FIRST_FREE_OBJECTID)
		up_read(&fs_info->subvol_sem);

	return ret;
}
9257
9258static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans,
9259 struct btrfs_root *root,
9260 struct user_namespace *mnt_userns,
9261 struct inode *dir,
9262 struct dentry *dentry)
9263{
9264 int ret;
9265 struct inode *inode;
9266 u64 objectid;
9267 u64 index;
9268
9269 ret = btrfs_get_free_objectid(root, &objectid);
9270 if (ret)
9271 return ret;
9272
9273 inode = btrfs_new_inode(trans, root, mnt_userns, dir,
9274 dentry->d_name.name,
9275 dentry->d_name.len,
9276 btrfs_ino(BTRFS_I(dir)),
9277 objectid,
9278 S_IFCHR | WHITEOUT_MODE,
9279 &index);
9280
9281 if (IS_ERR(inode)) {
9282 ret = PTR_ERR(inode);
9283 return ret;
9284 }
9285
9286 inode->i_op = &btrfs_special_inode_operations;
9287 init_special_inode(inode, inode->i_mode,
9288 WHITEOUT_DEV);
9289
9290 ret = btrfs_init_inode_security(trans, inode, dir,
9291 &dentry->d_name);
9292 if (ret)
9293 goto out;
9294
9295 ret = btrfs_add_nondir(trans, BTRFS_I(dir), dentry,
9296 BTRFS_I(inode), 0, index);
9297 if (ret)
9298 goto out;
9299
9300 ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
9301out:
9302 unlock_new_inode(inode);
9303 if (ret)
9304 inode_dec_link_count(inode);
9305 iput(inode);
9306
9307 return ret;
9308}
9309
/*
 * Rename @old_dentry (in @old_dir) to @new_dentry (in @new_dir), optionally
 * leaving a whiteout behind when @flags contains RENAME_WHITEOUT.
 *
 * The work is done inside a single transaction started on the root of
 * @old_dir.  If @new_dentry already has an inode it is unlinked (and turned
 * into an orphan when its link count reaches zero) before the new link is
 * added.
 *
 * Returns 0 on success or a negative errno.  If the rename itself succeeded
 * but ending the transaction failed, that error is returned instead.
 */
static int btrfs_rename(struct user_namespace *mnt_userns,
			struct inode *old_dir, struct dentry *old_dentry,
			struct inode *new_dir, struct dentry *new_dentry,
			unsigned int flags)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(old_dir->i_sb);
	struct btrfs_trans_handle *trans;
	unsigned int trans_num_items;
	struct btrfs_root *root = BTRFS_I(old_dir)->root;
	struct btrfs_root *dest = BTRFS_I(new_dir)->root;
	struct inode *new_inode = d_inode(new_dentry);
	struct inode *old_inode = d_inode(old_dentry);
	struct btrfs_rename_ctx rename_ctx;
	u64 index = 0;
	int ret;
	int ret2;
	u64 old_ino = btrfs_ino(BTRFS_I(old_inode));

	/* Nothing may be renamed into an empty-subvolume placeholder dir. */
	if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
		return -EPERM;

	/*
	 * Crossing roots is only permitted when the source inode number is
	 * BTRFS_FIRST_FREE_OBJECTID.
	 * NOTE(review): presumably that identifies a subvolume root dir.
	 */
	if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
		return -EXDEV;

	if (old_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID ||
	    (new_inode && btrfs_ino(BTRFS_I(new_inode)) == BTRFS_FIRST_FREE_OBJECTID))
		return -ENOTEMPTY;

	/* A directory may only replace a directory that is empty. */
	if (S_ISDIR(old_inode->i_mode) && new_inode &&
	    new_inode->i_size > BTRFS_EMPTY_DIR_SIZE)
		return -ENOTEMPTY;


	/* Check the target name for collisions in the destination dir. */
	ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino,
			     new_dentry->d_name.name,
			     new_dentry->d_name.len);

	if (ret) {
		if (ret == -EEXIST) {
			/*
			 * -EEXIST is tolerated only when there really is an
			 * inode at the target name; otherwise warn and fail.
			 */
			if (WARN_ON(!new_inode)) {
				return ret;
			}
		} else {
			/* Any other error is returned unchanged. */
			return ret;
		}
	}
	ret = 0;

	/*
	 * When a regular file replaces a non-empty target, start writeback
	 * on the source file's mapping now.
	 * NOTE(review): presumably to keep that work out of the transaction
	 * commit path; confirm.
	 */
	if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size)
		filemap_flush(old_inode->i_mapping);

	/* Held (shared) across the rename when the source is FIRST_FREE. */
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
		down_read(&fs_info->subvol_sem);
	/*
	 * Reserve 11 items for the rename itself and 5 more when a whiteout
	 * inode has to be created.
	 * NOTE(review): the exact breakdown behind these counts is not
	 * visible in this function.
	 */
	trans_num_items = 11;
	if (flags & RENAME_WHITEOUT)
		trans_num_items += 5;
	trans = btrfs_start_transaction(root, trans_num_items);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_notrans;
	}

	/* The destination root takes part in this transaction as well. */
	if (dest != root) {
		ret = btrfs_record_root_in_trans(trans, dest);
		if (ret)
			goto out_fail;
	}

	/* Pick the directory index the new name will get in new_dir. */
	ret = btrfs_set_inode_index(BTRFS_I(new_dir), &index);
	if (ret)
		goto out_fail;

	BTRFS_I(old_inode)->dir_index = 0ULL;
	if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
		/* No inode ref is inserted here; force a full log commit. */
		btrfs_set_log_full_commit(trans);
	} else {
		ret = btrfs_insert_inode_ref(trans, dest,
					     new_dentry->d_name.name,
					     new_dentry->d_name.len,
					     old_ino,
					     btrfs_ino(BTRFS_I(new_dir)), index);
		if (ret)
			goto out_fail;
	}

	/* Bump versions and timestamps of everything the rename touches. */
	inode_inc_iversion(old_dir);
	inode_inc_iversion(new_dir);
	inode_inc_iversion(old_inode);
	old_dir->i_ctime = old_dir->i_mtime =
	new_dir->i_ctime = new_dir->i_mtime =
	old_inode->i_ctime = current_time(old_dir);

	if (old_dentry->d_parent != new_dentry->d_parent)
		btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
				BTRFS_I(old_inode), 1);

	/* Remove the old name.  From here on failures abort the transaction. */
	if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
		ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
	} else {
		/* rename_ctx is handed to the unlink; its index is logged below. */
		ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir),
					BTRFS_I(d_inode(old_dentry)),
					old_dentry->d_name.name,
					old_dentry->d_name.len,
					&rename_ctx);
		if (!ret)
			ret = btrfs_update_inode(trans, root, BTRFS_I(old_inode));
	}
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	/* Drop whatever currently lives at the target name. */
	if (new_inode) {
		inode_inc_iversion(new_inode);
		new_inode->i_ctime = current_time(new_inode);
		if (unlikely(btrfs_ino(BTRFS_I(new_inode)) ==
			     BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
			ret = btrfs_unlink_subvol(trans, new_dir, new_dentry);
			BUG_ON(new_inode->i_nlink == 0);
		} else {
			ret = btrfs_unlink_inode(trans, BTRFS_I(new_dir),
						 BTRFS_I(d_inode(new_dentry)),
						 new_dentry->d_name.name,
						 new_dentry->d_name.len);
		}
		/* Last link gone: register the replaced inode as an orphan. */
		if (!ret && new_inode->i_nlink == 0)
			ret = btrfs_orphan_add(trans,
					BTRFS_I(d_inode(new_dentry)));
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out_fail;
		}
	}

	/* Add the new name for old_inode at the index chosen above. */
	ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode),
			     new_dentry->d_name.name,
			     new_dentry->d_name.len, 0, index);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	/* The cached dir_index is only restored for single-link inodes. */
	if (old_inode->i_nlink == 1)
		BTRFS_I(old_inode)->dir_index = index;

	if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
		btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir),
				   rename_ctx.index, new_dentry->d_parent);

	/* Finally leave the whiteout at the old name, if requested. */
	if (flags & RENAME_WHITEOUT) {
		ret = btrfs_whiteout_for_rename(trans, root, mnt_userns,
						old_dir, old_dentry);

		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out_fail;
		}
	}
out_fail:
	/* An earlier error takes precedence over the end-transaction result. */
	ret2 = btrfs_end_transaction(trans);
	ret = ret ? ret : ret2;
out_notrans:
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
		up_read(&fs_info->subvol_sem);

	return ret;
}
9499
9500static int btrfs_rename2(struct user_namespace *mnt_userns, struct inode *old_dir,
9501 struct dentry *old_dentry, struct inode *new_dir,
9502 struct dentry *new_dentry, unsigned int flags)
9503{
9504 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
9505 return -EINVAL;
9506
9507 if (flags & RENAME_EXCHANGE)
9508 return btrfs_rename_exchange(old_dir, old_dentry, new_dir,
9509 new_dentry);
9510
9511 return btrfs_rename(mnt_userns, old_dir, old_dentry, new_dir,
9512 new_dentry, flags);
9513}
9514
/*
 * One queued request to flush the delalloc pages of a single inode; see
 * btrfs_alloc_delalloc_work() and btrfs_run_delalloc_work().
 */
struct btrfs_delalloc_work {
	/* Inode to flush; the worker drops this reference with iput(). */
	struct inode *inode;
	/* Completed by the worker once flushing has been started. */
	struct completion completion;
	/* Links the item into the submitter's local list of works. */
	struct list_head list;
	/* Work item whose handler is btrfs_run_delalloc_work(). */
	struct btrfs_work work;
};
9521
9522static void btrfs_run_delalloc_work(struct btrfs_work *work)
9523{
9524 struct btrfs_delalloc_work *delalloc_work;
9525 struct inode *inode;
9526
9527 delalloc_work = container_of(work, struct btrfs_delalloc_work,
9528 work);
9529 inode = delalloc_work->inode;
9530 filemap_flush(inode->i_mapping);
9531 if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
9532 &BTRFS_I(inode)->runtime_flags))
9533 filemap_flush(inode->i_mapping);
9534
9535 iput(inode);
9536 complete(&delalloc_work->completion);
9537}
9538
9539static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode)
9540{
9541 struct btrfs_delalloc_work *work;
9542
9543 work = kmalloc(sizeof(*work), GFP_NOFS);
9544 if (!work)
9545 return NULL;
9546
9547 init_completion(&work->completion);
9548 INIT_LIST_HEAD(&work->list);
9549 work->inode = inode;
9550 btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL);
9551
9552 return work;
9553}
9554
9555
9556
9557
9558
/*
 * Start writeback for the inodes on @root's delalloc list.
 *
 * @wbc:                writeback control; nr_to_write == LONG_MAX selects a
 *                      full flush, otherwise it is the remaining page budget
 * @snapshot:           set BTRFS_INODE_SNAPSHOT_FLUSH on every inode visited
 * @in_reclaim_context: skip inodes flagged BTRFS_INODE_NO_DELALLOC_FLUSH
 *
 * In full-flush mode one work item per inode is queued on the flush workers
 * and all of them are waited for before returning.  Otherwise each inode is
 * written synchronously with filemap_fdatawrite_wbc() until an error occurs
 * or the budget in @wbc is used up.
 *
 * Returns 0 on success or a negative errno.
 */
static int start_delalloc_inodes(struct btrfs_root *root,
				 struct writeback_control *wbc, bool snapshot,
				 bool in_reclaim_context)
{
	struct btrfs_inode *binode;
	struct inode *inode;
	struct btrfs_delalloc_work *work, *next;
	struct list_head works;
	struct list_head splice;
	int ret = 0;
	bool full_flush = wbc->nr_to_write == LONG_MAX;

	INIT_LIST_HEAD(&works);
	INIT_LIST_HEAD(&splice);

	mutex_lock(&root->delalloc_mutex);
	spin_lock(&root->delalloc_lock);
	/* Work on a private copy of the list; entries are moved back below. */
	list_splice_init(&root->delalloc_inodes, &splice);
	while (!list_empty(&splice)) {
		binode = list_entry(splice.next, struct btrfs_inode,
				    delalloc_inodes);

		/* Put the inode back on the root's list before handling it. */
		list_move_tail(&binode->delalloc_inodes,
			       &root->delalloc_inodes);

		if (in_reclaim_context &&
		    test_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &binode->runtime_flags))
			continue;

		/* igrab() failed: skip the inode, possibly rescheduling. */
		inode = igrab(&binode->vfs_inode);
		if (!inode) {
			cond_resched_lock(&root->delalloc_lock);
			continue;
		}
		/* The spinlock is not held while writeback is started. */
		spin_unlock(&root->delalloc_lock);

		if (snapshot)
			set_bit(BTRFS_INODE_SNAPSHOT_FLUSH,
				&binode->runtime_flags);
		if (full_flush) {
			/* The work item takes over our inode reference. */
			work = btrfs_alloc_delalloc_work(inode);
			if (!work) {
				iput(inode);
				ret = -ENOMEM;
				goto out;
			}
			list_add_tail(&work->list, &works);
			btrfs_queue_work(root->fs_info->flush_workers,
					 &work->work);
		} else {
			ret = filemap_fdatawrite_wbc(inode->i_mapping, wbc);
			btrfs_add_delayed_iput(inode);
			/* Stop on error or once the page budget is spent. */
			if (ret || wbc->nr_to_write <= 0)
				goto out;
		}
		cond_resched();
		spin_lock(&root->delalloc_lock);
	}
	spin_unlock(&root->delalloc_lock);

out:
	/* Wait for every queued work item, including on the error paths. */
	list_for_each_entry_safe(work, next, &works, list) {
		list_del_init(&work->list);
		wait_for_completion(&work->completion);
		kfree(work);
	}

	/* Early exit: return the unvisited inodes to the root's list. */
	if (!list_empty(&splice)) {
		spin_lock(&root->delalloc_lock);
		list_splice_tail(&splice, &root->delalloc_inodes);
		spin_unlock(&root->delalloc_lock);
	}
	mutex_unlock(&root->delalloc_mutex);
	return ret;
}
9634
9635int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context)
9636{
9637 struct writeback_control wbc = {
9638 .nr_to_write = LONG_MAX,
9639 .sync_mode = WB_SYNC_NONE,
9640 .range_start = 0,
9641 .range_end = LLONG_MAX,
9642 };
9643 struct btrfs_fs_info *fs_info = root->fs_info;
9644
9645 if (BTRFS_FS_ERROR(fs_info))
9646 return -EROFS;
9647
9648 return start_delalloc_inodes(root, &wbc, true, in_reclaim_context);
9649}
9650
/*
 * Start delalloc writeback on every root that has delalloc inodes.
 *
 * @nr:                 number of pages to write; LONG_MAX means flush
 *                      everything
 * @in_reclaim_context: passed through to start_delalloc_inodes()
 *
 * Returns -EROFS if the filesystem is in an error state, a negative errno
 * from start_delalloc_inodes(), or 0.
 */
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
			       bool in_reclaim_context)
{
	struct writeback_control wbc = {
		.nr_to_write = nr,
		.sync_mode = WB_SYNC_NONE,
		.range_start = 0,
		.range_end = LLONG_MAX,
	};
	struct btrfs_root *root;
	struct list_head splice;
	int ret;

	if (BTRFS_FS_ERROR(fs_info))
		return -EROFS;

	INIT_LIST_HEAD(&splice);

	mutex_lock(&fs_info->delalloc_root_mutex);
	spin_lock(&fs_info->delalloc_root_lock);
	/* Walk a private copy; roots are moved back as they are visited. */
	list_splice_init(&fs_info->delalloc_roots, &splice);
	while (!list_empty(&splice)) {
		/*
		 * For a full flush, reset the budget before each root so that
		 * start_delalloc_inodes() keeps seeing nr_to_write == LONG_MAX.
		 */
		if (nr == LONG_MAX)
			wbc.nr_to_write = LONG_MAX;

		root = list_first_entry(&splice, struct btrfs_root,
					delalloc_root);
		root = btrfs_grab_root(root);
		BUG_ON(!root);
		list_move_tail(&root->delalloc_root,
			       &fs_info->delalloc_roots);
		/* Do not hold the spinlock while flushing this root. */
		spin_unlock(&fs_info->delalloc_root_lock);

		ret = start_delalloc_inodes(root, &wbc, false, in_reclaim_context);
		btrfs_put_root(root);
		/* Stop on error or once the page budget is used up. */
		if (ret < 0 || wbc.nr_to_write <= 0)
			goto out;
		spin_lock(&fs_info->delalloc_root_lock);
	}
	spin_unlock(&fs_info->delalloc_root_lock);

	ret = 0;
out:
	/* Early exit: give the unvisited roots back to the global list. */
	if (!list_empty(&splice)) {
		spin_lock(&fs_info->delalloc_root_lock);
		list_splice_tail(&splice, &fs_info->delalloc_roots);
		spin_unlock(&fs_info->delalloc_root_lock);
	}
	mutex_unlock(&fs_info->delalloc_root_mutex);
	return ret;
}
9706
/*
 * Create the symbolic link @dentry in @dir pointing at @symname.
 *
 * The target path is stored as an inline file extent of the new inode, so
 * targets longer than BTRFS_MAX_INLINE_DATA_SIZE() are rejected with
 * -ENAMETOOLONG.
 *
 * Returns 0 on success or a negative errno.
 */
static int btrfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
			 struct dentry *dentry, const char *symname)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct inode *inode = NULL;
	int err;
	u64 objectid;
	u64 index = 0;
	int name_len;
	int datasize;
	unsigned long ptr;
	struct btrfs_file_extent_item *ei;
	struct extent_buffer *leaf;

	name_len = strlen(symname);
	if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(fs_info))
		return -ENAMETOOLONG;

	/*
	 * NOTE(review): the 7 reserved items presumably cover the inode item
	 * and ref, the dir items, the parent inode update, the inline extent
	 * and a security xattr; confirm the breakdown.
	 */
	trans = btrfs_start_transaction(root, 7);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	err = btrfs_get_free_objectid(root, &objectid);
	if (err)
		goto out_unlock;

	inode = btrfs_new_inode(trans, root, mnt_userns, dir,
				dentry->d_name.name, dentry->d_name.len,
				btrfs_ino(BTRFS_I(dir)), objectid,
				S_IFLNK | S_IRWXUGO, &index);
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
		/* Keep the cleanup at out_unlock from touching an ERR_PTR. */
		inode = NULL;
		goto out_unlock;
	}

	/*
	 * Install the regular-file operation vectors for now; i_op is
	 * switched to the symlink operations further down.
	 * NOTE(review): presumably needed by security modules that inspect
	 * the inode during initialization; confirm.
	 */
	inode->i_fop = &btrfs_file_operations;
	inode->i_op = &btrfs_file_inode_operations;
	inode->i_mapping->a_ops = &btrfs_aops;

	err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
	if (err)
		goto out_unlock;

	path = btrfs_alloc_path();
	if (!path) {
		err = -ENOMEM;
		goto out_unlock;
	}
	/* Insert an inline extent item at file offset 0 for the target. */
	key.objectid = btrfs_ino(BTRFS_I(inode));
	key.offset = 0;
	key.type = BTRFS_EXTENT_DATA_KEY;
	datasize = btrfs_file_extent_calc_inline_size(name_len);
	err = btrfs_insert_empty_item(trans, root, path, &key,
				      datasize);
	if (err) {
		btrfs_free_path(path);
		goto out_unlock;
	}
	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);
	btrfs_set_file_extent_generation(leaf, ei, trans->transid);
	btrfs_set_file_extent_type(leaf, ei,
				   BTRFS_FILE_EXTENT_INLINE);
	btrfs_set_file_extent_encryption(leaf, ei, 0);
	btrfs_set_file_extent_compression(leaf, ei, 0);
	btrfs_set_file_extent_other_encoding(leaf, ei, 0);
	btrfs_set_file_extent_ram_bytes(leaf, ei, name_len);

	/* Copy the target path (without NUL) into the inline data area. */
	ptr = btrfs_file_extent_inline_start(ei);
	write_extent_buffer(leaf, symname, ptr, name_len);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_free_path(path);

	inode->i_op = &btrfs_symlink_inode_operations;
	inode_nohighmem(inode);
	inode_set_bytes(inode, name_len);
	btrfs_i_size_write(BTRFS_I(inode), name_len);
	err = btrfs_update_inode(trans, root, BTRFS_I(inode));
	/*
	 * The directory entries are added last, and only if everything above
	 * succeeded, so the earlier error paths never have to remove them.
	 */
	if (!err)
		err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry,
				BTRFS_I(inode), 0, index);
	if (err)
		goto out_unlock;

	d_instantiate_new(dentry, inode);

out_unlock:
	btrfs_end_transaction(trans);
	/* On failure after inode creation, drop the link and the inode. */
	if (err && inode) {
		inode_dec_link_count(inode);
		discard_new_inode(inode);
	}
	btrfs_btree_balance_dirty(fs_info);
	return err;
}
9826
/*
 * Insert a preallocated file extent item for @inode at @file_offset.
 *
 * @trans_in: transaction to use, or NULL to let
 *            btrfs_replace_file_extents() provide one
 * @ins:      key of the reserved extent: objectid is the disk bytenr and
 *            offset is the length
 *
 * Returns the transaction handle that is in use afterwards (@trans_in when
 * it was given), or an ERR_PTR() on failure.  On failure the qgroup data
 * released at the start of the function is freed again.
 */
static struct btrfs_trans_handle *insert_prealloc_file_extent(
				       struct btrfs_trans_handle *trans_in,
				       struct btrfs_inode *inode,
				       struct btrfs_key *ins,
				       u64 file_offset)
{
	struct btrfs_file_extent_item stack_fi;
	struct btrfs_replace_extent_info extent_info;
	struct btrfs_trans_handle *trans = trans_in;
	struct btrfs_path *path;
	u64 start = ins->objectid;
	u64 len = ins->offset;
	int qgroup_released;
	int ret;

	/* Build the extent item on the stack; unset fields stay zero. */
	memset(&stack_fi, 0, sizeof(stack_fi));

	btrfs_set_stack_file_extent_type(&stack_fi, BTRFS_FILE_EXTENT_PREALLOC);
	btrfs_set_stack_file_extent_disk_bytenr(&stack_fi, start);
	btrfs_set_stack_file_extent_disk_num_bytes(&stack_fi, len);
	btrfs_set_stack_file_extent_num_bytes(&stack_fi, len);
	btrfs_set_stack_file_extent_ram_bytes(&stack_fi, len);
	btrfs_set_stack_file_extent_compression(&stack_fi, BTRFS_COMPRESS_NONE);
	/* Encryption and other encoding are left at 0 by the memset above. */

	qgroup_released = btrfs_qgroup_release_data(inode, file_offset, len);
	if (qgroup_released < 0)
		return ERR_PTR(qgroup_released);

	/* Caller supplied a transaction: insert the item directly in it. */
	if (trans) {
		ret = insert_reserved_file_extent(trans, inode,
						  file_offset, &stack_fi,
						  true, qgroup_released);
		if (ret)
			goto free_qgroup;
		return trans;
	}

	/* No transaction yet: go through btrfs_replace_file_extents(). */
	extent_info.disk_offset = start;
	extent_info.disk_len = len;
	extent_info.data_offset = 0;
	extent_info.data_len = len;
	extent_info.file_offset = file_offset;
	extent_info.extent_buf = (char *)&stack_fi;
	extent_info.is_new_extent = true;
	extent_info.qgroup_reserved = qgroup_released;
	extent_info.insertions = 0;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto free_qgroup;
	}

	/* On success the transaction handle is handed back through &trans. */
	ret = btrfs_replace_file_extents(inode, path, file_offset,
				     file_offset + len - 1, &extent_info,
				     &trans);
	btrfs_free_path(path);
	if (ret)
		goto free_qgroup;
	return trans;

free_qgroup:
	/*
	 * The qgroup data range was released at the top of this function.
	 * Since the extent was not inserted, free the released bytes here.
	 * NOTE(review): presumably they would otherwise be leaked, as nothing
	 * else in this function accounts for them; confirm.
	 */
	btrfs_qgroup_free_refroot(inode->root->fs_info,
			inode->root->root_key.objectid, qgroup_released,
			BTRFS_QGROUP_RSV_DATA);
	return ERR_PTR(ret);
}
9902
/*
 * Preallocate extents for the byte range [@start, @start + @num_bytes).
 *
 * @mode:       fallocate mode; FALLOC_FL_KEEP_SIZE suppresses i_size updates
 * @min_size:   smallest extent size passed to the allocator
 * @actual_len: upper bound for the new i_size
 * @alloc_hint: in/out allocation hint, advanced past each allocated extent
 * @trans:      transaction to use, or NULL to use one per allocated extent
 *
 * Extents are reserved in chunks of at most 256M.  On exit, reserved data
 * space for the part of the range that was never allocated is freed.
 *
 * Returns 0 on success or a negative errno.
 */
static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
				       u64 start, u64 num_bytes, u64 min_size,
				       loff_t actual_len, u64 *alloc_hint,
				       struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	struct extent_map *em;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_key ins;
	u64 cur_offset = start;
	u64 clear_offset = start;
	u64 i_size;
	u64 cur_bytes;
	u64 last_alloc = (u64)-1;
	int ret = 0;
	bool own_trans = true;
	u64 end = start + num_bytes - 1;

	/* A caller-provided transaction is never ended by this function. */
	if (trans)
		own_trans = false;
	while (num_bytes > 0) {
		cur_bytes = min_t(u64, num_bytes, SZ_256M);
		cur_bytes = max(cur_bytes, min_size);
		/*
		 * Never ask for more than the allocator returned last time.
		 * NOTE(review): presumably this helps on fragmented
		 * filesystems where only small extents are available.
		 */
		cur_bytes = min(cur_bytes, last_alloc);
		ret = btrfs_reserve_extent(root, cur_bytes, cur_bytes,
				min_size, 0, *alloc_hint, &ins, 1, 0);
		if (ret)
			break;

		/*
		 * This extent is now reserved, so the final cleanup must only
		 * free data space beyond it: advance clear_offset.
		 */
		clear_offset += ins.offset;

		last_alloc = ins.offset;
		trans = insert_prealloc_file_extent(trans, BTRFS_I(inode),
						    &ins, cur_offset);
		/*
		 * The block group reservation count is dropped only after the
		 * insertion attempt, whether it succeeded or not.
		 * NOTE(review): presumably to avoid racing with relocation.
		 */
		btrfs_dec_block_group_reservations(fs_info, ins.objectid);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			btrfs_free_reserved_extent(fs_info, ins.objectid,
						   ins.offset, 0);
			break;
		}

		btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset,
					cur_offset + ins.offset -1, 0);

		/* Without a cached extent map, require a full sync instead. */
		em = alloc_extent_map();
		if (!em) {
			btrfs_set_inode_full_sync(BTRFS_I(inode));
			goto next;
		}

		em->start = cur_offset;
		em->orig_start = cur_offset;
		em->len = ins.offset;
		em->block_start = ins.objectid;
		em->block_len = ins.offset;
		em->orig_block_len = ins.offset;
		em->ram_bytes = ins.offset;
		set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
		em->generation = trans->transid;

		/* Retry until no overlapping mapping is left in the tree. */
		while (1) {
			write_lock(&em_tree->lock);
			ret = add_extent_mapping(em_tree, em, 1);
			write_unlock(&em_tree->lock);
			if (ret != -EEXIST)
				break;
			btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset,
						cur_offset + ins.offset - 1,
						0);
		}
		free_extent_map(em);
next:
		num_bytes -= ins.offset;
		cur_offset += ins.offset;
		*alloc_hint = ins.objectid + ins.offset;

		inode_inc_iversion(inode);
		inode->i_ctime = current_time(inode);
		BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
		/* Grow i_size up to min(cur_offset, actual_len) if allowed. */
		if (!(mode & FALLOC_FL_KEEP_SIZE) &&
		    (actual_len > inode->i_size) &&
		    (cur_offset > inode->i_size)) {
			if (cur_offset > actual_len)
				i_size = actual_len;
			else
				i_size = cur_offset;
			i_size_write(inode, i_size);
			btrfs_inode_safe_disk_i_size_write(BTRFS_I(inode), 0);
		}

		ret = btrfs_update_inode(trans, root, BTRFS_I(inode));

		if (ret) {
			btrfs_abort_transaction(trans, ret);
			if (own_trans)
				btrfs_end_transaction(trans);
			break;
		}

		/* With own transactions, each extent gets a fresh one. */
		if (own_trans) {
			btrfs_end_transaction(trans);
			trans = NULL;
		}
	}
	/* Release the data space reservation of the unallocated tail. */
	if (clear_offset < end)
		btrfs_free_reserved_data_space(BTRFS_I(inode), NULL, clear_offset,
			end - clear_offset + 1);
	return ret;
}
10033
10034int btrfs_prealloc_file_range(struct inode *inode, int mode,
10035 u64 start, u64 num_bytes, u64 min_size,
10036 loff_t actual_len, u64 *alloc_hint)
10037{
10038 return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
10039 min_size, actual_len, alloc_hint,
10040 NULL);
10041}
10042
10043int btrfs_prealloc_file_range_trans(struct inode *inode,
10044 struct btrfs_trans_handle *trans, int mode,
10045 u64 start, u64 num_bytes, u64 min_size,
10046 loff_t actual_len, u64 *alloc_hint)
10047{
10048 return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
10049 min_size, actual_len, alloc_hint, trans);
10050}
10051
10052static int btrfs_permission(struct user_namespace *mnt_userns,
10053 struct inode *inode, int mask)
10054{
10055 struct btrfs_root *root = BTRFS_I(inode)->root;
10056 umode_t mode = inode->i_mode;
10057
10058 if (mask & MAY_WRITE &&
10059 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
10060 if (btrfs_root_readonly(root))
10061 return -EROFS;
10062 if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY)
10063 return -EACCES;
10064 }
10065 return generic_permission(mnt_userns, inode, mask);
10066}
10067
/*
 * Create an unnamed temporary file in @dir and attach it to @dentry.
 *
 * The new inode is created without a name, written to the tree and
 * registered as an orphan before being handed to d_tmpfile().
 *
 * Returns 0 on success or a negative errno.
 */
static int btrfs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
			 struct dentry *dentry, umode_t mode)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct inode *inode = NULL;
	u64 objectid;
	u64 index;
	int ret = 0;

	/*
	 * NOTE(review): the breakdown of the 5 reserved items is not visible
	 * in this function; confirm.
	 */
	trans = btrfs_start_transaction(root, 5);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	ret = btrfs_get_free_objectid(root, &objectid);
	if (ret)
		goto out;

	/* No name is passed: the file is not linked into the directory. */
	inode = btrfs_new_inode(trans, root, mnt_userns, dir, NULL, 0,
			btrfs_ino(BTRFS_I(dir)), objectid, mode, &index);
	if (IS_ERR(inode)) {
		ret = PTR_ERR(inode);
		/* Keep the cleanup at out from touching an ERR_PTR. */
		inode = NULL;
		goto out;
	}

	inode->i_fop = &btrfs_file_operations;
	inode->i_op = &btrfs_file_inode_operations;

	inode->i_mapping->a_ops = &btrfs_aops;

	ret = btrfs_init_inode_security(trans, inode, dir, NULL);
	if (ret)
		goto out;

	ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
	if (ret)
		goto out;
	ret = btrfs_orphan_add(trans, BTRFS_I(inode));
	if (ret)
		goto out;

	/*
	 * The link count is forced to 1 right before d_tmpfile().
	 * NOTE(review): presumably because d_tmpfile() decrements the link
	 * count and would warn if it were already 0; confirm against the VFS.
	 */
	set_nlink(inode, 1);
	d_tmpfile(dentry, inode);
	unlock_new_inode(inode);
	mark_inode_dirty(inode);
out:
	btrfs_end_transaction(trans);
	/* On failure after inode creation, discard the new inode. */
	if (ret && inode)
		discard_new_inode(inode);
	btrfs_btree_balance_dirty(fs_info);
	return ret;
}
10132
10133void btrfs_set_range_writeback(struct btrfs_inode *inode, u64 start, u64 end)
10134{
10135 struct btrfs_fs_info *fs_info = inode->root->fs_info;
10136 unsigned long index = start >> PAGE_SHIFT;
10137 unsigned long end_index = end >> PAGE_SHIFT;
10138 struct page *page;
10139 u32 len;
10140
10141 ASSERT(end + 1 - start <= U32_MAX);
10142 len = end + 1 - start;
10143 while (index <= end_index) {
10144 page = find_get_page(inode->vfs_inode.i_mapping, index);
10145 ASSERT(page);
10146
10147 btrfs_page_set_writeback(fs_info, page, start, len);
10148 put_page(page);
10149 index++;
10150 }
10151}
10152
10153static int btrfs_encoded_io_compression_from_extent(
10154 struct btrfs_fs_info *fs_info,
10155 int compress_type)
10156{
10157 switch (compress_type) {
10158 case BTRFS_COMPRESS_NONE:
10159 return BTRFS_ENCODED_IO_COMPRESSION_NONE;
10160 case BTRFS_COMPRESS_ZLIB:
10161 return BTRFS_ENCODED_IO_COMPRESSION_ZLIB;
10162 case BTRFS_COMPRESS_LZO:
10163
10164
10165
10166
10167 if (fs_info->sectorsize < SZ_4K || fs_info->sectorsize > SZ_64K)
10168 return -EINVAL;
10169 return BTRFS_ENCODED_IO_COMPRESSION_LZO_4K +
10170 (fs_info->sectorsize_bits - 12);
10171 case BTRFS_COMPRESS_ZSTD:
10172 return BTRFS_ENCODED_IO_COMPRESSION_ZSTD;
10173 default:
10174 return -EUCLEAN;
10175 }
10176}
10177
/*
 * Encoded read of an inline extent.
 *
 * @start, @lockend: extent range in the inode's io_tree that is unlocked
 *                   once the data has been copied out of the leaf
 * @cached_state:    cached extent state passed to the unlock
 * @extent_start:    file offset of the inline extent item
 * @count:           number of bytes available in @iter
 * @encoded:         filled in with len, compression and, for compressed
 *                   extents, unencoded_len and unencoded_offset
 * @unlocked:        set to true once the extent range and the shared inode
 *                   lock have been dropped here
 *
 * Returns the number of bytes copied to @iter or a negative errno
 * (-ENOBUFS if a compressed inline extent does not fit into @count).
 */
static ssize_t btrfs_encoded_read_inline(
				struct kiocb *iocb,
				struct iov_iter *iter, u64 start,
				u64 lockend,
				struct extent_state **cached_state,
				u64 extent_start, size_t count,
				struct btrfs_ioctl_encoded_io_args *encoded,
				bool *unlocked)
{
	struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp));
	struct btrfs_root *root = inode->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_io_tree *io_tree = &inode->io_tree;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *item;
	u64 ram_bytes;
	unsigned long ptr;
	void *tmp;
	ssize_t ret;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}
	ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode),
				       extent_start, 0);
	if (ret) {
		if (ret > 0) {
			/* A positive result (item not found) is reported as -EIO. */
			ret = -EIO;
		}
		goto out;
	}
	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);

	ram_bytes = btrfs_file_extent_ram_bytes(leaf, item);
	ptr = btrfs_file_extent_inline_start(item);

	/* Bytes from ki_pos up to the extent end, clamped to i_size. */
	encoded->len = min_t(u64, extent_start + ram_bytes,
			     inode->vfs_inode.i_size) - iocb->ki_pos;
	ret = btrfs_encoded_io_compression_from_extent(fs_info,
				 btrfs_file_extent_compression(leaf, item));
	if (ret < 0)
		goto out;
	encoded->compression = ret;
	if (encoded->compression) {
		size_t inline_size;

		/* Compressed: the whole inline item is returned as-is. */
		inline_size = btrfs_file_extent_inline_item_len(leaf,
								path->slots[0]);
		if (inline_size > count) {
			ret = -ENOBUFS;
			goto out;
		}
		count = inline_size;
		encoded->unencoded_len = ram_bytes;
		encoded->unencoded_offset = iocb->ki_pos - extent_start;
	} else {
		/* Uncompressed: return only the requested part of the data. */
		count = min_t(u64, count, encoded->len);
		encoded->len = count;
		encoded->unencoded_len = count;
		ptr += iocb->ki_pos - extent_start;
	}

	/*
	 * Copy the data into a bounce buffer first, so the path and both
	 * locks can be released before copy_to_iter() is called.
	 */
	tmp = kmalloc(count, GFP_NOFS);
	if (!tmp) {
		ret = -ENOMEM;
		goto out;
	}
	read_extent_buffer(leaf, tmp, ptr, count);
	btrfs_release_path(path);
	unlock_extent_cached(io_tree, start, lockend, cached_state);
	btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
	*unlocked = true;

	/* A short copy is reported as -EFAULT. */
	ret = copy_to_iter(tmp, count, iter);
	if (ret != count)
		ret = -EFAULT;
	kfree(tmp);
out:
	btrfs_free_path(path);
	return ret;
}
10264
10265struct btrfs_encoded_read_private {
10266 struct btrfs_inode *inode;
10267 u64 file_offset;
10268 wait_queue_head_t wait;
10269 atomic_t pending;
10270 blk_status_t status;
10271 bool skip_csum;
10272};
10273
10274static blk_status_t submit_encoded_read_bio(struct btrfs_inode *inode,
10275 struct bio *bio, int mirror_num)
10276{
10277 struct btrfs_encoded_read_private *priv = bio->bi_private;
10278 struct btrfs_bio *bbio = btrfs_bio(bio);
10279 struct btrfs_fs_info *fs_info = inode->root->fs_info;
10280 blk_status_t ret;
10281
10282 if (!priv->skip_csum) {
10283 ret = btrfs_lookup_bio_sums(&inode->vfs_inode, bio, NULL);
10284 if (ret)
10285 return ret;
10286 }
10287
10288 ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
10289 if (ret) {
10290 btrfs_bio_free_csum(bbio);
10291 return ret;
10292 }
10293
10294 atomic_inc(&priv->pending);
10295 ret = btrfs_map_bio(fs_info, bio, mirror_num);
10296 if (ret) {
10297 atomic_dec(&priv->pending);
10298 btrfs_bio_free_csum(bbio);
10299 }
10300 return ret;
10301}
10302
10303static blk_status_t btrfs_encoded_read_verify_csum(struct btrfs_bio *bbio)
10304{
10305 const bool uptodate = (bbio->bio.bi_status == BLK_STS_OK);
10306 struct btrfs_encoded_read_private *priv = bbio->bio.bi_private;
10307 struct btrfs_inode *inode = priv->inode;
10308 struct btrfs_fs_info *fs_info = inode->root->fs_info;
10309 u32 sectorsize = fs_info->sectorsize;
10310 struct bio_vec *bvec;
10311 struct bvec_iter_all iter_all;
10312 u64 start = priv->file_offset;
10313 u32 bio_offset = 0;
10314
10315 if (priv->skip_csum || !uptodate)
10316 return bbio->bio.bi_status;
10317
10318 bio_for_each_segment_all(bvec, &bbio->bio, iter_all) {
10319 unsigned int i, nr_sectors, pgoff;
10320
10321 nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len);
10322 pgoff = bvec->bv_offset;
10323 for (i = 0; i < nr_sectors; i++) {
10324 ASSERT(pgoff < PAGE_SIZE);
10325 if (check_data_csum(&inode->vfs_inode, bbio, bio_offset,
10326 bvec->bv_page, pgoff, start))
10327 return BLK_STS_IOERR;
10328 start += sectorsize;
10329 bio_offset += sectorsize;
10330 pgoff += sectorsize;
10331 }
10332 }
10333 return BLK_STS_OK;
10334}
10335
10336static void btrfs_encoded_read_endio(struct bio *bio)
10337{
10338 struct btrfs_encoded_read_private *priv = bio->bi_private;
10339 struct btrfs_bio *bbio = btrfs_bio(bio);
10340 blk_status_t status;
10341
10342 status = btrfs_encoded_read_verify_csum(bbio);
10343 if (status) {
10344
10345
10346
10347
10348
10349
10350
10351
10352 WRITE_ONCE(priv->status, status);
10353 }
10354 if (!atomic_dec_return(&priv->pending))
10355 wake_up(&priv->wait);
10356 btrfs_bio_free_csum(bbio);
10357 bio_put(bio);
10358}
10359
10360static int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
10361 u64 file_offset,
10362 u64 disk_bytenr,
10363 u64 disk_io_size,
10364 struct page **pages)
10365{
10366 struct btrfs_fs_info *fs_info = inode->root->fs_info;
10367 struct btrfs_encoded_read_private priv = {
10368 .inode = inode,
10369 .file_offset = file_offset,
10370 .pending = ATOMIC_INIT(1),
10371 .skip_csum = (inode->flags & BTRFS_INODE_NODATASUM),
10372 };
10373 unsigned long i = 0;
10374 u64 cur = 0;
10375 int ret;
10376
10377 init_waitqueue_head(&priv.wait);
10378
10379
10380
10381
10382 while (cur < disk_io_size) {
10383 struct extent_map *em;
10384 struct btrfs_io_geometry geom;
10385 struct bio *bio = NULL;
10386 u64 remaining;
10387
10388 em = btrfs_get_chunk_map(fs_info, disk_bytenr + cur,
10389 disk_io_size - cur);
10390 if (IS_ERR(em)) {
10391 ret = PTR_ERR(em);
10392 } else {
10393 ret = btrfs_get_io_geometry(fs_info, em, BTRFS_MAP_READ,
10394 disk_bytenr + cur, &geom);
10395 free_extent_map(em);
10396 }
10397 if (ret) {
10398 WRITE_ONCE(priv.status, errno_to_blk_status(ret));
10399 break;
10400 }
10401 remaining = min(geom.len, disk_io_size - cur);
10402 while (bio || remaining) {
10403 size_t bytes = min_t(u64, remaining, PAGE_SIZE);
10404
10405 if (!bio) {
10406 bio = btrfs_bio_alloc(BIO_MAX_VECS);
10407 bio->bi_iter.bi_sector =
10408 (disk_bytenr + cur) >> SECTOR_SHIFT;
10409 bio->bi_end_io = btrfs_encoded_read_endio;
10410 bio->bi_private = &priv;
10411 bio->bi_opf = REQ_OP_READ;
10412 }
10413
10414 if (!bytes ||
10415 bio_add_page(bio, pages[i], bytes, 0) < bytes) {
10416 blk_status_t status;
10417
10418 status = submit_encoded_read_bio(inode, bio, 0);
10419 if (status) {
10420 WRITE_ONCE(priv.status, status);
10421 bio_put(bio);
10422 goto out;
10423 }
10424 bio = NULL;
10425 continue;
10426 }
10427
10428 i++;
10429 cur += bytes;
10430 remaining -= bytes;
10431 }
10432 }
10433
10434out:
10435 if (atomic_dec_return(&priv.pending))
10436 io_wait_event(priv.wait, !atomic_read(&priv.pending));
10437
10438 return blk_status_to_errno(READ_ONCE(priv.status));
10439}
10440
/*
 * Encoded read of a regular (non-inline) extent.
 *
 * @start, @lockend: extent range in the inode's io_tree that is unlocked
 *                   once the data has been read from disk
 * @cached_state:    cached extent state passed to the unlock
 * @disk_bytenr:     disk address to read from
 * @disk_io_size:    number of bytes to read from disk
 * @count:           number of bytes to copy to @iter
 * @compressed:      if true, copy from the start of the data read; otherwise
 *                   start at the offset of iocb->ki_pos relative to @start
 * @unlocked:        set to true once the extent range and the shared inode
 *                   lock have been dropped here
 *
 * Returns @count on success or a negative errno.
 */
static ssize_t btrfs_encoded_read_regular(struct kiocb *iocb,
					  struct iov_iter *iter,
					  u64 start, u64 lockend,
					  struct extent_state **cached_state,
					  u64 disk_bytenr, u64 disk_io_size,
					  size_t count, bool compressed,
					  bool *unlocked)
{
	struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp));
	struct extent_io_tree *io_tree = &inode->io_tree;
	struct page **pages;
	unsigned long nr_pages, i;
	u64 cur;
	size_t page_offset;
	ssize_t ret;

	/* kcalloc() zeroes the array, so cleanup can test each slot. */
	nr_pages = DIV_ROUND_UP(disk_io_size, PAGE_SIZE);
	pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
	if (!pages)
		return -ENOMEM;
	for (i = 0; i < nr_pages; i++) {
		pages[i] = alloc_page(GFP_NOFS);
		if (!pages[i]) {
			ret = -ENOMEM;
			goto out;
		}
	}

	ret = btrfs_encoded_read_regular_fill_pages(inode, start, disk_bytenr,
						    disk_io_size, pages);
	if (ret)
		goto out;

	/* The data is in private pages now; drop the locks before copying. */
	unlock_extent_cached(io_tree, start, lockend, cached_state);
	btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
	*unlocked = true;

	if (compressed) {
		i = 0;
		page_offset = 0;
	} else {
		/* Skip to the page and in-page offset of ki_pos. */
		i = (iocb->ki_pos - start) >> PAGE_SHIFT;
		page_offset = (iocb->ki_pos - start) & (PAGE_SIZE - 1);
	}
	cur = 0;
	while (cur < count) {
		size_t bytes = min_t(size_t, count - cur,
				     PAGE_SIZE - page_offset);

		/* A short copy is reported as -EFAULT. */
		if (copy_page_to_iter(pages[i], page_offset, bytes,
				      iter) != bytes) {
			ret = -EFAULT;
			goto out;
		}
		i++;
		cur += bytes;
		/* Only the first page can start at a non-zero offset. */
		page_offset = 0;
	}
	ret = count;
out:
	for (i = 0; i < nr_pages; i++) {
		if (pages[i])
			__free_page(pages[i]);
	}
	kfree(pages);
	return ret;
}
10508
/*
 * Perform an encoded read: return the data of the extent at iocb->ki_pos to
 * @iter and describe it in @encoded.
 *
 * @iocb:    position (ki_pos) and file to read from; ki_pos is advanced by
 *           encoded->len when the read succeeds
 * @iter:    destination buffer
 * @encoded: filled in with len, unencoded_len, unencoded_offset and
 *           compression as applicable for the extent that was read
 *
 * The inode is locked shared and the range starting at the sector-aligned
 * position is locked in the io tree while the extent is looked up.  The
 * helpers (and the hole path here) drop both locks before copying to @iter
 * and record that in the local 'unlocked' flag.
 *
 * Returns the number of bytes copied to @iter, 0 if ki_pos is at or beyond
 * i_size, -ENOBUFS if @iter is too small to hold a whole compressed extent,
 * or another negative errno.
 */
ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
			   struct btrfs_ioctl_encoded_io_args *encoded)
{
	struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp));
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	struct extent_io_tree *io_tree = &inode->io_tree;
	ssize_t ret;
	size_t count = iov_iter_count(iter);
	u64 start, lockend, disk_bytenr, disk_io_size;
	struct extent_state *cached_state = NULL;
	struct extent_map *em;
	bool unlocked = false;

	file_accessed(iocb->ki_filp);

	btrfs_inode_lock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);

	/* Nothing to read at or past EOF. */
	if (iocb->ki_pos >= inode->vfs_inode.i_size) {
		btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
		return 0;
	}
	start = ALIGN_DOWN(iocb->ki_pos, fs_info->sectorsize);
	/*
	 * The extent is not known yet, so lock BTRFS_MAX_UNCOMPRESSED bytes
	 * from the aligned start.  The uncompressed path below caps the read
	 * at lockend + 1.
	 */
	lockend = start + BTRFS_MAX_UNCOMPRESSED - 1;

	/*
	 * Wait for ordered extents, take the extent lock, then re-check: if
	 * an ordered extent showed up in between, drop the lock and retry.
	 */
	for (;;) {
		struct btrfs_ordered_extent *ordered;

		ret = btrfs_wait_ordered_range(&inode->vfs_inode, start,
					       lockend - start + 1);
		if (ret)
			goto out_unlock_inode;
		lock_extent_bits(io_tree, start, lockend, &cached_state);
		ordered = btrfs_lookup_ordered_range(inode, start,
						     lockend - start + 1);
		if (!ordered)
			break;
		btrfs_put_ordered_extent(ordered);
		unlock_extent_cached(io_tree, start, lockend, &cached_state);
		cond_resched();
	}

	em = btrfs_get_extent(inode, NULL, 0, start, lockend - start + 1);
	if (IS_ERR(em)) {
		ret = PTR_ERR(em);
		goto out_unlock_extent;
	}

	if (em->block_start == EXTENT_MAP_INLINE) {
		u64 extent_start = em->start;

		/*
		 * Inline extents are handled by a dedicated helper.  Only the
		 * extent start is needed from the map, so drop the reference
		 * first; em is set to NULL so the free_extent_map() call
		 * reached through 'out' does not see a stale pointer.
		 */
		free_extent_map(em);
		em = NULL;
		ret = btrfs_encoded_read_inline(iocb, iter, start, lockend,
						&cached_state, extent_start,
						count, encoded, &unlocked);
		goto out;
	}

	/*
	 * Default length: from ki_pos to the end of the extent, clamped to
	 * i_size.  The branches below refine it.
	 */
	encoded->len = min_t(u64, extent_map_end(em),
			     inode->vfs_inode.i_size) - iocb->ki_pos;
	if (em->block_start == EXTENT_MAP_HOLE ||
	    test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
		/* Holes and preallocated extents are returned as zeroes. */
		disk_bytenr = EXTENT_MAP_HOLE;
		count = min_t(u64, count, encoded->len);
		encoded->len = count;
		encoded->unencoded_len = count;
	} else if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
		disk_bytenr = em->block_start;
		/*
		 * A compressed extent is returned whole, so bail out if the
		 * buffer cannot hold all of it.
		 */
		if (em->block_len > count) {
			ret = -ENOBUFS;
			goto out_em;
		}
		disk_io_size = count = em->block_len;
		encoded->unencoded_len = em->ram_bytes;
		encoded->unencoded_offset = iocb->ki_pos - em->orig_start;
		ret = btrfs_encoded_io_compression_from_extent(fs_info,
							em->compress_type);
		if (ret < 0)
			goto out_em;
		encoded->compression = ret;
	} else {
		/* Plain (uncompressed) extent: read from the aligned start. */
		disk_bytenr = em->block_start + (start - em->start);
		if (encoded->len > count)
			encoded->len = count;
		/*
		 * The I/O runs from 'start' to the end of the requested data,
		 * but never past the locked range.  'count' is the part of it
		 * from ki_pos onwards; the I/O size is then rounded up to a
		 * whole number of sectors.
		 */
		disk_io_size = min(lockend + 1, iocb->ki_pos + encoded->len) - start;
		count = start + disk_io_size - iocb->ki_pos;
		encoded->len = count;
		encoded->unencoded_len = count;
		disk_io_size = ALIGN(disk_io_size, fs_info->sectorsize);
	}
	free_extent_map(em);
	em = NULL;

	if (disk_bytenr == EXTENT_MAP_HOLE) {
		/* No disk I/O needed: drop the locks and zero-fill the buffer. */
		unlock_extent_cached(io_tree, start, lockend, &cached_state);
		btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
		unlocked = true;
		ret = iov_iter_zero(count, iter);
		if (ret != count)
			ret = -EFAULT;
	} else {
		/*
		 * NOTE(review): encoded->compression is passed as the bool
		 * 'compressed' argument; this relies on the caller having
		 * zeroed it for the uncompressed case - confirm.
		 */
		ret = btrfs_encoded_read_regular(iocb, iter, start, lockend,
						 &cached_state, disk_bytenr,
						 disk_io_size, count,
						 encoded->compression,
						 &unlocked);
	}

out:
	if (ret >= 0)
		iocb->ki_pos += encoded->len;
out_em:
	/*
	 * em is NULL when reached via 'out'; presumably free_extent_map()
	 * tolerates NULL - confirm.
	 */
	free_extent_map(em);
out_unlock_extent:
	/* Skip the unlocks if a helper (or the hole path) already did them. */
	if (!unlocked)
		unlock_extent_cached(io_tree, start, lockend, &cached_state);
out_unlock_inode:
	if (!unlocked)
		btrfs_inode_unlock(&inode->vfs_inode, BTRFS_ILOCK_SHARED);
	return ret;
}
10650
/*
 * Write already-encoded (compressed) data from @from as a single extent at
 * iocb->ki_pos.
 *
 * @iocb:    file and position to write at; ki_pos is advanced by encoded->len
 *           on success
 * @from:    the encoded bytes to store on disk
 * @encoded: describes the data: compression type, len (bytes of the file the
 *           extent covers), unencoded_len and unencoded_offset
 *
 * Returns the number of encoded bytes consumed from @from on success,
 * -EINVAL for unsupported or misaligned parameters, or another negative
 * errno.
 */
ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
			       const struct btrfs_ioctl_encoded_io_args *encoded)
{
	struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp));
	struct btrfs_root *root = inode->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_io_tree *io_tree = &inode->io_tree;
	struct extent_changeset *data_reserved = NULL;
	struct extent_state *cached_state = NULL;
	int compression;
	size_t orig_count;
	u64 start, end;
	u64 num_bytes, ram_bytes, disk_num_bytes;
	unsigned long nr_pages, i;
	struct page **pages;
	struct btrfs_key ins;
	bool extent_reserved = false;
	struct extent_map *em;
	ssize_t ret;

	/* Translate the ioctl compression type to the internal one. */
	switch (encoded->compression) {
	case BTRFS_ENCODED_IO_COMPRESSION_ZLIB:
		compression = BTRFS_COMPRESS_ZLIB;
		break;
	case BTRFS_ENCODED_IO_COMPRESSION_ZSTD:
		compression = BTRFS_COMPRESS_ZSTD;
		break;
	case BTRFS_ENCODED_IO_COMPRESSION_LZO_4K:
	case BTRFS_ENCODED_IO_COMPRESSION_LZO_8K:
	case BTRFS_ENCODED_IO_COMPRESSION_LZO_16K:
	case BTRFS_ENCODED_IO_COMPRESSION_LZO_32K:
	case BTRFS_ENCODED_IO_COMPRESSION_LZO_64K:
		/*
		 * The LZO variant must match the filesystem sector size:
		 * LZO_4K requires sectorsize_bits == 12, and each following
		 * constant presumably one bit more.
		 */
		if (encoded->compression -
		    BTRFS_ENCODED_IO_COMPRESSION_LZO_4K + 12 !=
		    fs_info->sectorsize_bits)
			return -EINVAL;
		compression = BTRFS_COMPRESS_LZO;
		break;
	default:
		return -EINVAL;
	}
	if (encoded->encryption != BTRFS_ENCODED_IO_ENCRYPTION_NONE)
		return -EINVAL;

	orig_count = iov_iter_count(from);

	/* The extent size must be sane and the write must not be empty. */
	if (encoded->unencoded_len > BTRFS_MAX_UNCOMPRESSED ||
	    orig_count > BTRFS_MAX_COMPRESSED || orig_count == 0)
		return -EINVAL;

	/*
	 * The encoded data must be strictly smaller than the data it
	 * decodes to.
	 */
	if (orig_count >= encoded->unencoded_len)
		return -EINVAL;

	/* The extent must start on a sector boundary. */
	start = iocb->ki_pos;
	if (!IS_ALIGNED(start, fs_info->sectorsize))
		return -EINVAL;

	/*
	 * The extent must end on a sector boundary as well, unless it ends
	 * at or beyond the current i_size.
	 */
	if (start + encoded->len < inode->vfs_inode.i_size &&
	    !IS_ALIGNED(start + encoded->len, fs_info->sectorsize))
		return -EINVAL;

	/* The offset into the unencoded data must be sector aligned too. */
	if (!IS_ALIGNED(encoded->unencoded_offset, fs_info->sectorsize))
		return -EINVAL;

	num_bytes = ALIGN(encoded->len, fs_info->sectorsize);
	ram_bytes = ALIGN(encoded->unencoded_len, fs_info->sectorsize);
	end = start + num_bytes - 1;

	/*
	 * Copy the encoded data into freshly allocated pages before taking
	 * any locks.  The on-disk size is the encoded size rounded up to a
	 * sector; the unused tail of the last page is zeroed.
	 */
	disk_num_bytes = ALIGN(orig_count, fs_info->sectorsize);
	nr_pages = DIV_ROUND_UP(disk_num_bytes, PAGE_SIZE);
	pages = kvcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL_ACCOUNT);
	if (!pages)
		return -ENOMEM;
	for (i = 0; i < nr_pages; i++) {
		size_t bytes = min_t(size_t, PAGE_SIZE, iov_iter_count(from));
		char *kaddr;

		pages[i] = alloc_page(GFP_KERNEL_ACCOUNT);
		if (!pages[i]) {
			ret = -ENOMEM;
			goto out_pages;
		}
		kaddr = kmap(pages[i]);
		if (copy_from_iter(kaddr, bytes, from) != bytes) {
			kunmap(pages[i]);
			ret = -EFAULT;
			goto out_pages;
		}
		if (bytes < PAGE_SIZE)
			memset(kaddr + bytes, 0, PAGE_SIZE - bytes);
		kunmap(pages[i]);
	}

	/*
	 * Wait for ordered extents and drop cached pages in the range, then
	 * lock it.  If an ordered extent or a page reappeared in the
	 * meantime, unlock and try again.
	 */
	for (;;) {
		struct btrfs_ordered_extent *ordered;

		ret = btrfs_wait_ordered_range(&inode->vfs_inode, start, num_bytes);
		if (ret)
			goto out_pages;
		ret = invalidate_inode_pages2_range(inode->vfs_inode.i_mapping,
						    start >> PAGE_SHIFT,
						    end >> PAGE_SHIFT);
		if (ret)
			goto out_pages;
		lock_extent_bits(io_tree, start, end, &cached_state);
		ordered = btrfs_lookup_ordered_range(inode, start, num_bytes);
		if (!ordered &&
		    !filemap_range_has_page(inode->vfs_inode.i_mapping, start, end))
			break;
		if (ordered)
			btrfs_put_ordered_extent(ordered);
		unlock_extent_cached(io_tree, start, end, &cached_state);
		cond_resched();
	}

	/*
	 * Reserve space.  Data space is reserved for the encoded on-disk
	 * size (disk_num_bytes), while the qgroup reservation is for the
	 * file range covered (num_bytes).
	 *
	 * NOTE(review): data_reserved is never released anywhere in this
	 * function; confirm whether btrfs_qgroup_reserve_data() allocates it
	 * and, if so, who is expected to free it.
	 */
	ret = btrfs_alloc_data_chunk_ondemand(inode, disk_num_bytes);
	if (ret)
		goto out_unlock;
	ret = btrfs_qgroup_reserve_data(inode, &data_reserved, start, num_bytes);
	if (ret)
		goto out_free_data_space;
	ret = btrfs_delalloc_reserve_metadata(inode, num_bytes, disk_num_bytes);
	if (ret)
		goto out_qgroup_free_data;

	/*
	 * Try an inline extent first when the write is at offset 0 and the
	 * whole unencoded extent is used.  A return of 0 means the inline
	 * extent was created and we are done, a negative value is an error,
	 * and a positive value falls through to a regular extent.
	 */
	if (start == 0 && encoded->unencoded_len == encoded->len &&
	    encoded->unencoded_offset == 0) {
		ret = cow_file_range_inline(inode, encoded->len, orig_count,
					    compression, pages, true);
		if (ret <= 0) {
			if (ret == 0)
				ret = orig_count;
			goto out_delalloc_release;
		}
	}

	ret = btrfs_reserve_extent(root, disk_num_bytes, disk_num_bytes,
				   disk_num_bytes, 0, 0, &ins, 1, 1);
	if (ret)
		goto out_delalloc_release;
	extent_reserved = true;

	em = create_io_em(inode, start, num_bytes,
			  start - encoded->unencoded_offset, ins.objectid,
			  ins.offset, ins.offset, ram_bytes, compression,
			  BTRFS_ORDERED_COMPRESSED);
	if (IS_ERR(em)) {
		ret = PTR_ERR(em);
		goto out_free_reserved;
	}
	free_extent_map(em);

	ret = btrfs_add_ordered_extent(inode, start, num_bytes, ram_bytes,
				       ins.objectid, ins.offset,
				       encoded->unencoded_offset,
				       (1 << BTRFS_ORDERED_ENCODED) |
				       (1 << BTRFS_ORDERED_COMPRESSED),
				       compression);
	if (ret) {
		btrfs_drop_extent_cache(inode, start, end, 0);
		goto out_free_reserved;
	}
	btrfs_dec_block_group_reservations(fs_info, ins.objectid);

	/* Extend the file if the new extent reaches past the current EOF. */
	if (start + encoded->len > inode->vfs_inode.i_size)
		i_size_write(&inode->vfs_inode, start + encoded->len);

	unlock_extent_cached(io_tree, start, end, &cached_state);

	btrfs_delalloc_release_extents(inode, num_bytes);

	if (btrfs_submit_compressed_write(inode, start, num_bytes, ins.objectid,
					  ins.offset, pages, nr_pages, 0, NULL,
					  false)) {
		/* Submission failed: finish the ordered extent as failed. */
		btrfs_writepage_endio_finish_ordered(inode, pages[0], start, end, 0);
		ret = -EIO;
		goto out_pages;
	}
	/*
	 * Success: jump past out_pages, so the pages are not freed here
	 * (presumably they are now owned by the submitted write - confirm).
	 */
	ret = orig_count;
	goto out;

out_free_reserved:
	btrfs_dec_block_group_reservations(fs_info, ins.objectid);
	btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
out_delalloc_release:
	btrfs_delalloc_release_extents(inode, num_bytes);
	btrfs_delalloc_release_metadata(inode, disk_num_bytes, ret < 0);
out_qgroup_free_data:
	if (ret < 0)
		btrfs_qgroup_free_data(inode, data_reserved, start, num_bytes);
out_free_data_space:
	/*
	 * Only give the data space back if no extent was reserved; once
	 * btrfs_reserve_extent() has succeeded the reservation is presumably
	 * already accounted for by the extent.
	 */
	if (!extent_reserved)
		btrfs_free_reserved_data_space_noquota(fs_info, disk_num_bytes);
out_unlock:
	unlock_extent_cached(io_tree, start, end, &cached_state);
out_pages:
	/* kvcalloc() zeroed the array, so unallocated slots are NULL. */
	for (i = 0; i < nr_pages; i++) {
		if (pages[i])
			__free_page(pages[i]);
	}
	kvfree(pages);
out:
	if (ret >= 0)
		iocb->ki_pos += encoded->len;
	return ret;
}
10893
10894#ifdef CONFIG_SWAP
10895
10896
10897
10898
10899
10900static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr,
10901 bool is_block_group)
10902{
10903 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
10904 struct btrfs_swapfile_pin *sp, *entry;
10905 struct rb_node **p;
10906 struct rb_node *parent = NULL;
10907
10908 sp = kmalloc(sizeof(*sp), GFP_NOFS);
10909 if (!sp)
10910 return -ENOMEM;
10911 sp->ptr = ptr;
10912 sp->inode = inode;
10913 sp->is_block_group = is_block_group;
10914 sp->bg_extent_count = 1;
10915
10916 spin_lock(&fs_info->swapfile_pins_lock);
10917 p = &fs_info->swapfile_pins.rb_node;
10918 while (*p) {
10919 parent = *p;
10920 entry = rb_entry(parent, struct btrfs_swapfile_pin, node);
10921 if (sp->ptr < entry->ptr ||
10922 (sp->ptr == entry->ptr && sp->inode < entry->inode)) {
10923 p = &(*p)->rb_left;
10924 } else if (sp->ptr > entry->ptr ||
10925 (sp->ptr == entry->ptr && sp->inode > entry->inode)) {
10926 p = &(*p)->rb_right;
10927 } else {
10928 if (is_block_group)
10929 entry->bg_extent_count++;
10930 spin_unlock(&fs_info->swapfile_pins_lock);
10931 kfree(sp);
10932 return 1;
10933 }
10934 }
10935 rb_link_node(&sp->node, parent, p);
10936 rb_insert_color(&sp->node, &fs_info->swapfile_pins);
10937 spin_unlock(&fs_info->swapfile_pins_lock);
10938 return 0;
10939}
10940
10941
10942static void btrfs_free_swapfile_pins(struct inode *inode)
10943{
10944 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
10945 struct btrfs_swapfile_pin *sp;
10946 struct rb_node *node, *next;
10947
10948 spin_lock(&fs_info->swapfile_pins_lock);
10949 node = rb_first(&fs_info->swapfile_pins);
10950 while (node) {
10951 next = rb_next(node);
10952 sp = rb_entry(node, struct btrfs_swapfile_pin, node);
10953 if (sp->inode == inode) {
10954 rb_erase(&sp->node, &fs_info->swapfile_pins);
10955 if (sp->is_block_group) {
10956 btrfs_dec_block_group_swap_extents(sp->ptr,
10957 sp->bg_extent_count);
10958 btrfs_put_block_group(sp->ptr);
10959 }
10960 kfree(sp);
10961 }
10962 node = next;
10963 }
10964 spin_unlock(&fs_info->swapfile_pins_lock);
10965}
10966
/*
 * State accumulated while walking the extents of a swapfile in
 * btrfs_swap_activate().  Physically contiguous extents are merged into one
 * run (start/block_start/block_len) before being handed to
 * btrfs_add_swap_extent().
 */
struct btrfs_swap_info {
	/* File offset at which the current run begins. */
	u64 start;
	/* Physical byte address of the current run. */
	u64 block_start;
	/* Length in bytes of the current run; 0 means no run is pending. */
	u64 block_len;
	/* Lowest and highest physical page numbers seen; used for the span. */
	u64 lowest_ppage;
	u64 highest_ppage;
	/* Number of pages passed to add_swap_extent() so far. */
	unsigned long nr_pages;
	/* Sum of the add_swap_extent() return values. */
	int nr_extents;
};
10976
10977static int btrfs_add_swap_extent(struct swap_info_struct *sis,
10978 struct btrfs_swap_info *bsi)
10979{
10980 unsigned long nr_pages;
10981 unsigned long max_pages;
10982 u64 first_ppage, first_ppage_reported, next_ppage;
10983 int ret;
10984
10985
10986
10987
10988
10989
10990 if (bsi->nr_pages >= sis->max)
10991 return 0;
10992
10993 max_pages = sis->max - bsi->nr_pages;
10994 first_ppage = ALIGN(bsi->block_start, PAGE_SIZE) >> PAGE_SHIFT;
10995 next_ppage = ALIGN_DOWN(bsi->block_start + bsi->block_len,
10996 PAGE_SIZE) >> PAGE_SHIFT;
10997
10998 if (first_ppage >= next_ppage)
10999 return 0;
11000 nr_pages = next_ppage - first_ppage;
11001 nr_pages = min(nr_pages, max_pages);
11002
11003 first_ppage_reported = first_ppage;
11004 if (bsi->start == 0)
11005 first_ppage_reported++;
11006 if (bsi->lowest_ppage > first_ppage_reported)
11007 bsi->lowest_ppage = first_ppage_reported;
11008 if (bsi->highest_ppage < (next_ppage - 1))
11009 bsi->highest_ppage = next_ppage - 1;
11010
11011 ret = add_swap_extent(sis, bsi->nr_pages, nr_pages, first_ppage);
11012 if (ret < 0)
11013 return ret;
11014 bsi->nr_extents += ret;
11015 bsi->nr_pages += nr_pages;
11016 return 0;
11017}
11018
11019static void btrfs_swap_deactivate(struct file *file)
11020{
11021 struct inode *inode = file_inode(file);
11022
11023 btrfs_free_swapfile_pins(inode);
11024 atomic_dec(&BTRFS_I(inode)->root->nr_swapfiles);
11025}
11026
11027static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
11028 sector_t *span)
11029{
11030 struct inode *inode = file_inode(file);
11031 struct btrfs_root *root = BTRFS_I(inode)->root;
11032 struct btrfs_fs_info *fs_info = root->fs_info;
11033 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
11034 struct extent_state *cached_state = NULL;
11035 struct extent_map *em = NULL;
11036 struct btrfs_device *device = NULL;
11037 struct btrfs_swap_info bsi = {
11038 .lowest_ppage = (sector_t)-1ULL,
11039 };
11040 int ret = 0;
11041 u64 isize;
11042 u64 start;
11043
11044
11045
11046
11047
11048
11049 ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
11050 if (ret)
11051 return ret;
11052
11053
11054
11055
11056 if (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS) {
11057 btrfs_warn(fs_info, "swapfile must not be compressed");
11058 return -EINVAL;
11059 }
11060 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)) {
11061 btrfs_warn(fs_info, "swapfile must not be copy-on-write");
11062 return -EINVAL;
11063 }
11064 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
11065 btrfs_warn(fs_info, "swapfile must not be checksummed");
11066 return -EINVAL;
11067 }
11068
11069
11070
11071
11072
11073
11074
11075
11076
11077
11078 if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_SWAP_ACTIVATE)) {
11079 btrfs_warn(fs_info,
11080 "cannot activate swapfile while exclusive operation is running");
11081 return -EBUSY;
11082 }
11083
11084
11085
11086
11087
11088
11089
11090
11091 if (!btrfs_drew_try_write_lock(&root->snapshot_lock)) {
11092 btrfs_exclop_finish(fs_info);
11093 btrfs_warn(fs_info,
11094 "cannot activate swapfile because snapshot creation is in progress");
11095 return -EINVAL;
11096 }
11097
11098
11099
11100
11101
11102
11103
11104
11105
11106
11107 spin_lock(&root->root_item_lock);
11108 if (btrfs_root_dead(root)) {
11109 spin_unlock(&root->root_item_lock);
11110
11111 btrfs_exclop_finish(fs_info);
11112 btrfs_warn(fs_info,
11113 "cannot activate swapfile because subvolume %llu is being deleted",
11114 root->root_key.objectid);
11115 return -EPERM;
11116 }
11117 atomic_inc(&root->nr_swapfiles);
11118 spin_unlock(&root->root_item_lock);
11119
11120 isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
11121
11122 lock_extent_bits(io_tree, 0, isize - 1, &cached_state);
11123 start = 0;
11124 while (start < isize) {
11125 u64 logical_block_start, physical_block_start;
11126 struct btrfs_block_group *bg;
11127 u64 len = isize - start;
11128
11129 em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len);
11130 if (IS_ERR(em)) {
11131 ret = PTR_ERR(em);
11132 goto out;
11133 }
11134
11135 if (em->block_start == EXTENT_MAP_HOLE) {
11136 btrfs_warn(fs_info, "swapfile must not have holes");
11137 ret = -EINVAL;
11138 goto out;
11139 }
11140 if (em->block_start == EXTENT_MAP_INLINE) {
11141
11142
11143
11144
11145
11146
11147
11148 btrfs_warn(fs_info, "swapfile must not be inline");
11149 ret = -EINVAL;
11150 goto out;
11151 }
11152 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
11153 btrfs_warn(fs_info, "swapfile must not be compressed");
11154 ret = -EINVAL;
11155 goto out;
11156 }
11157
11158 logical_block_start = em->block_start + (start - em->start);
11159 len = min(len, em->len - (start - em->start));
11160 free_extent_map(em);
11161 em = NULL;
11162
11163 ret = can_nocow_extent(inode, start, &len, NULL, NULL, NULL, true);
11164 if (ret < 0) {
11165 goto out;
11166 } else if (ret) {
11167 ret = 0;
11168 } else {
11169 btrfs_warn(fs_info,
11170 "swapfile must not be copy-on-write");
11171 ret = -EINVAL;
11172 goto out;
11173 }
11174
11175 em = btrfs_get_chunk_map(fs_info, logical_block_start, len);
11176 if (IS_ERR(em)) {
11177 ret = PTR_ERR(em);
11178 goto out;
11179 }
11180
11181 if (em->map_lookup->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
11182 btrfs_warn(fs_info,
11183 "swapfile must have single data profile");
11184 ret = -EINVAL;
11185 goto out;
11186 }
11187
11188 if (device == NULL) {
11189 device = em->map_lookup->stripes[0].dev;
11190 ret = btrfs_add_swapfile_pin(inode, device, false);
11191 if (ret == 1)
11192 ret = 0;
11193 else if (ret)
11194 goto out;
11195 } else if (device != em->map_lookup->stripes[0].dev) {
11196 btrfs_warn(fs_info, "swapfile must be on one device");
11197 ret = -EINVAL;
11198 goto out;
11199 }
11200
11201 physical_block_start = (em->map_lookup->stripes[0].physical +
11202 (logical_block_start - em->start));
11203 len = min(len, em->len - (logical_block_start - em->start));
11204 free_extent_map(em);
11205 em = NULL;
11206
11207 bg = btrfs_lookup_block_group(fs_info, logical_block_start);
11208 if (!bg) {
11209 btrfs_warn(fs_info,
11210 "could not find block group containing swapfile");
11211 ret = -EINVAL;
11212 goto out;
11213 }
11214
11215 if (!btrfs_inc_block_group_swap_extents(bg)) {
11216 btrfs_warn(fs_info,
11217 "block group for swapfile at %llu is read-only%s",
11218 bg->start,
11219 atomic_read(&fs_info->scrubs_running) ?
11220 " (scrub running)" : "");
11221 btrfs_put_block_group(bg);
11222 ret = -EINVAL;
11223 goto out;
11224 }
11225
11226 ret = btrfs_add_swapfile_pin(inode, bg, true);
11227 if (ret) {
11228 btrfs_put_block_group(bg);
11229 if (ret == 1)
11230 ret = 0;
11231 else
11232 goto out;
11233 }
11234
11235 if (bsi.block_len &&
11236 bsi.block_start + bsi.block_len == physical_block_start) {
11237 bsi.block_len += len;
11238 } else {
11239 if (bsi.block_len) {
11240 ret = btrfs_add_swap_extent(sis, &bsi);
11241 if (ret)
11242 goto out;
11243 }
11244 bsi.start = start;
11245 bsi.block_start = physical_block_start;
11246 bsi.block_len = len;
11247 }
11248
11249 start += len;
11250 }
11251
11252 if (bsi.block_len)
11253 ret = btrfs_add_swap_extent(sis, &bsi);
11254
11255out:
11256 if (!IS_ERR_OR_NULL(em))
11257 free_extent_map(em);
11258
11259 unlock_extent_cached(io_tree, 0, isize - 1, &cached_state);
11260
11261 if (ret)
11262 btrfs_swap_deactivate(file);
11263
11264 btrfs_drew_write_unlock(&root->snapshot_lock);
11265
11266 btrfs_exclop_finish(fs_info);
11267
11268 if (ret)
11269 return ret;
11270
11271 if (device)
11272 sis->bdev = device->bdev;
11273 *span = bsi.highest_ppage - bsi.lowest_ppage + 1;
11274 sis->max = bsi.nr_pages;
11275 sis->pages = bsi.nr_pages - 1;
11276 sis->highest_bit = bsi.nr_pages - 1;
11277 return bsi.nr_extents;
11278}
11279#else
/* Stub used when CONFIG_SWAP is disabled: nothing to undo. */
static void btrfs_swap_deactivate(struct file *file)
{
}
11283
/* Stub used when CONFIG_SWAP is disabled: swapfiles are not supported. */
static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
			       sector_t *span)
{
	return -EOPNOTSUPP;
}
11289#endif
11290
11291
11292
11293
11294
11295
11296
11297void btrfs_update_inode_bytes(struct btrfs_inode *inode,
11298 const u64 add_bytes,
11299 const u64 del_bytes)
11300{
11301 if (add_bytes == del_bytes)
11302 return;
11303
11304 spin_lock(&inode->lock);
11305 if (del_bytes > 0)
11306 inode_sub_bytes(&inode->vfs_inode, del_bytes);
11307 if (add_bytes > 0)
11308 inode_add_bytes(&inode->vfs_inode, add_bytes);
11309 spin_unlock(&inode->lock);
11310}
11311
11312static const struct inode_operations btrfs_dir_inode_operations = {
11313 .getattr = btrfs_getattr,
11314 .lookup = btrfs_lookup,
11315 .create = btrfs_create,
11316 .unlink = btrfs_unlink,
11317 .link = btrfs_link,
11318 .mkdir = btrfs_mkdir,
11319 .rmdir = btrfs_rmdir,
11320 .rename = btrfs_rename2,
11321 .symlink = btrfs_symlink,
11322 .setattr = btrfs_setattr,
11323 .mknod = btrfs_mknod,
11324 .listxattr = btrfs_listxattr,
11325 .permission = btrfs_permission,
11326 .get_acl = btrfs_get_acl,
11327 .set_acl = btrfs_set_acl,
11328 .update_time = btrfs_update_time,
11329 .tmpfile = btrfs_tmpfile,
11330 .fileattr_get = btrfs_fileattr_get,
11331 .fileattr_set = btrfs_fileattr_set,
11332};
11333
11334static const struct file_operations btrfs_dir_file_operations = {
11335 .llseek = generic_file_llseek,
11336 .read = generic_read_dir,
11337 .iterate_shared = btrfs_real_readdir,
11338 .open = btrfs_opendir,
11339 .unlocked_ioctl = btrfs_ioctl,
11340#ifdef CONFIG_COMPAT
11341 .compat_ioctl = btrfs_compat_ioctl,
11342#endif
11343 .release = btrfs_release_file,
11344 .fsync = btrfs_sync_file,
11345};
11346
11347
11348
11349
11350
11351
11352
11353
11354
11355
11356
11357
11358
11359static const struct address_space_operations btrfs_aops = {
11360 .readpage = btrfs_readpage,
11361 .writepage = btrfs_writepage,
11362 .writepages = btrfs_writepages,
11363 .readahead = btrfs_readahead,
11364 .direct_IO = noop_direct_IO,
11365 .invalidate_folio = btrfs_invalidate_folio,
11366 .releasepage = btrfs_releasepage,
11367#ifdef CONFIG_MIGRATION
11368 .migratepage = btrfs_migratepage,
11369#endif
11370 .dirty_folio = filemap_dirty_folio,
11371 .error_remove_page = generic_error_remove_page,
11372 .swap_activate = btrfs_swap_activate,
11373 .swap_deactivate = btrfs_swap_deactivate,
11374};
11375
11376static const struct inode_operations btrfs_file_inode_operations = {
11377 .getattr = btrfs_getattr,
11378 .setattr = btrfs_setattr,
11379 .listxattr = btrfs_listxattr,
11380 .permission = btrfs_permission,
11381 .fiemap = btrfs_fiemap,
11382 .get_acl = btrfs_get_acl,
11383 .set_acl = btrfs_set_acl,
11384 .update_time = btrfs_update_time,
11385 .fileattr_get = btrfs_fileattr_get,
11386 .fileattr_set = btrfs_fileattr_set,
11387};
11388static const struct inode_operations btrfs_special_inode_operations = {
11389 .getattr = btrfs_getattr,
11390 .setattr = btrfs_setattr,
11391 .permission = btrfs_permission,
11392 .listxattr = btrfs_listxattr,
11393 .get_acl = btrfs_get_acl,
11394 .set_acl = btrfs_set_acl,
11395 .update_time = btrfs_update_time,
11396};
11397static const struct inode_operations btrfs_symlink_inode_operations = {
11398 .get_link = page_get_link,
11399 .getattr = btrfs_getattr,
11400 .setattr = btrfs_setattr,
11401 .permission = btrfs_permission,
11402 .listxattr = btrfs_listxattr,
11403 .update_time = btrfs_update_time,
11404};
11405
/* Dentry operations for btrfs; only the d_delete callback is overridden. */
const struct dentry_operations btrfs_dentry_operations = {
	.d_delete = btrfs_dentry_delete,
};
11409