1
2
3
4
5
6#include <crypto/hash.h>
7#include <linux/kernel.h>
8#include <linux/bio.h>
9#include <linux/file.h>
10#include <linux/fs.h>
11#include <linux/pagemap.h>
12#include <linux/highmem.h>
13#include <linux/time.h>
14#include <linux/init.h>
15#include <linux/string.h>
16#include <linux/backing-dev.h>
17#include <linux/writeback.h>
18#include <linux/compat.h>
19#include <linux/xattr.h>
20#include <linux/posix_acl.h>
21#include <linux/falloc.h>
22#include <linux/slab.h>
23#include <linux/ratelimit.h>
24#include <linux/btrfs.h>
25#include <linux/blkdev.h>
26#include <linux/posix_acl_xattr.h>
27#include <linux/uio.h>
28#include <linux/magic.h>
29#include <linux/iversion.h>
30#include <linux/swap.h>
31#include <linux/migrate.h>
32#include <linux/sched/mm.h>
33#include <linux/iomap.h>
34#include <asm/unaligned.h>
35#include <linux/fsverity.h>
36#include "misc.h"
37#include "ctree.h"
38#include "disk-io.h"
39#include "transaction.h"
40#include "btrfs_inode.h"
41#include "print-tree.h"
42#include "ordered-data.h"
43#include "xattr.h"
44#include "tree-log.h"
45#include "volumes.h"
46#include "compression.h"
47#include "locking.h"
48#include "free-space-cache.h"
49#include "props.h"
50#include "qgroup.h"
51#include "delalloc-space.h"
52#include "block-group.h"
53#include "space-info.h"
54#include "zoned.h"
55#include "subpage.h"
56
/*
 * Pairs an inode number with a btrfs root.
 *
 * NOTE(review): no user of this struct is visible in this part of the file;
 * presumably it is the lookup key handed to the inode-cache lookup helpers -
 * confirm against the callers.
 */
struct btrfs_iget_args {
	u64 ino;			/* presumably the inode number - TODO confirm */
	struct btrfs_root *root;	/* presumably the root that owns @ino - TODO confirm */
};
61
/*
 * Bookkeeping carried through a direct I/O operation.
 *
 * NOTE(review): the code using these fields is not in this part of the file;
 * the per-field notes below are inferred from names and types only and need
 * to be confirmed against the direct I/O paths.
 */
struct btrfs_dio_data {
	u64 reserve;				/* presumably bytes still reserved - TODO confirm */
	loff_t length;				/* presumably length of the request - TODO confirm */
	ssize_t submitted;			/* presumably bytes submitted so far - TODO confirm */
	struct extent_changeset *data_reserved;	/* presumably the data reservation changeset - TODO confirm */
};
68
/*
 * Operation tables, declared here so they can be referenced before they are
 * defined. NOTE(review): their definitions are not in this part of the file.
 */
static const struct inode_operations btrfs_dir_inode_operations;
static const struct inode_operations btrfs_symlink_inode_operations;
static const struct inode_operations btrfs_special_inode_operations;
static const struct inode_operations btrfs_file_inode_operations;
static const struct address_space_operations btrfs_aops;
static const struct file_operations btrfs_dir_file_operations;

/*
 * Slab cache pointers. Only btrfs_inode_cachep is private to this file.
 * NOTE(review): cache creation/destruction is not visible in this part of
 * the file.
 */
static struct kmem_cache *btrfs_inode_cachep;
struct kmem_cache *btrfs_trans_handle_cachep;
struct kmem_cache *btrfs_path_cachep;
struct kmem_cache *btrfs_free_space_cachep;
struct kmem_cache *btrfs_free_space_bitmap_cachep;

/* Prototypes of static helpers that are called before their definitions. */
static int btrfs_setsize(struct inode *inode, struct iattr *attr);
static int btrfs_truncate(struct inode *inode, bool skip_writeback);
static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
static noinline int cow_file_range(struct btrfs_inode *inode,
				   struct page *locked_page,
				   u64 start, u64 end, int *page_started,
				   unsigned long *nr_written, int unlock);
static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
				       u64 len, u64 orig_start, u64 block_start,
				       u64 block_len, u64 orig_block_len,
				       u64 ram_bytes, int compress_type,
				       int type);

static void __endio_write_update_ordered(struct btrfs_inode *inode,
					 const u64 offset, const u64 bytes,
					 const bool uptodate);
98
99
100
101
102
103
104
105
106
107
108
109int btrfs_inode_lock(struct inode *inode, unsigned int ilock_flags)
110{
111 if (ilock_flags & BTRFS_ILOCK_SHARED) {
112 if (ilock_flags & BTRFS_ILOCK_TRY) {
113 if (!inode_trylock_shared(inode))
114 return -EAGAIN;
115 else
116 return 0;
117 }
118 inode_lock_shared(inode);
119 } else {
120 if (ilock_flags & BTRFS_ILOCK_TRY) {
121 if (!inode_trylock(inode))
122 return -EAGAIN;
123 else
124 return 0;
125 }
126 inode_lock(inode);
127 }
128 if (ilock_flags & BTRFS_ILOCK_MMAP)
129 down_write(&BTRFS_I(inode)->i_mmap_lock);
130 return 0;
131}
132
133
134
135
136
137
138
139void btrfs_inode_unlock(struct inode *inode, unsigned int ilock_flags)
140{
141 if (ilock_flags & BTRFS_ILOCK_MMAP)
142 up_write(&BTRFS_I(inode)->i_mmap_lock);
143 if (ilock_flags & BTRFS_ILOCK_SHARED)
144 inode_unlock_shared(inode);
145 else
146 inode_unlock(inode);
147}
148
149
150
151
152
153
154
155
156
157
158
159static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode,
160 struct page *locked_page,
161 u64 offset, u64 bytes)
162{
163 unsigned long index = offset >> PAGE_SHIFT;
164 unsigned long end_index = (offset + bytes - 1) >> PAGE_SHIFT;
165 u64 page_start = page_offset(locked_page);
166 u64 page_end = page_start + PAGE_SIZE - 1;
167
168 struct page *page;
169
170 while (index <= end_index) {
171
172
173
174
175
176
177
178
179
180
181
182 if (index == (page_offset(locked_page) >> PAGE_SHIFT)) {
183 index++;
184 continue;
185 }
186 page = find_get_page(inode->vfs_inode.i_mapping, index);
187 index++;
188 if (!page)
189 continue;
190
191
192
193
194
195
196 btrfs_page_clamp_clear_ordered(inode->root->fs_info, page,
197 offset, bytes);
198 put_page(page);
199 }
200
201
202 if (bytes + offset <= page_offset(locked_page) + PAGE_SIZE)
203 return;
204
205
206
207
208
209 if (page_start >= offset && page_end <= (offset + bytes - 1)) {
210 bytes = offset + bytes - page_offset(locked_page) - PAGE_SIZE;
211 offset = page_offset(locked_page) + PAGE_SIZE;
212 }
213
214 return __endio_write_update_ordered(inode, offset, bytes, false);
215}
216
/* Prototype only. NOTE(review): the definition is not in this part of the file. */
static int btrfs_dirty_inode(struct inode *inode);
218
/*
 * Set up the ACLs and then the security xattrs of a new inode.
 *
 * @trans: running transaction
 * @inode: inode being initialised
 * @dir:   directory passed on to both initialisation helpers
 * @qstr:  name passed on to the security xattr initialisation
 *
 * Return: 0 on success, otherwise the error of the first helper that failed.
 * The security xattrs are not initialised when the ACL step fails.
 */
static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
				     struct inode *inode, struct inode *dir,
				     const struct qstr *qstr)
{
	int ret = btrfs_init_acl(trans, inode, dir);

	if (ret)
		return ret;

	return btrfs_xattr_security_init(trans, inode, dir, qstr);
}
230
231
232
233
234
235
236static int insert_inline_extent(struct btrfs_trans_handle *trans,
237 struct btrfs_path *path, bool extent_inserted,
238 struct btrfs_root *root, struct inode *inode,
239 u64 start, size_t size, size_t compressed_size,
240 int compress_type,
241 struct page **compressed_pages)
242{
243 struct extent_buffer *leaf;
244 struct page *page = NULL;
245 char *kaddr;
246 unsigned long ptr;
247 struct btrfs_file_extent_item *ei;
248 int ret;
249 size_t cur_size = size;
250 unsigned long offset;
251
252 ASSERT((compressed_size > 0 && compressed_pages) ||
253 (compressed_size == 0 && !compressed_pages));
254
255 if (compressed_size && compressed_pages)
256 cur_size = compressed_size;
257
258 if (!extent_inserted) {
259 struct btrfs_key key;
260 size_t datasize;
261
262 key.objectid = btrfs_ino(BTRFS_I(inode));
263 key.offset = start;
264 key.type = BTRFS_EXTENT_DATA_KEY;
265
266 datasize = btrfs_file_extent_calc_inline_size(cur_size);
267 ret = btrfs_insert_empty_item(trans, root, path, &key,
268 datasize);
269 if (ret)
270 goto fail;
271 }
272 leaf = path->nodes[0];
273 ei = btrfs_item_ptr(leaf, path->slots[0],
274 struct btrfs_file_extent_item);
275 btrfs_set_file_extent_generation(leaf, ei, trans->transid);
276 btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE);
277 btrfs_set_file_extent_encryption(leaf, ei, 0);
278 btrfs_set_file_extent_other_encoding(leaf, ei, 0);
279 btrfs_set_file_extent_ram_bytes(leaf, ei, size);
280 ptr = btrfs_file_extent_inline_start(ei);
281
282 if (compress_type != BTRFS_COMPRESS_NONE) {
283 struct page *cpage;
284 int i = 0;
285 while (compressed_size > 0) {
286 cpage = compressed_pages[i];
287 cur_size = min_t(unsigned long, compressed_size,
288 PAGE_SIZE);
289
290 kaddr = kmap_atomic(cpage);
291 write_extent_buffer(leaf, kaddr, ptr, cur_size);
292 kunmap_atomic(kaddr);
293
294 i++;
295 ptr += cur_size;
296 compressed_size -= cur_size;
297 }
298 btrfs_set_file_extent_compression(leaf, ei,
299 compress_type);
300 } else {
301 page = find_get_page(inode->i_mapping,
302 start >> PAGE_SHIFT);
303 btrfs_set_file_extent_compression(leaf, ei, 0);
304 kaddr = kmap_atomic(page);
305 offset = offset_in_page(start);
306 write_extent_buffer(leaf, kaddr + offset, ptr, size);
307 kunmap_atomic(kaddr);
308 put_page(page);
309 }
310 btrfs_mark_buffer_dirty(leaf);
311 btrfs_release_path(path);
312
313
314
315
316
317 size = ALIGN(size, root->fs_info->sectorsize);
318 ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), start, size);
319 if (ret)
320 goto fail;
321
322
323
324
325
326
327
328
329
330
331 BTRFS_I(inode)->disk_i_size = inode->i_size;
332fail:
333 return ret;
334}
335
336
337
338
339
340
341
342static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start,
343 u64 end, size_t compressed_size,
344 int compress_type,
345 struct page **compressed_pages)
346{
347 struct btrfs_drop_extents_args drop_args = { 0 };
348 struct btrfs_root *root = inode->root;
349 struct btrfs_fs_info *fs_info = root->fs_info;
350 struct btrfs_trans_handle *trans;
351 u64 isize = i_size_read(&inode->vfs_inode);
352 u64 actual_end = min(end + 1, isize);
353 u64 inline_len = actual_end - start;
354 u64 aligned_end = ALIGN(end, fs_info->sectorsize);
355 u64 data_len = inline_len;
356 int ret;
357 struct btrfs_path *path;
358
359 if (compressed_size)
360 data_len = compressed_size;
361
362 if (start > 0 ||
363 actual_end > fs_info->sectorsize ||
364 data_len > BTRFS_MAX_INLINE_DATA_SIZE(fs_info) ||
365 (!compressed_size &&
366 (actual_end & (fs_info->sectorsize - 1)) == 0) ||
367 end + 1 < isize ||
368 data_len > fs_info->max_inline) {
369 return 1;
370 }
371
372 path = btrfs_alloc_path();
373 if (!path)
374 return -ENOMEM;
375
376 trans = btrfs_join_transaction(root);
377 if (IS_ERR(trans)) {
378 btrfs_free_path(path);
379 return PTR_ERR(trans);
380 }
381 trans->block_rsv = &inode->block_rsv;
382
383 drop_args.path = path;
384 drop_args.start = start;
385 drop_args.end = aligned_end;
386 drop_args.drop_cache = true;
387 drop_args.replace_extent = true;
388
389 if (compressed_size && compressed_pages)
390 drop_args.extent_item_size = btrfs_file_extent_calc_inline_size(
391 compressed_size);
392 else
393 drop_args.extent_item_size = btrfs_file_extent_calc_inline_size(
394 inline_len);
395
396 ret = btrfs_drop_extents(trans, root, inode, &drop_args);
397 if (ret) {
398 btrfs_abort_transaction(trans, ret);
399 goto out;
400 }
401
402 if (isize > actual_end)
403 inline_len = min_t(u64, isize, actual_end);
404 ret = insert_inline_extent(trans, path, drop_args.extent_inserted,
405 root, &inode->vfs_inode, start,
406 inline_len, compressed_size,
407 compress_type, compressed_pages);
408 if (ret && ret != -ENOSPC) {
409 btrfs_abort_transaction(trans, ret);
410 goto out;
411 } else if (ret == -ENOSPC) {
412 ret = 1;
413 goto out;
414 }
415
416 btrfs_update_inode_bytes(inode, inline_len, drop_args.bytes_found);
417 ret = btrfs_update_inode(trans, root, inode);
418 if (ret && ret != -ENOSPC) {
419 btrfs_abort_transaction(trans, ret);
420 goto out;
421 } else if (ret == -ENOSPC) {
422 ret = 1;
423 goto out;
424 }
425
426 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
427out:
428
429
430
431
432
433
434 btrfs_qgroup_free_data(inode, NULL, 0, PAGE_SIZE);
435 btrfs_free_path(path);
436 btrfs_end_transaction(trans);
437 return ret;
438}
439
/*
 * One extent produced by compress_file_range() and queued on
 * async_chunk::extents; filled in by add_async_extent() and consumed by
 * submit_compressed_extents().
 */
struct async_extent {
	u64 start;			/* file offset of the extent */
	u64 ram_size;			/* length of the file range covered */
	u64 compressed_size;		/* compressed length, 0 when queued uncompressed */
	struct page **pages;		/* compressed pages; NULL means "write uncompressed" */
	unsigned long nr_pages;		/* number of entries in @pages */
	int compress_type;		/* BTRFS_COMPRESS_* value for this extent */
	struct list_head list;		/* link in async_chunk::extents */
};
449
/*
 * One unit of asynchronous delalloc work, set up by cow_file_range_async()
 * and processed by async_cow_start(), async_cow_submit() and
 * async_cow_free().
 */
struct async_chunk {
	struct inode *inode;		/* held via ihold(); set to NULL by async_cow_start() when nothing is left to submit */
	struct page *locked_page;	/* only set for the first chunk of a range, NULL otherwise */
	u64 start;			/* first byte of the chunk */
	u64 end;			/* last byte of the chunk (inclusive) */
	unsigned int write_flags;	/* from wbc_to_write_flags() */
	struct list_head extents;	/* list of struct async_extent */
	struct cgroup_subsys_state *blkcg_css;	/* referenced css, or NULL for the root css */
	struct btrfs_work work;		/* work item queued on delalloc_workers */
	atomic_t *pending;		/* points at async_cow::num_chunks of the owning context */
};
461
/*
 * Allocation context for a set of async chunks, allocated in
 * cow_file_range_async() with room for all chunks.
 */
struct async_cow {
	/*
	 * Number of chunks not yet freed. async_cow_free() passes the address
	 * of this counter (async_chunk::pending) to kvfree(), so it has to
	 * stay the first member of the structure.
	 */
	atomic_t num_chunks;
	struct async_chunk chunks[];	/* the chunks themselves */
};
467
468static noinline int add_async_extent(struct async_chunk *cow,
469 u64 start, u64 ram_size,
470 u64 compressed_size,
471 struct page **pages,
472 unsigned long nr_pages,
473 int compress_type)
474{
475 struct async_extent *async_extent;
476
477 async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS);
478 BUG_ON(!async_extent);
479 async_extent->start = start;
480 async_extent->ram_size = ram_size;
481 async_extent->compressed_size = compressed_size;
482 async_extent->pages = pages;
483 async_extent->nr_pages = nr_pages;
484 async_extent->compress_type = compress_type;
485 list_add_tail(&async_extent->list, &cow->extents);
486 return 0;
487}
488
489
490
491
492static inline bool inode_can_compress(struct btrfs_inode *inode)
493{
494
495 if (inode->root->fs_info->sectorsize < PAGE_SIZE)
496 return false;
497 if (inode->flags & BTRFS_INODE_NODATACOW ||
498 inode->flags & BTRFS_INODE_NODATASUM)
499 return false;
500 return true;
501}
502
503
504
505
506
507static inline int inode_need_compress(struct btrfs_inode *inode, u64 start,
508 u64 end)
509{
510 struct btrfs_fs_info *fs_info = inode->root->fs_info;
511
512 if (!inode_can_compress(inode)) {
513 WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
514 KERN_ERR "BTRFS: unexpected compression for ino %llu\n",
515 btrfs_ino(inode));
516 return 0;
517 }
518
519 if (btrfs_test_opt(fs_info, FORCE_COMPRESS))
520 return 1;
521
522 if (inode->defrag_compress)
523 return 1;
524
525 if (inode->flags & BTRFS_INODE_NOCOMPRESS)
526 return 0;
527 if (btrfs_test_opt(fs_info, COMPRESS) ||
528 inode->flags & BTRFS_INODE_COMPRESS ||
529 inode->prop_compress)
530 return btrfs_compress_heuristic(&inode->vfs_inode, start, end);
531 return 0;
532}
533
534static inline void inode_should_defrag(struct btrfs_inode *inode,
535 u64 start, u64 end, u64 num_bytes, u64 small_write)
536{
537
538 if (num_bytes < small_write &&
539 (start > 0 || end + 1 < inode->disk_i_size))
540 btrfs_add_inode_defrag(NULL, inode);
541}
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560static noinline int compress_file_range(struct async_chunk *async_chunk)
561{
562 struct inode *inode = async_chunk->inode;
563 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
564 u64 blocksize = fs_info->sectorsize;
565 u64 start = async_chunk->start;
566 u64 end = async_chunk->end;
567 u64 actual_end;
568 u64 i_size;
569 int ret = 0;
570 struct page **pages = NULL;
571 unsigned long nr_pages;
572 unsigned long total_compressed = 0;
573 unsigned long total_in = 0;
574 int i;
575 int will_compress;
576 int compress_type = fs_info->compress_type;
577 int compressed_extents = 0;
578 int redirty = 0;
579
580 inode_should_defrag(BTRFS_I(inode), start, end, end - start + 1,
581 SZ_16K);
582
583
584
585
586
587
588
589
590
591
592 barrier();
593 i_size = i_size_read(inode);
594 barrier();
595 actual_end = min_t(u64, i_size, end + 1);
596again:
597 will_compress = 0;
598 nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1;
599 BUILD_BUG_ON((BTRFS_MAX_COMPRESSED % PAGE_SIZE) != 0);
600 nr_pages = min_t(unsigned long, nr_pages,
601 BTRFS_MAX_COMPRESSED / PAGE_SIZE);
602
603
604
605
606
607
608
609
610
611
612
613 if (actual_end <= start)
614 goto cleanup_and_bail_uncompressed;
615
616 total_compressed = actual_end - start;
617
618
619
620
621
622 if (total_compressed <= blocksize &&
623 (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
624 goto cleanup_and_bail_uncompressed;
625
626 total_compressed = min_t(unsigned long, total_compressed,
627 BTRFS_MAX_UNCOMPRESSED);
628 total_in = 0;
629 ret = 0;
630
631
632
633
634
635
636 if (inode_need_compress(BTRFS_I(inode), start, end)) {
637 WARN_ON(pages);
638 pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
639 if (!pages) {
640
641 nr_pages = 0;
642 goto cont;
643 }
644
645 if (BTRFS_I(inode)->defrag_compress)
646 compress_type = BTRFS_I(inode)->defrag_compress;
647 else if (BTRFS_I(inode)->prop_compress)
648 compress_type = BTRFS_I(inode)->prop_compress;
649
650
651
652
653
654
655
656
657
658
659
660
661
662 if (!redirty) {
663 extent_range_clear_dirty_for_io(inode, start, end);
664 redirty = 1;
665 }
666
667
668 ret = btrfs_compress_pages(
669 compress_type | (fs_info->compress_level << 4),
670 inode->i_mapping, start,
671 pages,
672 &nr_pages,
673 &total_in,
674 &total_compressed);
675
676 if (!ret) {
677 unsigned long offset = offset_in_page(total_compressed);
678 struct page *page = pages[nr_pages - 1];
679
680
681
682
683 if (offset)
684 memzero_page(page, offset, PAGE_SIZE - offset);
685 will_compress = 1;
686 }
687 }
688cont:
689
690
691
692
693 if (start == 0 && fs_info->sectorsize == PAGE_SIZE) {
694
695 if (ret || total_in < actual_end) {
696
697
698
699 ret = cow_file_range_inline(BTRFS_I(inode), start, end,
700 0, BTRFS_COMPRESS_NONE,
701 NULL);
702 } else {
703
704 ret = cow_file_range_inline(BTRFS_I(inode), start, end,
705 total_compressed,
706 compress_type, pages);
707 }
708 if (ret <= 0) {
709 unsigned long clear_flags = EXTENT_DELALLOC |
710 EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
711 EXTENT_DO_ACCOUNTING;
712 unsigned long page_error_op;
713
714 page_error_op = ret < 0 ? PAGE_SET_ERROR : 0;
715
716
717
718
719
720
721
722
723
724
725
726 extent_clear_unlock_delalloc(BTRFS_I(inode), start, end,
727 NULL,
728 clear_flags,
729 PAGE_UNLOCK |
730 PAGE_START_WRITEBACK |
731 page_error_op |
732 PAGE_END_WRITEBACK);
733
734
735
736
737
738
739 if (pages) {
740 for (i = 0; i < nr_pages; i++) {
741 WARN_ON(pages[i]->mapping);
742 put_page(pages[i]);
743 }
744 kfree(pages);
745 }
746 return 0;
747 }
748 }
749
750 if (will_compress) {
751
752
753
754
755
756 total_compressed = ALIGN(total_compressed, blocksize);
757
758
759
760
761
762
763 total_in = ALIGN(total_in, PAGE_SIZE);
764 if (total_compressed + blocksize <= total_in) {
765 compressed_extents++;
766
767
768
769
770
771
772 add_async_extent(async_chunk, start, total_in,
773 total_compressed, pages, nr_pages,
774 compress_type);
775
776 if (start + total_in < end) {
777 start += total_in;
778 pages = NULL;
779 cond_resched();
780 goto again;
781 }
782 return compressed_extents;
783 }
784 }
785 if (pages) {
786
787
788
789
790 for (i = 0; i < nr_pages; i++) {
791 WARN_ON(pages[i]->mapping);
792 put_page(pages[i]);
793 }
794 kfree(pages);
795 pages = NULL;
796 total_compressed = 0;
797 nr_pages = 0;
798
799
800 if (!btrfs_test_opt(fs_info, FORCE_COMPRESS) &&
801 !(BTRFS_I(inode)->prop_compress)) {
802 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
803 }
804 }
805cleanup_and_bail_uncompressed:
806
807
808
809
810
811
812 if (async_chunk->locked_page &&
813 (page_offset(async_chunk->locked_page) >= start &&
814 page_offset(async_chunk->locked_page)) <= end) {
815 __set_page_dirty_nobuffers(async_chunk->locked_page);
816
817 }
818
819 if (redirty)
820 extent_range_redirty_for_io(inode, start, end);
821 add_async_extent(async_chunk, start, end - start + 1, 0, NULL, 0,
822 BTRFS_COMPRESS_NONE);
823 compressed_extents++;
824
825 return compressed_extents;
826}
827
828static void free_async_extent_pages(struct async_extent *async_extent)
829{
830 int i;
831
832 if (!async_extent->pages)
833 return;
834
835 for (i = 0; i < async_extent->nr_pages; i++) {
836 WARN_ON(async_extent->pages[i]->mapping);
837 put_page(async_extent->pages[i]);
838 }
839 kfree(async_extent->pages);
840 async_extent->nr_pages = 0;
841 async_extent->pages = NULL;
842}
843
844
845
846
847
848
849
850static noinline void submit_compressed_extents(struct async_chunk *async_chunk)
851{
852 struct btrfs_inode *inode = BTRFS_I(async_chunk->inode);
853 struct btrfs_fs_info *fs_info = inode->root->fs_info;
854 struct async_extent *async_extent;
855 u64 alloc_hint = 0;
856 struct btrfs_key ins;
857 struct extent_map *em;
858 struct btrfs_root *root = inode->root;
859 struct extent_io_tree *io_tree = &inode->io_tree;
860 int ret = 0;
861
862again:
863 while (!list_empty(&async_chunk->extents)) {
864 async_extent = list_entry(async_chunk->extents.next,
865 struct async_extent, list);
866 list_del(&async_extent->list);
867
868retry:
869 lock_extent(io_tree, async_extent->start,
870 async_extent->start + async_extent->ram_size - 1);
871
872 if (!async_extent->pages) {
873 int page_started = 0;
874 unsigned long nr_written = 0;
875
876
877 ret = cow_file_range(inode, async_chunk->locked_page,
878 async_extent->start,
879 async_extent->start +
880 async_extent->ram_size - 1,
881 &page_started, &nr_written, 0);
882
883
884
885
886
887
888
889
890
891 if (!page_started && !ret)
892 extent_write_locked_range(&inode->vfs_inode,
893 async_extent->start,
894 async_extent->start +
895 async_extent->ram_size - 1,
896 WB_SYNC_ALL);
897 else if (ret && async_chunk->locked_page)
898 unlock_page(async_chunk->locked_page);
899 kfree(async_extent);
900 cond_resched();
901 continue;
902 }
903
904 ret = btrfs_reserve_extent(root, async_extent->ram_size,
905 async_extent->compressed_size,
906 async_extent->compressed_size,
907 0, alloc_hint, &ins, 1, 1);
908 if (ret) {
909 free_async_extent_pages(async_extent);
910
911 if (ret == -ENOSPC) {
912 unlock_extent(io_tree, async_extent->start,
913 async_extent->start +
914 async_extent->ram_size - 1);
915
916
917
918
919
920
921
922 extent_range_redirty_for_io(&inode->vfs_inode,
923 async_extent->start,
924 async_extent->start +
925 async_extent->ram_size - 1);
926
927 goto retry;
928 }
929 goto out_free;
930 }
931
932
933
934
935 em = create_io_em(inode, async_extent->start,
936 async_extent->ram_size,
937 async_extent->start,
938 ins.objectid,
939 ins.offset,
940 ins.offset,
941 async_extent->ram_size,
942 async_extent->compress_type,
943 BTRFS_ORDERED_COMPRESSED);
944 if (IS_ERR(em))
945
946 goto out_free_reserve;
947 free_extent_map(em);
948
949 ret = btrfs_add_ordered_extent_compress(inode,
950 async_extent->start,
951 ins.objectid,
952 async_extent->ram_size,
953 ins.offset,
954 async_extent->compress_type);
955 if (ret) {
956 btrfs_drop_extent_cache(inode, async_extent->start,
957 async_extent->start +
958 async_extent->ram_size - 1, 0);
959 goto out_free_reserve;
960 }
961 btrfs_dec_block_group_reservations(fs_info, ins.objectid);
962
963
964
965
966 extent_clear_unlock_delalloc(inode, async_extent->start,
967 async_extent->start +
968 async_extent->ram_size - 1,
969 NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
970 PAGE_UNLOCK | PAGE_START_WRITEBACK);
971 if (btrfs_submit_compressed_write(inode, async_extent->start,
972 async_extent->ram_size,
973 ins.objectid,
974 ins.offset, async_extent->pages,
975 async_extent->nr_pages,
976 async_chunk->write_flags,
977 async_chunk->blkcg_css)) {
978 struct page *p = async_extent->pages[0];
979 const u64 start = async_extent->start;
980 const u64 end = start + async_extent->ram_size - 1;
981
982 p->mapping = inode->vfs_inode.i_mapping;
983 btrfs_writepage_endio_finish_ordered(inode, p, start,
984 end, false);
985
986 p->mapping = NULL;
987 extent_clear_unlock_delalloc(inode, start, end, NULL, 0,
988 PAGE_END_WRITEBACK |
989 PAGE_SET_ERROR);
990 free_async_extent_pages(async_extent);
991 }
992 alloc_hint = ins.objectid + ins.offset;
993 kfree(async_extent);
994 cond_resched();
995 }
996 return;
997out_free_reserve:
998 btrfs_dec_block_group_reservations(fs_info, ins.objectid);
999 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
1000out_free:
1001 extent_clear_unlock_delalloc(inode, async_extent->start,
1002 async_extent->start +
1003 async_extent->ram_size - 1,
1004 NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
1005 EXTENT_DELALLOC_NEW |
1006 EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
1007 PAGE_UNLOCK | PAGE_START_WRITEBACK |
1008 PAGE_END_WRITEBACK | PAGE_SET_ERROR);
1009 free_async_extent_pages(async_extent);
1010 kfree(async_extent);
1011 goto again;
1012}
1013
1014static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
1015 u64 num_bytes)
1016{
1017 struct extent_map_tree *em_tree = &inode->extent_tree;
1018 struct extent_map *em;
1019 u64 alloc_hint = 0;
1020
1021 read_lock(&em_tree->lock);
1022 em = search_extent_mapping(em_tree, start, num_bytes);
1023 if (em) {
1024
1025
1026
1027
1028
1029 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
1030 free_extent_map(em);
1031 em = search_extent_mapping(em_tree, 0, 0);
1032 if (em && em->block_start < EXTENT_MAP_LAST_BYTE)
1033 alloc_hint = em->block_start;
1034 if (em)
1035 free_extent_map(em);
1036 } else {
1037 alloc_hint = em->block_start;
1038 free_extent_map(em);
1039 }
1040 }
1041 read_unlock(&em_tree->lock);
1042
1043 return alloc_hint;
1044}
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059static noinline int cow_file_range(struct btrfs_inode *inode,
1060 struct page *locked_page,
1061 u64 start, u64 end, int *page_started,
1062 unsigned long *nr_written, int unlock)
1063{
1064 struct btrfs_root *root = inode->root;
1065 struct btrfs_fs_info *fs_info = root->fs_info;
1066 u64 alloc_hint = 0;
1067 u64 num_bytes;
1068 unsigned long ram_size;
1069 u64 cur_alloc_size = 0;
1070 u64 min_alloc_size;
1071 u64 blocksize = fs_info->sectorsize;
1072 struct btrfs_key ins;
1073 struct extent_map *em;
1074 unsigned clear_bits;
1075 unsigned long page_ops;
1076 bool extent_reserved = false;
1077 int ret = 0;
1078
1079 if (btrfs_is_free_space_inode(inode)) {
1080 WARN_ON_ONCE(1);
1081 ret = -EINVAL;
1082 goto out_unlock;
1083 }
1084
1085 num_bytes = ALIGN(end - start + 1, blocksize);
1086 num_bytes = max(blocksize, num_bytes);
1087 ASSERT(num_bytes <= btrfs_super_total_bytes(fs_info->super_copy));
1088
1089 inode_should_defrag(inode, start, end, num_bytes, SZ_64K);
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101 if (start == 0 && fs_info->sectorsize == PAGE_SIZE) {
1102
1103 ret = cow_file_range_inline(inode, start, end, 0,
1104 BTRFS_COMPRESS_NONE, NULL);
1105 if (ret == 0) {
1106
1107
1108
1109
1110
1111
1112 extent_clear_unlock_delalloc(inode, start, end,
1113 locked_page,
1114 EXTENT_LOCKED | EXTENT_DELALLOC |
1115 EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
1116 EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
1117 PAGE_START_WRITEBACK | PAGE_END_WRITEBACK);
1118 *nr_written = *nr_written +
1119 (end - start + PAGE_SIZE) / PAGE_SIZE;
1120 *page_started = 1;
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133 unlock_page(locked_page);
1134 goto out;
1135 } else if (ret < 0) {
1136 goto out_unlock;
1137 }
1138 }
1139
1140 alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
1141 btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
1155 min_alloc_size = num_bytes;
1156 else
1157 min_alloc_size = fs_info->sectorsize;
1158
1159 while (num_bytes > 0) {
1160 cur_alloc_size = num_bytes;
1161 ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
1162 min_alloc_size, 0, alloc_hint,
1163 &ins, 1, 1);
1164 if (ret < 0)
1165 goto out_unlock;
1166 cur_alloc_size = ins.offset;
1167 extent_reserved = true;
1168
1169 ram_size = ins.offset;
1170 em = create_io_em(inode, start, ins.offset,
1171 start,
1172 ins.objectid,
1173 ins.offset,
1174 ins.offset,
1175 ram_size,
1176 BTRFS_COMPRESS_NONE,
1177 BTRFS_ORDERED_REGULAR );
1178 if (IS_ERR(em)) {
1179 ret = PTR_ERR(em);
1180 goto out_reserve;
1181 }
1182 free_extent_map(em);
1183
1184 ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
1185 ram_size, cur_alloc_size,
1186 BTRFS_ORDERED_REGULAR);
1187 if (ret)
1188 goto out_drop_extent_cache;
1189
1190 if (root->root_key.objectid ==
1191 BTRFS_DATA_RELOC_TREE_OBJECTID) {
1192 ret = btrfs_reloc_clone_csums(inode, start,
1193 cur_alloc_size);
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205 if (ret)
1206 btrfs_drop_extent_cache(inode, start,
1207 start + ram_size - 1, 0);
1208 }
1209
1210 btrfs_dec_block_group_reservations(fs_info, ins.objectid);
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220 page_ops = unlock ? PAGE_UNLOCK : 0;
1221 page_ops |= PAGE_SET_ORDERED;
1222
1223 extent_clear_unlock_delalloc(inode, start, start + ram_size - 1,
1224 locked_page,
1225 EXTENT_LOCKED | EXTENT_DELALLOC,
1226 page_ops);
1227 if (num_bytes < cur_alloc_size)
1228 num_bytes = 0;
1229 else
1230 num_bytes -= cur_alloc_size;
1231 alloc_hint = ins.objectid + ins.offset;
1232 start += cur_alloc_size;
1233 extent_reserved = false;
1234
1235
1236
1237
1238
1239
1240 if (ret)
1241 goto out_unlock;
1242 }
1243out:
1244 return ret;
1245
1246out_drop_extent_cache:
1247 btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0);
1248out_reserve:
1249 btrfs_dec_block_group_reservations(fs_info, ins.objectid);
1250 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
1251out_unlock:
1252 clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
1253 EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV;
1254 page_ops = PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK;
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265 if (extent_reserved) {
1266 extent_clear_unlock_delalloc(inode, start,
1267 start + cur_alloc_size - 1,
1268 locked_page,
1269 clear_bits,
1270 page_ops);
1271 start += cur_alloc_size;
1272 if (start >= end)
1273 goto out;
1274 }
1275 extent_clear_unlock_delalloc(inode, start, end, locked_page,
1276 clear_bits | EXTENT_CLEAR_DATA_RESV,
1277 page_ops);
1278 goto out;
1279}
1280
1281
1282
1283
1284static noinline void async_cow_start(struct btrfs_work *work)
1285{
1286 struct async_chunk *async_chunk;
1287 int compressed_extents;
1288
1289 async_chunk = container_of(work, struct async_chunk, work);
1290
1291 compressed_extents = compress_file_range(async_chunk);
1292 if (compressed_extents == 0) {
1293 btrfs_add_delayed_iput(async_chunk->inode);
1294 async_chunk->inode = NULL;
1295 }
1296}
1297
1298
1299
1300
1301static noinline void async_cow_submit(struct btrfs_work *work)
1302{
1303 struct async_chunk *async_chunk = container_of(work, struct async_chunk,
1304 work);
1305 struct btrfs_fs_info *fs_info = btrfs_work_owner(work);
1306 unsigned long nr_pages;
1307
1308 nr_pages = (async_chunk->end - async_chunk->start + PAGE_SIZE) >>
1309 PAGE_SHIFT;
1310
1311
1312
1313
1314
1315
1316
1317 if (async_chunk->inode)
1318 submit_compressed_extents(async_chunk);
1319
1320
1321 if (atomic_sub_return(nr_pages, &fs_info->async_delalloc_pages) <
1322 5 * SZ_1M)
1323 cond_wake_up_nomb(&fs_info->async_submit_wait);
1324}
1325
1326static noinline void async_cow_free(struct btrfs_work *work)
1327{
1328 struct async_chunk *async_chunk;
1329
1330 async_chunk = container_of(work, struct async_chunk, work);
1331 if (async_chunk->inode)
1332 btrfs_add_delayed_iput(async_chunk->inode);
1333 if (async_chunk->blkcg_css)
1334 css_put(async_chunk->blkcg_css);
1335
1336
1337
1338
1339 if (atomic_dec_and_test(async_chunk->pending))
1340 kvfree(async_chunk->pending);
1341}
1342
/*
 * Hand the delalloc range [start, end] over to the delalloc workers.
 *
 * The range is first unlocked in the inode's io_tree.  It is then cut into
 * pieces of at most 512K (or kept as a single piece when the inode has
 * BTRFS_INODE_NOCOMPRESS set and the FORCE_COMPRESS mount option is off).
 * One struct async_chunk per piece is initialised inside a single
 * kvmalloc'ed struct async_cow and queued on fs_info->delalloc_workers.
 *
 * @inode:        inode owning the range
 * @wbc:          writeback control; supplies the blkcg css and write flags
 * @locked_page:  page locked by the caller, may be NULL
 * @start, @end:  inclusive byte range
 * @page_started: set to 1 once every chunk has been queued
 * @nr_written:   incremented by the number of pages queued
 *
 * Returns 0 on success.  If the context cannot be allocated, the range is
 * released through extent_clear_unlock_delalloc() and -ENOMEM is returned.
 */
static int cow_file_range_async(struct btrfs_inode *inode,
				struct writeback_control *wbc,
				struct page *locked_page,
				u64 start, u64 end, int *page_started,
				unsigned long *nr_written)
{
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	struct cgroup_subsys_state *blkcg_css = wbc_blkcg_css(wbc);
	struct async_cow *ctx;
	struct async_chunk *async_chunk;
	unsigned long nr_pages;
	u64 cur_end;
	u64 num_chunks = DIV_ROUND_UP(end - start, SZ_512K);
	int i;
	bool should_compress;
	unsigned nofs_flag;
	const unsigned int write_flags = wbc_to_write_flags(wbc);

	unlock_extent(&inode->io_tree, start, end);

	/* Without compression there is no point in splitting the range */
	if (inode->flags & BTRFS_INODE_NOCOMPRESS &&
	    !btrfs_test_opt(fs_info, FORCE_COMPRESS)) {
		num_chunks = 1;
		should_compress = false;
	} else {
		should_compress = true;
	}

	/* The allocation runs inside a NOFS scope even though GFP_KERNEL is passed */
	nofs_flag = memalloc_nofs_save();
	ctx = kvmalloc(struct_size(ctx, chunks, num_chunks), GFP_KERNEL);
	memalloc_nofs_restore(nofs_flag);

	if (!ctx) {
		unsigned clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC |
			EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
			EXTENT_DO_ACCOUNTING;
		unsigned long page_ops = PAGE_UNLOCK | PAGE_START_WRITEBACK |
					 PAGE_END_WRITEBACK | PAGE_SET_ERROR;

		extent_clear_unlock_delalloc(inode, start, end, locked_page,
					     clear_bits, page_ops);
		return -ENOMEM;
	}

	async_chunk = ctx->chunks;
	/* Every chunk points at this shared counter through ->pending */
	atomic_set(&ctx->num_chunks, num_chunks);

	for (i = 0; i < num_chunks; i++) {
		if (should_compress)
			cur_end = min(end, start + SZ_512K - 1);
		else
			cur_end = end;

		/*
		 * Take one inode reference per chunk.
		 * NOTE(review): presumably dropped by the work callbacks, which
		 * are not visible in this block - confirm.
		 */
		ihold(&inode->vfs_inode);
		async_chunk[i].pending = &ctx->num_chunks;
		async_chunk[i].inode = &inode->vfs_inode;
		async_chunk[i].start = start;
		async_chunk[i].end = cur_end;
		async_chunk[i].write_flags = write_flags;
		INIT_LIST_HEAD(&async_chunk[i].extents);

		/*
		 * Only the first chunk receives the caller's locked page;
		 * locked_page is cleared afterwards, so every later chunk is
		 * set up with a NULL locked_page.
		 */
		if (locked_page) {
			/*
			 * The cgroup owner accounting is done once, against
			 * the locked page, using the size of the first chunk.
			 */
			wbc_account_cgroup_owner(wbc, locked_page,
						 cur_end - start);
			async_chunk[i].locked_page = locked_page;
			locked_page = NULL;
		} else {
			async_chunk[i].locked_page = NULL;
		}

		/* A NULL blkcg_css in the chunk stands for the root blkcg */
		if (blkcg_css != blkcg_root_css) {
			css_get(blkcg_css);
			async_chunk[i].blkcg_css = blkcg_css;
		} else {
			async_chunk[i].blkcg_css = NULL;
		}

		btrfs_init_work(&async_chunk[i].work, async_cow_start,
				async_cow_submit, async_cow_free);

		nr_pages = DIV_ROUND_UP(cur_end - start, PAGE_SIZE);
		atomic_add(nr_pages, &fs_info->async_delalloc_pages);

		btrfs_queue_work(fs_info->delalloc_workers, &async_chunk[i].work);

		*nr_written += nr_pages;
		start = cur_end + 1;
	}
	*page_started = 1;
	return 0;
}
1456
1457static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
1458 struct page *locked_page, u64 start,
1459 u64 end, int *page_started,
1460 unsigned long *nr_written)
1461{
1462 int ret;
1463
1464 ret = cow_file_range(inode, locked_page, start, end, page_started,
1465 nr_written, 0);
1466 if (ret)
1467 return ret;
1468
1469 if (*page_started)
1470 return 0;
1471
1472 __set_page_dirty_nobuffers(locked_page);
1473 account_page_redirty(locked_page);
1474 extent_write_locked_range(&inode->vfs_inode, start, end, WB_SYNC_ALL);
1475 *page_started = 1;
1476
1477 return 0;
1478}
1479
1480static noinline int csum_exist_in_range(struct btrfs_fs_info *fs_info,
1481 u64 bytenr, u64 num_bytes)
1482{
1483 int ret;
1484 struct btrfs_ordered_sum *sums;
1485 LIST_HEAD(list);
1486
1487 ret = btrfs_lookup_csums_range(fs_info->csum_root, bytenr,
1488 bytenr + num_bytes - 1, &list, 0);
1489 if (ret == 0 && list_empty(&list))
1490 return 0;
1491
1492 while (!list_empty(&list)) {
1493 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
1494 list_del(&sums->list);
1495 kfree(sums);
1496 }
1497 if (ret < 0)
1498 return ret;
1499 return 1;
1500}
1501
1502static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page,
1503 const u64 start, const u64 end,
1504 int *page_started, unsigned long *nr_written)
1505{
1506 const bool is_space_ino = btrfs_is_free_space_inode(inode);
1507 const bool is_reloc_ino = (inode->root->root_key.objectid ==
1508 BTRFS_DATA_RELOC_TREE_OBJECTID);
1509 const u64 range_bytes = end + 1 - start;
1510 struct extent_io_tree *io_tree = &inode->io_tree;
1511 u64 range_start = start;
1512 u64 count;
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546 count = count_range_bits(io_tree, &range_start, end, range_bytes,
1547 EXTENT_NORESERVE, 0);
1548 if (count > 0 || is_space_ino || is_reloc_ino) {
1549 u64 bytes = count;
1550 struct btrfs_fs_info *fs_info = inode->root->fs_info;
1551 struct btrfs_space_info *sinfo = fs_info->data_sinfo;
1552
1553 if (is_space_ino || is_reloc_ino)
1554 bytes = range_bytes;
1555
1556 spin_lock(&sinfo->lock);
1557 btrfs_space_info_update_bytes_may_use(fs_info, sinfo, bytes);
1558 spin_unlock(&sinfo->lock);
1559
1560 if (count > 0)
1561 clear_extent_bit(io_tree, start, end, EXTENT_NORESERVE,
1562 0, 0, NULL);
1563 }
1564
1565 return cow_file_range(inode, locked_page, start, end, page_started,
1566 nr_written, 1);
1567}
1568
1569
1570
1571
1572
1573
1574
1575
/*
 * Write back the delalloc range [start, end] without COW where possible.
 *
 * The file extent items of the inode are walked from @start.  A REG or
 * PREALLOC extent is written in place only if it passes every check in the
 * loop below; everything else (gaps between items, inline extents, extents
 * failing a check) is accumulated into a COW range beginning at cow_start and
 * handed to fallback_to_cow().
 *
 * Returns 0 on success or a negative errno.  On error, the part of the range
 * from cur_offset up to @end is released with extent_clear_unlock_delalloc().
 */
static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
				       struct page *locked_page,
				       const u64 start, const u64 end,
				       int *page_started,
				       unsigned long *nr_written)
{
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	struct btrfs_root *root = inode->root;
	struct btrfs_path *path;
	/* (u64)-1 means "no COW range is currently being accumulated" */
	u64 cow_start = (u64)-1;
	u64 cur_offset = start;
	int ret;
	bool check_prev = true;
	const bool freespace_inode = btrfs_is_free_space_inode(inode);
	u64 ino = btrfs_ino(inode);
	/* True while a btrfs_inc_nocow_writers() call still has to be undone */
	bool nocow = false;
	u64 disk_bytenr = 0;
	/* With NODATACOW set, regular (not only preallocated) extents qualify */
	const bool force = inode->flags & BTRFS_INODE_NODATACOW;

	path = btrfs_alloc_path();
	if (!path) {
		extent_clear_unlock_delalloc(inode, start, end, locked_page,
					     EXTENT_LOCKED | EXTENT_DELALLOC |
					     EXTENT_DO_ACCOUNTING |
					     EXTENT_DEFRAG, PAGE_UNLOCK |
					     PAGE_START_WRITEBACK |
					     PAGE_END_WRITEBACK);
		return -ENOMEM;
	}

	while (1) {
		struct btrfs_key found_key;
		struct btrfs_file_extent_item *fi;
		struct extent_buffer *leaf;
		u64 extent_end;
		u64 extent_offset;
		u64 num_bytes = 0;
		u64 disk_num_bytes;
		u64 ram_bytes;
		int extent_type;

		nocow = false;

		ret = btrfs_lookup_file_extent(NULL, root, path, ino,
					       cur_offset, 0);
		if (ret < 0)
			goto error;

		/*
		 * No exact match: on the first pass only, step back one slot
		 * if the previous item is an extent data item of this inode,
		 * since it may still cover cur_offset.
		 */
		if (ret > 0 && path->slots[0] > 0 && check_prev) {
			leaf = path->nodes[0];
			btrfs_item_key_to_cpu(leaf, &found_key,
					      path->slots[0] - 1);
			if (found_key.objectid == ino &&
			    found_key.type == BTRFS_EXTENT_DATA_KEY)
				path->slots[0]--;
		}
		check_prev = false;
next_slot:
		/* Move to the next leaf once the current one is exhausted */
		leaf = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0) {
				/* Make the error cleanup cover the pending COW range */
				if (cow_start != (u64)-1)
					cur_offset = cow_start;
				goto error;
			}
			if (ret > 0)
				break;
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		/* Past the items of this inode */
		if (found_key.objectid > ino)
			break;

		/*
		 * Skip items that sort before this inode's extent data items.
		 * An objectid below ino is not expected here, hence the warning.
		 */
		if (WARN_ON_ONCE(found_key.objectid < ino) ||
		    found_key.type < BTRFS_EXTENT_DATA_KEY) {
			path->slots[0]++;
			goto next_slot;
		}

		/* Not an extent data item, or it starts after the range */
		if (found_key.type > BTRFS_EXTENT_DATA_KEY ||
		    found_key.offset > end)
			break;

		/*
		 * The item starts after cur_offset: treat the gap in front of
		 * it as a piece to COW, ending where the item begins.
		 */
		if (found_key.offset > cur_offset) {
			extent_end = found_key.offset;
			extent_type = 0;
			goto out_check;
		}

		/*
		 * The item starts at or before cur_offset and may overlap the
		 * range.
		 */
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		extent_type = btrfs_file_extent_type(leaf, fi);

		ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
		if (extent_type == BTRFS_FILE_EXTENT_REG ||
		    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
			disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
			extent_offset = btrfs_file_extent_offset(leaf, fi);
			extent_end = found_key.offset +
				btrfs_file_extent_num_bytes(leaf, fi);
			disk_num_bytes =
				btrfs_file_extent_disk_num_bytes(leaf, fi);

			/* The extent ends before cur_offset, look at the next one */
			if (extent_end <= cur_offset) {
				path->slots[0]++;
				goto next_slot;
			}

			/* No disk location to overwrite (a hole) */
			if (disk_bytenr == 0)
				goto out_check;

			/* Compressed, encrypted or otherwise encoded extents are COWed */
			if (btrfs_file_extent_compression(leaf, fi) ||
			    btrfs_file_extent_encryption(leaf, fi) ||
			    btrfs_file_extent_other_encoding(leaf, fi))
				goto out_check;

			/*
			 * An extent whose generation is not newer than the
			 * root's last snapshot is COWed (not applied to the
			 * free space inode).
			 * NOTE(review): presumably because such an extent may
			 * be shared with the snapshot - confirm.
			 */
			if (!freespace_inode &&
			    btrfs_file_extent_generation(leaf, fi) <=
			    btrfs_root_last_snapshot(&root->root_item))
				goto out_check;
			if (extent_type == BTRFS_FILE_EXTENT_REG && !force)
				goto out_check;

			/*
			 * The path is released before the remaining checks.
			 * From here on path->nodes[0] is NULL, which the
			 * out_check code below relies on to restart the
			 * search instead of advancing the slot.
			 */
			btrfs_release_path(path);

			ret = btrfs_cross_ref_exist(root, ino,
						    found_key.offset -
						    extent_offset, disk_bytenr, false);
			if (ret) {
				/*
				 * Negative values are real errors; a positive
				 * value just sends this extent down the COW
				 * path.
				 */
				if (ret < 0) {
					if (cow_start != (u64)-1)
						cur_offset = cow_start;
					goto error;
				}

				WARN_ON_ONCE(freespace_inode);
				goto out_check;
			}
			/* Translate cur_offset into a disk byte number */
			disk_bytenr += extent_offset;
			disk_bytenr += cur_offset - found_key.offset;
			num_bytes = min(end + 1, extent_end) - cur_offset;

			/* COW while root->snapshot_force_cow is non-zero */
			if (!freespace_inode && atomic_read(&root->snapshot_force_cow))
				goto out_check;

			/*
			 * COW if checksums exist for the target disk range
			 * (csum_exist_in_range() returned 1).
			 */
			ret = csum_exist_in_range(fs_info, disk_bytenr,
						  num_bytes);
			if (ret) {
				/*
				 * As above: negative is an error, positive
				 * means fall back to COW.
				 */
				if (ret < 0) {
					if (cow_start != (u64)-1)
						cur_offset = cow_start;
					goto error;
				}
				WARN_ON_ONCE(freespace_inode);
				goto out_check;
			}

			if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr))
				goto out_check;
			nocow = true;
		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
			extent_end = found_key.offset + ram_bytes;
			extent_end = ALIGN(extent_end, fs_info->sectorsize);

			/* Inline extent entirely before the requested range */
			if (extent_end <= start) {
				path->slots[0]++;
				goto next_slot;
			}
		} else {

			/* Unknown extent type */
			BUG();
		}
out_check:
		/*
		 * Not a NOCOW candidate: remember where the COW range begins
		 * (if not already recorded) and move on to the next item.
		 */
		if (!nocow) {
			if (cow_start == (u64)-1)
				cow_start = cur_offset;
			cur_offset = extent_end;
			if (cur_offset > end)
				break;
			/* Path was released above, search again from cur_offset */
			if (!path->nodes[0])
				continue;
			path->slots[0]++;
			goto next_slot;
		}

		/*
		 * A NOCOW extent was found.  First COW the accumulated range
		 * in front of it, which ends right before found_key.offset.
		 */
		if (cow_start != (u64)-1) {
			ret = fallback_to_cow(inode, locked_page,
					      cow_start, found_key.offset - 1,
					      page_started, nr_written);
			if (ret)
				goto error;
			cow_start = (u64)-1;
		}

		if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
			u64 orig_start = found_key.offset - extent_offset;
			struct extent_map *em;

			em = create_io_em(inode, cur_offset, num_bytes,
					  orig_start,
					  disk_bytenr,
					  num_bytes,
					  disk_num_bytes,
					  ram_bytes, BTRFS_COMPRESS_NONE,
					  BTRFS_ORDERED_PREALLOC);
			if (IS_ERR(em)) {
				ret = PTR_ERR(em);
				goto error;
			}
			free_extent_map(em);
			ret = btrfs_add_ordered_extent(inode, cur_offset,
						       disk_bytenr, num_bytes,
						       num_bytes,
						       BTRFS_ORDERED_PREALLOC);
			if (ret) {
				/* Undo the extent map created just above */
				btrfs_drop_extent_cache(inode, cur_offset,
							cur_offset + num_bytes - 1,
							0);
				goto error;
			}
		} else {
			ret = btrfs_add_ordered_extent(inode, cur_offset,
						       disk_bytenr, num_bytes,
						       num_bytes,
						       BTRFS_ORDERED_NOCOW);
			if (ret)
				goto error;
		}

		if (nocow)
			btrfs_dec_nocow_writers(fs_info, disk_bytenr);
		nocow = false;

		if (root->root_key.objectid ==
		    BTRFS_DATA_RELOC_TREE_OBJECTID)
			/*
			 * A failure here is only acted upon after the
			 * extent_clear_unlock_delalloc() call and the
			 * cur_offset update below, so that the error path does
			 * not process this piece a second time.
			 */
			ret = btrfs_reloc_clone_csums(inode, cur_offset,
						      num_bytes);

		extent_clear_unlock_delalloc(inode, cur_offset,
					     cur_offset + num_bytes - 1,
					     locked_page, EXTENT_LOCKED |
					     EXTENT_DELALLOC |
					     EXTENT_CLEAR_DATA_RESV,
					     PAGE_UNLOCK | PAGE_SET_ORDERED);

		cur_offset = extent_end;

		/*
		 * Deferred btrfs_reloc_clone_csums() error: cur_offset is now
		 * past this piece, so the error path only handles the rest.
		 */
		if (ret)
			goto error;
		if (cur_offset > end)
			break;
	}
	btrfs_release_path(path);

	/* Whatever is left without a matching extent item must be COWed */
	if (cur_offset <= end && cow_start == (u64)-1)
		cow_start = cur_offset;

	if (cow_start != (u64)-1) {
		/*
		 * cur_offset == end makes the "cur_offset < end" test in the
		 * error path false, so no extra cleanup runs if
		 * fallback_to_cow() fails.
		 */
		cur_offset = end;
		ret = fallback_to_cow(inode, locked_page, cow_start, end,
				      page_started, nr_written);
		if (ret)
			goto error;
	}

error:
	/* Reached on success as well; ret is 0 in that case */
	if (nocow)
		btrfs_dec_nocow_writers(fs_info, disk_bytenr);

	if (ret && cur_offset < end)
		extent_clear_unlock_delalloc(inode, cur_offset, end,
					     locked_page, EXTENT_LOCKED |
					     EXTENT_DELALLOC | EXTENT_DEFRAG |
					     EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
					     PAGE_START_WRITEBACK |
					     PAGE_END_WRITEBACK);
	btrfs_free_path(path);
	return ret;
}
1926
1927static bool should_nocow(struct btrfs_inode *inode, u64 start, u64 end)
1928{
1929 if (inode->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)) {
1930 if (inode->defrag_bytes &&
1931 test_range_bit(&inode->io_tree, start, end, EXTENT_DEFRAG,
1932 0, NULL))
1933 return false;
1934 return true;
1935 }
1936 return false;
1937}
1938
1939
1940
1941
1942
1943int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page,
1944 u64 start, u64 end, int *page_started, unsigned long *nr_written,
1945 struct writeback_control *wbc)
1946{
1947 int ret;
1948 const bool zoned = btrfs_is_zoned(inode->root->fs_info);
1949
1950 if (should_nocow(inode, start, end)) {
1951 ASSERT(!zoned);
1952 ret = run_delalloc_nocow(inode, locked_page, start, end,
1953 page_started, nr_written);
1954 } else if (!inode_can_compress(inode) ||
1955 !inode_need_compress(inode, start, end)) {
1956 if (zoned)
1957 ret = run_delalloc_zoned(inode, locked_page, start, end,
1958 page_started, nr_written);
1959 else
1960 ret = cow_file_range(inode, locked_page, start, end,
1961 page_started, nr_written, 1);
1962 } else {
1963 set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags);
1964 ret = cow_file_range_async(inode, wbc, locked_page, start, end,
1965 page_started, nr_written);
1966 }
1967 ASSERT(ret <= 0);
1968 if (ret)
1969 btrfs_cleanup_ordered_extents(inode, locked_page, start,
1970 end - start + 1);
1971 return ret;
1972}
1973
1974void btrfs_split_delalloc_extent(struct inode *inode,
1975 struct extent_state *orig, u64 split)
1976{
1977 u64 size;
1978
1979
1980 if (!(orig->state & EXTENT_DELALLOC))
1981 return;
1982
1983 size = orig->end - orig->start + 1;
1984 if (size > BTRFS_MAX_EXTENT_SIZE) {
1985 u32 num_extents;
1986 u64 new_size;
1987
1988
1989
1990
1991
1992 new_size = orig->end - split + 1;
1993 num_extents = count_max_extents(new_size);
1994 new_size = split - orig->start;
1995 num_extents += count_max_extents(new_size);
1996 if (count_max_extents(size) >= num_extents)
1997 return;
1998 }
1999
2000 spin_lock(&BTRFS_I(inode)->lock);
2001 btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
2002 spin_unlock(&BTRFS_I(inode)->lock);
2003}
2004
2005
2006
2007
2008
2009
2010void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new,
2011 struct extent_state *other)
2012{
2013 u64 new_size, old_size;
2014 u32 num_extents;
2015
2016
2017 if (!(other->state & EXTENT_DELALLOC))
2018 return;
2019
2020 if (new->start > other->start)
2021 new_size = new->end - other->start + 1;
2022 else
2023 new_size = other->end - new->start + 1;
2024
2025
2026 if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
2027 spin_lock(&BTRFS_I(inode)->lock);
2028 btrfs_mod_outstanding_extents(BTRFS_I(inode), -1);
2029 spin_unlock(&BTRFS_I(inode)->lock);
2030 return;
2031 }
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051 old_size = other->end - other->start + 1;
2052 num_extents = count_max_extents(old_size);
2053 old_size = new->end - new->start + 1;
2054 num_extents += count_max_extents(old_size);
2055 if (count_max_extents(new_size) >= num_extents)
2056 return;
2057
2058 spin_lock(&BTRFS_I(inode)->lock);
2059 btrfs_mod_outstanding_extents(BTRFS_I(inode), -1);
2060 spin_unlock(&BTRFS_I(inode)->lock);
2061}
2062
2063static void btrfs_add_delalloc_inodes(struct btrfs_root *root,
2064 struct inode *inode)
2065{
2066 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2067
2068 spin_lock(&root->delalloc_lock);
2069 if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
2070 list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
2071 &root->delalloc_inodes);
2072 set_bit(BTRFS_INODE_IN_DELALLOC_LIST,
2073 &BTRFS_I(inode)->runtime_flags);
2074 root->nr_delalloc_inodes++;
2075 if (root->nr_delalloc_inodes == 1) {
2076 spin_lock(&fs_info->delalloc_root_lock);
2077 BUG_ON(!list_empty(&root->delalloc_root));
2078 list_add_tail(&root->delalloc_root,
2079 &fs_info->delalloc_roots);
2080 spin_unlock(&fs_info->delalloc_root_lock);
2081 }
2082 }
2083 spin_unlock(&root->delalloc_lock);
2084}
2085
2086
2087void __btrfs_del_delalloc_inode(struct btrfs_root *root,
2088 struct btrfs_inode *inode)
2089{
2090 struct btrfs_fs_info *fs_info = root->fs_info;
2091
2092 if (!list_empty(&inode->delalloc_inodes)) {
2093 list_del_init(&inode->delalloc_inodes);
2094 clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
2095 &inode->runtime_flags);
2096 root->nr_delalloc_inodes--;
2097 if (!root->nr_delalloc_inodes) {
2098 ASSERT(list_empty(&root->delalloc_inodes));
2099 spin_lock(&fs_info->delalloc_root_lock);
2100 BUG_ON(list_empty(&root->delalloc_root));
2101 list_del_init(&root->delalloc_root);
2102 spin_unlock(&fs_info->delalloc_root_lock);
2103 }
2104 }
2105}
2106
/*
 * Locked wrapper around __btrfs_del_delalloc_inode(): removes @inode from
 * @root's delalloc inode list while holding root->delalloc_lock.
 */
static void btrfs_del_delalloc_inode(struct btrfs_root *root,
				     struct btrfs_inode *inode)
{
	spin_lock(&root->delalloc_lock);
	__btrfs_del_delalloc_inode(root, inode);
	spin_unlock(&root->delalloc_lock);
}
2114
2115
2116
2117
2118
2119void btrfs_set_delalloc_extent(struct inode *inode, struct extent_state *state,
2120 unsigned *bits)
2121{
2122 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2123
2124 if ((*bits & EXTENT_DEFRAG) && !(*bits & EXTENT_DELALLOC))
2125 WARN_ON(1);
2126
2127
2128
2129
2130
2131 if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
2132 struct btrfs_root *root = BTRFS_I(inode)->root;
2133 u64 len = state->end + 1 - state->start;
2134 u32 num_extents = count_max_extents(len);
2135 bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode));
2136
2137 spin_lock(&BTRFS_I(inode)->lock);
2138 btrfs_mod_outstanding_extents(BTRFS_I(inode), num_extents);
2139 spin_unlock(&BTRFS_I(inode)->lock);
2140
2141
2142 if (btrfs_is_testing(fs_info))
2143 return;
2144
2145 percpu_counter_add_batch(&fs_info->delalloc_bytes, len,
2146 fs_info->delalloc_batch);
2147 spin_lock(&BTRFS_I(inode)->lock);
2148 BTRFS_I(inode)->delalloc_bytes += len;
2149 if (*bits & EXTENT_DEFRAG)
2150 BTRFS_I(inode)->defrag_bytes += len;
2151 if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
2152 &BTRFS_I(inode)->runtime_flags))
2153 btrfs_add_delalloc_inodes(root, inode);
2154 spin_unlock(&BTRFS_I(inode)->lock);
2155 }
2156
2157 if (!(state->state & EXTENT_DELALLOC_NEW) &&
2158 (*bits & EXTENT_DELALLOC_NEW)) {
2159 spin_lock(&BTRFS_I(inode)->lock);
2160 BTRFS_I(inode)->new_delalloc_bytes += state->end + 1 -
2161 state->start;
2162 spin_unlock(&BTRFS_I(inode)->lock);
2163 }
2164}
2165
2166
2167
2168
2169
/*
 * Update the inode and filesystem accounting when bits are about to be
 * cleared from an extent state.
 *
 * @state is the extent state before the change, *@bits the bits being
 * cleared.  Only bits that are both set in @state and present in *@bits
 * trigger accounting.
 */
void btrfs_clear_delalloc_extent(struct inode *vfs_inode,
				 struct extent_state *state, unsigned *bits)
{
	struct btrfs_inode *inode = BTRFS_I(vfs_inode);
	struct btrfs_fs_info *fs_info = btrfs_sb(vfs_inode->i_sb);
	u64 len = state->end + 1 - state->start;
	u32 num_extents = count_max_extents(len);

	if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG)) {
		spin_lock(&inode->lock);
		inode->defrag_bytes -= len;
		spin_unlock(&inode->lock);
	}

	/*
	 * Delalloc accounting is undone only when EXTENT_DELALLOC is actually
	 * set on the state and is one of the bits being cleared.
	 */
	if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
		struct btrfs_root *root = inode->root;
		bool do_list = !btrfs_is_free_space_inode(inode);

		spin_lock(&inode->lock);
		btrfs_mod_outstanding_extents(inode, -num_extents);
		spin_unlock(&inode->lock);

		/*
		 * Release the metadata reservation when asked to via
		 * EXTENT_CLEAR_META_RESV, except for inodes of the tree root.
		 * NOTE(review): presumably no metadata is reserved for tree
		 * root inodes - confirm.
		 */
		if (*bits & EXTENT_CLEAR_META_RESV &&
		    root != fs_info->tree_root)
			btrfs_delalloc_release_metadata(inode, len, false);

		/* No further accounting for a testing fs_info */
		if (btrfs_is_testing(fs_info))
			return;

		/*
		 * The data reservation is freed only for inodes outside the
		 * data relocation tree that are not the free space inode, and
		 * only if the state does not carry EXTENT_NORESERVE.
		 */
		if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID &&
		    do_list && !(state->state & EXTENT_NORESERVE) &&
		    (*bits & EXTENT_CLEAR_DATA_RESV))
			btrfs_free_reserved_data_space_noquota(fs_info, len);

		percpu_counter_add_batch(&fs_info->delalloc_bytes, -len,
					 fs_info->delalloc_batch);
		spin_lock(&inode->lock);
		inode->delalloc_bytes -= len;
		/* Last delalloc bytes gone: drop the inode from the root's list */
		if (do_list && inode->delalloc_bytes == 0 &&
		    test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
					&inode->runtime_flags))
			btrfs_del_delalloc_inode(root, inode);
		spin_unlock(&inode->lock);
	}

	if ((state->state & EXTENT_DELALLOC_NEW) &&
	    (*bits & EXTENT_DELALLOC_NEW)) {
		spin_lock(&inode->lock);
		ASSERT(inode->new_delalloc_bytes >= len);
		inode->new_delalloc_bytes -= len;
		/* Optionally credit the cleared bytes to the inode's byte count */
		if (*bits & EXTENT_ADD_INODE_BYTES)
			inode_add_bytes(&inode->vfs_inode, len);
		spin_unlock(&inode->lock);
	}
}
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
2252 unsigned long bio_flags)
2253{
2254 struct inode *inode = page->mapping->host;
2255 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2256 u64 logical = bio->bi_iter.bi_sector << 9;
2257 u32 bio_len = bio->bi_iter.bi_size;
2258 struct extent_map *em;
2259 int ret = 0;
2260 struct btrfs_io_geometry geom;
2261
2262 if (bio_flags & EXTENT_BIO_COMPRESSED)
2263 return 0;
2264
2265 em = btrfs_get_chunk_map(fs_info, logical, fs_info->sectorsize);
2266 if (IS_ERR(em))
2267 return PTR_ERR(em);
2268 ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(bio), logical, &geom);
2269 if (ret < 0)
2270 goto out;
2271
2272 if (geom.len < bio_len + size)
2273 ret = 1;
2274out:
2275 free_extent_map(em);
2276 return ret;
2277}
2278
2279
2280
2281
2282
2283
2284
2285
2286
/*
 * Checksum @bio through btrfs_csum_one_bio().  @dio_file_offset is not used.
 * btrfs_submit_data_bio() passes this function to btrfs_wq_submit_bio() as
 * the callback for asynchronously checksummed writes.
 */
static blk_status_t btrfs_submit_bio_start(struct inode *inode, struct bio *bio,
					   u64 dio_file_offset)
{
	return btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0);
}
2292
2293
2294
2295
2296
2297
/*
 * Split the extent map covering the file range [start, start + len) into up
 * to three maps: the first @pre bytes, the last @post bytes, and what remains
 * in between.
 *
 * With @pre == 0 only two maps result (everything but the last @post bytes,
 * then the last @post bytes); with @post == 0 the two maps are the first @pre
 * bytes and the rest.  If both are 0 nothing is done.
 *
 * Returns 0 on success, -ENOMEM if a new extent map cannot be allocated and
 * -EIO if no extent map is found for the range.
 */
static int split_zoned_em(struct btrfs_inode *inode, u64 start, u64 len,
			  u64 pre, u64 post)
{
	struct extent_map_tree *em_tree = &inode->extent_tree;
	struct extent_map *em;
	struct extent_map *split_pre = NULL;
	struct extent_map *split_mid = NULL;
	struct extent_map *split_post = NULL;
	int ret = 0;
	unsigned long flags;

	/* Nothing to split */
	if (pre == 0 && post == 0)
		return 0;

	/* Allocate everything up front, before any lock is taken */
	split_pre = alloc_extent_map();
	if (pre)
		split_mid = alloc_extent_map();
	if (post)
		split_post = alloc_extent_map();
	if (!split_pre || (pre && !split_mid) || (post && !split_post)) {
		ret = -ENOMEM;
		goto out;
	}

	ASSERT(pre + post < len);

	lock_extent(&inode->io_tree, start, start + len - 1);
	write_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, start, len);
	if (!em) {
		ret = -EIO;
		goto out_unlock;
	}

	ASSERT(em->len == len);
	ASSERT(!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags));
	ASSERT(em->block_start < EXTENT_MAP_LAST_BYTE);
	ASSERT(test_bit(EXTENT_FLAG_PINNED, &em->flags));
	ASSERT(!test_bit(EXTENT_FLAG_LOGGING, &em->flags));
	ASSERT(!list_empty(&em->list));

	/*
	 * Save the flags (PINNED included) for the new maps, then clear PINNED
	 * on the map that is about to be replaced.
	 */
	flags = em->flags;
	clear_bit(EXTENT_FLAG_PINNED, &em->flags);

	/* First part: [em->start, em->start + (pre ? pre : em->len - post)) */
	split_pre->start = em->start;
	split_pre->len = (pre ? pre : em->len - post);
	split_pre->orig_start = split_pre->start;
	split_pre->block_start = em->block_start;
	split_pre->block_len = split_pre->len;
	split_pre->orig_block_len = split_pre->block_len;
	split_pre->ram_bytes = split_pre->len;
	split_pre->flags = flags;
	split_pre->compress_type = em->compress_type;
	split_pre->generation = em->generation;

	replace_extent_mapping(em_tree, em, split_pre, 1);

	/*
	 * em is no longer in the tree but is still referenced here, so its
	 * fields can keep being used to fill in the remaining pieces.
	 */
	if (pre) {
		/* Middle part, between the first @pre and the last @post bytes */
		split_mid->start = em->start + pre;
		split_mid->len = em->len - pre - post;
		split_mid->orig_start = split_mid->start;
		split_mid->block_start = em->block_start + pre;
		split_mid->block_len = split_mid->len;
		split_mid->orig_block_len = split_mid->block_len;
		split_mid->ram_bytes = split_mid->len;
		split_mid->flags = flags;
		split_mid->compress_type = em->compress_type;
		split_mid->generation = em->generation;
		add_extent_mapping(em_tree, split_mid, 1);
	}

	if (post) {
		/* Last @post bytes */
		split_post->start = em->start + em->len - post;
		split_post->len = post;
		split_post->orig_start = split_post->start;
		split_post->block_start = em->block_start + em->len - post;
		split_post->block_len = split_post->len;
		split_post->orig_block_len = split_post->block_len;
		split_post->ram_bytes = split_post->len;
		split_post->flags = flags;
		split_post->compress_type = em->compress_type;
		split_post->generation = em->generation;
		add_extent_mapping(em_tree, split_post, 1);
	}

	/* Drop the reference obtained from lookup_extent_mapping() */
	free_extent_map(em);
	/*
	 * Second put on purpose.
	 * NOTE(review): presumably the reference the tree held on em before
	 * replace_extent_mapping() - confirm.
	 */
	free_extent_map(em);

out_unlock:
	write_unlock(&em_tree->lock);
	unlock_extent(&inode->io_tree, start, start + len - 1);
out:
	/* Drop our own reference on each new map; pointers may be NULL here */
	free_extent_map(split_pre);
	free_extent_map(split_mid);
	free_extent_map(split_post);

	return ret;
}
2407
2408static blk_status_t extract_ordered_extent(struct btrfs_inode *inode,
2409 struct bio *bio, loff_t file_offset)
2410{
2411 struct btrfs_ordered_extent *ordered;
2412 u64 start = (u64)bio->bi_iter.bi_sector << SECTOR_SHIFT;
2413 u64 file_len;
2414 u64 len = bio->bi_iter.bi_size;
2415 u64 end = start + len;
2416 u64 ordered_end;
2417 u64 pre, post;
2418 int ret = 0;
2419
2420 ordered = btrfs_lookup_ordered_extent(inode, file_offset);
2421 if (WARN_ON_ONCE(!ordered))
2422 return BLK_STS_IOERR;
2423
2424
2425 if (ordered->disk_num_bytes == len)
2426 goto out;
2427
2428
2429 if (WARN_ON_ONCE(ordered->bytes_left != ordered->disk_num_bytes)) {
2430 ret = -EINVAL;
2431 goto out;
2432 }
2433
2434
2435 if (WARN_ON_ONCE(ordered->disk_num_bytes != ordered->num_bytes)) {
2436 ret = -EINVAL;
2437 goto out;
2438 }
2439
2440 ordered_end = ordered->disk_bytenr + ordered->disk_num_bytes;
2441
2442 if (WARN_ON_ONCE(start < ordered->disk_bytenr || end > ordered_end)) {
2443 ret = -EINVAL;
2444 goto out;
2445 }
2446
2447
2448 if (WARN_ON_ONCE(!list_empty(&ordered->list))) {
2449 ret = -EINVAL;
2450 goto out;
2451 }
2452
2453 file_len = ordered->num_bytes;
2454 pre = start - ordered->disk_bytenr;
2455 post = ordered_end - end;
2456
2457 ret = btrfs_split_ordered_extent(ordered, pre, post);
2458 if (ret)
2459 goto out;
2460 ret = split_zoned_em(inode, file_offset, file_len, pre, post);
2461
2462out:
2463 btrfs_put_ordered_extent(ordered);
2464
2465 return errno_to_blk_status(ret);
2466}
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
/*
 * Submit a data bio for @inode.
 *
 * Zone append bios first go through extract_ordered_extent().  After that:
 *  - anything that is not a write is hooked up for end_io processing via
 *    btrfs_bio_wq_end_io(); compressed reads are handed to
 *    btrfs_submit_compressed_read(), other reads get their checksums looked
 *    up and are then mapped;
 *  - writes that need checksums and have no sync writers on the inode are
 *    queued through btrfs_wq_submit_bio() for asynchronous checksumming,
 *    unless the inode belongs to the data relocation tree, in which case the
 *    bio is mapped directly;
 *  - other writes that need checksums are checksummed inline and then mapped;
 *  - writes that skip checksums are mapped directly.
 *
 * On any error the bio is completed here with that status, which is also
 * returned.
 */
blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
				   int mirror_num, unsigned long bio_flags)

{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
	blk_status_t ret = 0;
	int skip_sum;
	/* Async checksumming is used only while the inode has no sync writers */
	int async = !atomic_read(&BTRFS_I(inode)->sync_writers);

	/* No checksums for NODATASUM inodes or when there is no csum root */
	skip_sum = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) ||
		   !fs_info->csum_root;

	if (btrfs_is_free_space_inode(BTRFS_I(inode)))
		metadata = BTRFS_WQ_ENDIO_FREE_SPACE;

	if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
		struct page *page = bio_first_bvec_all(bio)->bv_page;
		loff_t file_offset = page_offset(page);

		ret = extract_ordered_extent(BTRFS_I(inode), bio, file_offset);
		if (ret)
			goto out;
	}

	if (btrfs_op(bio) != BTRFS_MAP_WRITE) {
		ret = btrfs_bio_wq_end_io(fs_info, bio, metadata);
		if (ret)
			goto out;

		if (bio_flags & EXTENT_BIO_COMPRESSED) {
			ret = btrfs_submit_compressed_read(inode, bio,
							   mirror_num,
							   bio_flags);
			goto out;
		} else {
			/*
			 * skip_sum is deliberately not consulted here; the
			 * lookup is called unconditionally.
			 * NOTE(review): presumably btrfs_lookup_bio_sums()
			 * decides on its own whether checksums are needed -
			 * confirm.
			 */
			ret = btrfs_lookup_bio_sums(inode, bio, NULL);
			if (ret)
				goto out;
		}
		goto mapit;
	} else if (async && !skip_sum) {
		/*
		 * Data relocation inodes bypass checksumming here.
		 * NOTE(review): presumably their csums were set up elsewhere
		 * (see btrfs_reloc_clone_csums() users) - confirm.
		 */
		if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
			goto mapit;

		/* Checksum asynchronously; the bio is submitted by that path */
		ret = btrfs_wq_submit_bio(inode, bio, mirror_num, bio_flags,
					  0, btrfs_submit_bio_start);
		goto out;
	} else if (!skip_sum) {
		/* Sync writers present: checksum inline */
		ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0);
		if (ret)
			goto out;
	}

mapit:
	ret = btrfs_map_bio(fs_info, bio, mirror_num);

out:
	/* Complete the bio ourselves on failure so the error is propagated */
	if (ret) {
		bio->bi_status = ret;
		bio_endio(bio);
	}
	return ret;
}
2557
2558
2559
2560
2561
2562static int add_pending_csums(struct btrfs_trans_handle *trans,
2563 struct list_head *list)
2564{
2565 struct btrfs_ordered_sum *sum;
2566 int ret;
2567
2568 list_for_each_entry(sum, list, list) {
2569 trans->adding_csums = true;
2570 ret = btrfs_csum_file_blocks(trans, trans->fs_info->csum_root, sum);
2571 trans->adding_csums = false;
2572 if (ret)
2573 return ret;
2574 }
2575 return 0;
2576}
2577
2578static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
2579 const u64 start,
2580 const u64 len,
2581 struct extent_state **cached_state)
2582{
2583 u64 search_start = start;
2584 const u64 end = start + len - 1;
2585
2586 while (search_start < end) {
2587 const u64 search_len = end - search_start + 1;
2588 struct extent_map *em;
2589 u64 em_len;
2590 int ret = 0;
2591
2592 em = btrfs_get_extent(inode, NULL, 0, search_start, search_len);
2593 if (IS_ERR(em))
2594 return PTR_ERR(em);
2595
2596 if (em->block_start != EXTENT_MAP_HOLE)
2597 goto next;
2598
2599 em_len = em->len;
2600 if (em->start < search_start)
2601 em_len -= search_start - em->start;
2602 if (em_len > search_len)
2603 em_len = search_len;
2604
2605 ret = set_extent_bit(&inode->io_tree, search_start,
2606 search_start + em_len - 1,
2607 EXTENT_DELALLOC_NEW, 0, NULL, cached_state,
2608 GFP_NOFS, NULL);
2609next:
2610 search_start = extent_map_end(em);
2611 free_extent_map(em);
2612 if (ret)
2613 return ret;
2614 }
2615 return 0;
2616}
2617
2618int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
2619 unsigned int extra_bits,
2620 struct extent_state **cached_state)
2621{
2622 WARN_ON(PAGE_ALIGNED(end));
2623
2624 if (start >= i_size_read(&inode->vfs_inode) &&
2625 !(inode->flags & BTRFS_INODE_PREALLOC)) {
2626
2627
2628
2629
2630 extra_bits |= EXTENT_DELALLOC_NEW;
2631 } else {
2632 int ret;
2633
2634 ret = btrfs_find_new_delalloc_bytes(inode, start,
2635 end + 1 - start,
2636 cached_state);
2637 if (ret)
2638 return ret;
2639 }
2640
2641 return set_extent_delalloc(&inode->io_tree, start, end, extra_bits,
2642 cached_state);
2643}
2644
2645
/*
 * Work item queued by btrfs_writepage_cow_fixup() and processed by
 * btrfs_writepage_fixup_worker().
 */
struct btrfs_writepage_fixup {
	/* Page to fix up; a reference is held while the work is pending */
	struct page *page;
	/* Inode owning the page; a reference is held while the work is pending */
	struct inode *inode;
	/* Work item queued on fs_info->fixup_workers */
	struct btrfs_work work;
};
2651
/*
 * Worker for the fixups queued by btrfs_writepage_cow_fixup().
 *
 * Reserves delalloc space for the page and, if the page is still dirty,
 * still checked and still attached to a mapping, marks its range as delalloc
 * in the inode's io_tree.  On the way out the Checked flag is cleared and the
 * page and inode references taken when the fixup was queued are dropped
 * (the inode one via a delayed iput).
 */
static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
{
	struct btrfs_writepage_fixup *fixup;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	struct extent_changeset *data_reserved = NULL;
	struct page *page;
	struct btrfs_inode *inode;
	u64 page_start;
	u64 page_end;
	int ret = 0;
	bool free_delalloc_space = true;

	fixup = container_of(work, struct btrfs_writepage_fixup, work);
	page = fixup->page;
	inode = BTRFS_I(fixup->inode);
	page_start = page_offset(page);
	page_end = page_offset(page) + PAGE_SIZE - 1;

	/*
	 * The reservation is made before the page is locked; its result is
	 * only evaluated once the page state has been checked below.
	 */
	ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
					   PAGE_SIZE);
again:
	lock_page(page);

	/*
	 * The page may have changed while the fixup was queued.  If it lost
	 * its mapping, is no longer dirty or no longer checked, there is
	 * nothing to fix up.
	 */
	if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
		/*
		 * Give back a successful reservation.  A failed reservation
		 * is not treated as an error in this case: ret is reset to 0
		 * so the page is not flagged with an error below.
		 */
		if (!ret) {
			btrfs_delalloc_release_extents(inode, PAGE_SIZE);
			btrfs_delalloc_release_space(inode, data_reserved,
						     page_start, PAGE_SIZE,
						     true);
		}
		ret = 0;
		goto out_page;
	}

	/*
	 * The page still needs the fixup but the reservation failed: take the
	 * error path at out_page.
	 */
	if (ret)
		goto out_page;

	lock_extent_bits(&inode->io_tree, page_start, page_end, &cached_state);

	/* Already ordered: only the reservation has to be released */
	if (PageOrdered(page))
		goto out_reserved;

	ordered = btrfs_lookup_ordered_range(inode, page_start, PAGE_SIZE);
	if (ordered) {
		/* Wait for the ordered extent with all locks dropped, then retry */
		unlock_extent_cached(&inode->io_tree, page_start, page_end,
				     &cached_state);
		unlock_page(page);
		btrfs_start_ordered_extent(ordered, 1);
		btrfs_put_ordered_extent(ordered);
		goto again;
	}

	ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0,
					&cached_state);
	if (ret)
		goto out_reserved;

	/*
	 * The range is delalloc now, so the reserved data space must be kept
	 * (free_delalloc_space = false).  The page has been locked since its
	 * dirty bit was checked above, hence the BUG_ON.
	 */
	BUG_ON(!PageDirty(page));
	free_delalloc_space = false;
out_reserved:
	btrfs_delalloc_release_extents(inode, PAGE_SIZE);
	if (free_delalloc_space)
		btrfs_delalloc_release_space(inode, data_reserved, page_start,
					     PAGE_SIZE, true);
	unlock_extent_cached(&inode->io_tree, page_start, page_end,
			     &cached_state);
out_page:
	if (ret) {
		/*
		 * The fixup failed: record the error on the mapping and the
		 * page, and clear the page's dirty state.
		 */
		mapping_set_error(page->mapping, ret);
		end_extent_writepage(page, ret, page_start, page_end);
		clear_page_dirty_for_io(page);
		SetPageError(page);
	}
	ClearPageChecked(page);
	unlock_page(page);
	put_page(page);
	kfree(fixup);
	extent_changeset_free(data_reserved);

	/*
	 * Drop the inode reference through a delayed iput rather than a
	 * direct iput().
	 * NOTE(review): presumably because a final iput() is not safe in this
	 * worker context - confirm.
	 */
	btrfs_add_delayed_iput(&inode->vfs_inode);
}
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792int btrfs_writepage_cow_fixup(struct page *page)
2793{
2794 struct inode *inode = page->mapping->host;
2795 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2796 struct btrfs_writepage_fixup *fixup;
2797
2798
2799 if (PageOrdered(page))
2800 return 0;
2801
2802
2803
2804
2805
2806
2807
2808
2809 if (PageChecked(page))
2810 return -EAGAIN;
2811
2812 fixup = kzalloc(sizeof(*fixup), GFP_NOFS);
2813 if (!fixup)
2814 return -EAGAIN;
2815
2816
2817
2818
2819
2820
2821
2822 ihold(inode);
2823 SetPageChecked(page);
2824 get_page(page);
2825 btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL);
2826 fixup->page = page;
2827 fixup->inode = inode;
2828 btrfs_queue_work(fs_info->fixup_workers, &fixup->work);
2829
2830 return -EAGAIN;
2831}
2832
2833static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
2834 struct btrfs_inode *inode, u64 file_pos,
2835 struct btrfs_file_extent_item *stack_fi,
2836 const bool update_inode_bytes,
2837 u64 qgroup_reserved)
2838{
2839 struct btrfs_root *root = inode->root;
2840 const u64 sectorsize = root->fs_info->sectorsize;
2841 struct btrfs_path *path;
2842 struct extent_buffer *leaf;
2843 struct btrfs_key ins;
2844 u64 disk_num_bytes = btrfs_stack_file_extent_disk_num_bytes(stack_fi);
2845 u64 disk_bytenr = btrfs_stack_file_extent_disk_bytenr(stack_fi);
2846 u64 num_bytes = btrfs_stack_file_extent_num_bytes(stack_fi);
2847 u64 ram_bytes = btrfs_stack_file_extent_ram_bytes(stack_fi);
2848 struct btrfs_drop_extents_args drop_args = { 0 };
2849 int ret;
2850
2851 path = btrfs_alloc_path();
2852 if (!path)
2853 return -ENOMEM;
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864 drop_args.path = path;
2865 drop_args.start = file_pos;
2866 drop_args.end = file_pos + num_bytes;
2867 drop_args.replace_extent = true;
2868 drop_args.extent_item_size = sizeof(*stack_fi);
2869 ret = btrfs_drop_extents(trans, root, inode, &drop_args);
2870 if (ret)
2871 goto out;
2872
2873 if (!drop_args.extent_inserted) {
2874 ins.objectid = btrfs_ino(inode);
2875 ins.offset = file_pos;
2876 ins.type = BTRFS_EXTENT_DATA_KEY;
2877
2878 ret = btrfs_insert_empty_item(trans, root, path, &ins,
2879 sizeof(*stack_fi));
2880 if (ret)
2881 goto out;
2882 }
2883 leaf = path->nodes[0];
2884 btrfs_set_stack_file_extent_generation(stack_fi, trans->transid);
2885 write_extent_buffer(leaf, stack_fi,
2886 btrfs_item_ptr_offset(leaf, path->slots[0]),
2887 sizeof(struct btrfs_file_extent_item));
2888
2889 btrfs_mark_buffer_dirty(leaf);
2890 btrfs_release_path(path);
2891
2892
2893
2894
2895
2896
2897
2898
2899 if (file_pos == 0 && !IS_ALIGNED(drop_args.bytes_found, sectorsize)) {
2900 u64 inline_size = round_down(drop_args.bytes_found, sectorsize);
2901
2902 inline_size = drop_args.bytes_found - inline_size;
2903 btrfs_update_inode_bytes(inode, sectorsize, inline_size);
2904 drop_args.bytes_found -= inline_size;
2905 num_bytes -= sectorsize;
2906 }
2907
2908 if (update_inode_bytes)
2909 btrfs_update_inode_bytes(inode, num_bytes, drop_args.bytes_found);
2910
2911 ins.objectid = disk_bytenr;
2912 ins.offset = disk_num_bytes;
2913 ins.type = BTRFS_EXTENT_ITEM_KEY;
2914
2915 ret = btrfs_inode_set_file_extent_range(inode, file_pos, ram_bytes);
2916 if (ret)
2917 goto out;
2918
2919 ret = btrfs_alloc_reserved_file_extent(trans, root, btrfs_ino(inode),
2920 file_pos, qgroup_reserved, &ins);
2921out:
2922 btrfs_free_path(path);
2923
2924 return ret;
2925}
2926
2927static void btrfs_release_delalloc_bytes(struct btrfs_fs_info *fs_info,
2928 u64 start, u64 len)
2929{
2930 struct btrfs_block_group *cache;
2931
2932 cache = btrfs_lookup_block_group(fs_info, start);
2933 ASSERT(cache);
2934
2935 spin_lock(&cache->lock);
2936 cache->delalloc_bytes -= len;
2937 spin_unlock(&cache->lock);
2938
2939 btrfs_put_block_group(cache);
2940}
2941
2942static int insert_ordered_extent_file_extent(struct btrfs_trans_handle *trans,
2943 struct btrfs_ordered_extent *oe)
2944{
2945 struct btrfs_file_extent_item stack_fi;
2946 u64 logical_len;
2947 bool update_inode_bytes;
2948
2949 memset(&stack_fi, 0, sizeof(stack_fi));
2950 btrfs_set_stack_file_extent_type(&stack_fi, BTRFS_FILE_EXTENT_REG);
2951 btrfs_set_stack_file_extent_disk_bytenr(&stack_fi, oe->disk_bytenr);
2952 btrfs_set_stack_file_extent_disk_num_bytes(&stack_fi,
2953 oe->disk_num_bytes);
2954 if (test_bit(BTRFS_ORDERED_TRUNCATED, &oe->flags))
2955 logical_len = oe->truncated_len;
2956 else
2957 logical_len = oe->num_bytes;
2958 btrfs_set_stack_file_extent_num_bytes(&stack_fi, logical_len);
2959 btrfs_set_stack_file_extent_ram_bytes(&stack_fi, logical_len);
2960 btrfs_set_stack_file_extent_compression(&stack_fi, oe->compress_type);
2961
2962
2963
2964
2965
2966
2967
2968
2969 update_inode_bytes = test_bit(BTRFS_ORDERED_DIRECT, &oe->flags) ||
2970 test_bit(BTRFS_ORDERED_TRUNCATED, &oe->flags);
2971
2972 return insert_reserved_file_extent(trans, BTRFS_I(oe->inode),
2973 oe->file_offset, &stack_fi,
2974 update_inode_bytes, oe->qgroup_rsv);
2975}
2976
2977
2978
2979
2980
2981
2982static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
2983{
2984 struct btrfs_inode *inode = BTRFS_I(ordered_extent->inode);
2985 struct btrfs_root *root = inode->root;
2986 struct btrfs_fs_info *fs_info = root->fs_info;
2987 struct btrfs_trans_handle *trans = NULL;
2988 struct extent_io_tree *io_tree = &inode->io_tree;
2989 struct extent_state *cached_state = NULL;
2990 u64 start, end;
2991 int compress_type = 0;
2992 int ret = 0;
2993 u64 logical_len = ordered_extent->num_bytes;
2994 bool freespace_inode;
2995 bool truncated = false;
2996 bool clear_reserved_extent = true;
2997 unsigned int clear_bits = EXTENT_DEFRAG;
2998
2999 start = ordered_extent->file_offset;
3000 end = start + ordered_extent->num_bytes - 1;
3001
3002 if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
3003 !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags) &&
3004 !test_bit(BTRFS_ORDERED_DIRECT, &ordered_extent->flags))
3005 clear_bits |= EXTENT_DELALLOC_NEW;
3006
3007 freespace_inode = btrfs_is_free_space_inode(inode);
3008
3009 if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) {
3010 ret = -EIO;
3011 goto out;
3012 }
3013
3014 if (ordered_extent->bdev)
3015 btrfs_rewrite_logical_zoned(ordered_extent);
3016
3017 btrfs_free_io_failure_record(inode, start, end);
3018
3019 if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
3020 truncated = true;
3021 logical_len = ordered_extent->truncated_len;
3022
3023 if (!logical_len)
3024 goto out;
3025 }
3026
3027 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
3028 BUG_ON(!list_empty(&ordered_extent->list));
3029
3030 btrfs_inode_safe_disk_i_size_write(inode, 0);
3031 if (freespace_inode)
3032 trans = btrfs_join_transaction_spacecache(root);
3033 else
3034 trans = btrfs_join_transaction(root);
3035 if (IS_ERR(trans)) {
3036 ret = PTR_ERR(trans);
3037 trans = NULL;
3038 goto out;
3039 }
3040 trans->block_rsv = &inode->block_rsv;
3041 ret = btrfs_update_inode_fallback(trans, root, inode);
3042 if (ret)
3043 btrfs_abort_transaction(trans, ret);
3044 goto out;
3045 }
3046
3047 clear_bits |= EXTENT_LOCKED;
3048 lock_extent_bits(io_tree, start, end, &cached_state);
3049
3050 if (freespace_inode)
3051 trans = btrfs_join_transaction_spacecache(root);
3052 else
3053 trans = btrfs_join_transaction(root);
3054 if (IS_ERR(trans)) {
3055 ret = PTR_ERR(trans);
3056 trans = NULL;
3057 goto out;
3058 }
3059
3060 trans->block_rsv = &inode->block_rsv;
3061
3062 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
3063 compress_type = ordered_extent->compress_type;
3064 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
3065 BUG_ON(compress_type);
3066 ret = btrfs_mark_extent_written(trans, inode,
3067 ordered_extent->file_offset,
3068 ordered_extent->file_offset +
3069 logical_len);
3070 } else {
3071 BUG_ON(root == fs_info->tree_root);
3072 ret = insert_ordered_extent_file_extent(trans, ordered_extent);
3073 if (!ret) {
3074 clear_reserved_extent = false;
3075 btrfs_release_delalloc_bytes(fs_info,
3076 ordered_extent->disk_bytenr,
3077 ordered_extent->disk_num_bytes);
3078 }
3079 }
3080 unpin_extent_cache(&inode->extent_tree, ordered_extent->file_offset,
3081 ordered_extent->num_bytes, trans->transid);
3082 if (ret < 0) {
3083 btrfs_abort_transaction(trans, ret);
3084 goto out;
3085 }
3086
3087 ret = add_pending_csums(trans, &ordered_extent->list);
3088 if (ret) {
3089 btrfs_abort_transaction(trans, ret);
3090 goto out;
3091 }
3092
3093
3094
3095
3096
3097
3098 if ((clear_bits & EXTENT_DELALLOC_NEW) &&
3099 !test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags))
3100 clear_extent_bit(&inode->io_tree, start, end,
3101 EXTENT_DELALLOC_NEW | EXTENT_ADD_INODE_BYTES,
3102 0, 0, &cached_state);
3103
3104 btrfs_inode_safe_disk_i_size_write(inode, 0);
3105 ret = btrfs_update_inode_fallback(trans, root, inode);
3106 if (ret) {
3107 btrfs_abort_transaction(trans, ret);
3108 goto out;
3109 }
3110 ret = 0;
3111out:
3112 clear_extent_bit(&inode->io_tree, start, end, clear_bits,
3113 (clear_bits & EXTENT_LOCKED) ? 1 : 0, 0,
3114 &cached_state);
3115
3116 if (trans)
3117 btrfs_end_transaction(trans);
3118
3119 if (ret || truncated) {
3120 u64 unwritten_start = start;
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130 if (ret && !test_and_set_bit(BTRFS_ORDERED_IOERR,
3131 &ordered_extent->flags))
3132 mapping_set_error(ordered_extent->inode->i_mapping, -EIO);
3133
3134 if (truncated)
3135 unwritten_start += logical_len;
3136 clear_extent_uptodate(io_tree, unwritten_start, end, NULL);
3137
3138
3139 btrfs_drop_extent_cache(inode, unwritten_start, end, 0);
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151 if ((ret || !logical_len) &&
3152 clear_reserved_extent &&
3153 !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
3154 !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
3155
3156
3157
3158
3159 if (ret && btrfs_test_opt(fs_info, DISCARD_SYNC))
3160 btrfs_discard_extent(fs_info,
3161 ordered_extent->disk_bytenr,
3162 ordered_extent->disk_num_bytes,
3163 NULL);
3164 btrfs_free_reserved_extent(fs_info,
3165 ordered_extent->disk_bytenr,
3166 ordered_extent->disk_num_bytes, 1);
3167 }
3168 }
3169
3170
3171
3172
3173
3174 btrfs_remove_ordered_extent(inode, ordered_extent);
3175
3176
3177 btrfs_put_ordered_extent(ordered_extent);
3178
3179 btrfs_put_ordered_extent(ordered_extent);
3180
3181 return ret;
3182}
3183
3184static void finish_ordered_fn(struct btrfs_work *work)
3185{
3186 struct btrfs_ordered_extent *ordered_extent;
3187 ordered_extent = container_of(work, struct btrfs_ordered_extent, work);
3188 btrfs_finish_ordered_io(ordered_extent);
3189}
3190
3191void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
3192 struct page *page, u64 start,
3193 u64 end, bool uptodate)
3194{
3195 trace_btrfs_writepage_end_io_hook(inode, start, end, uptodate);
3196
3197 btrfs_mark_ordered_io_finished(inode, page, start, end + 1 - start,
3198 finish_ordered_fn, uptodate);
3199}
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio,
3213 u32 bio_offset, struct page *page, u32 pgoff,
3214 u64 start)
3215{
3216 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
3217 SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
3218 char *kaddr;
3219 u32 len = fs_info->sectorsize;
3220 const u32 csum_size = fs_info->csum_size;
3221 unsigned int offset_sectors;
3222 u8 *csum_expected;
3223 u8 csum[BTRFS_CSUM_SIZE];
3224
3225 ASSERT(pgoff + len <= PAGE_SIZE);
3226
3227 offset_sectors = bio_offset >> fs_info->sectorsize_bits;
3228 csum_expected = ((u8 *)io_bio->csum) + offset_sectors * csum_size;
3229
3230 kaddr = kmap_atomic(page);
3231 shash->tfm = fs_info->csum_shash;
3232
3233 crypto_shash_digest(shash, kaddr + pgoff, len, csum);
3234
3235 if (memcmp(csum, csum_expected, csum_size))
3236 goto zeroit;
3237
3238 kunmap_atomic(kaddr);
3239 return 0;
3240zeroit:
3241 btrfs_print_data_csum_error(BTRFS_I(inode), start, csum, csum_expected,
3242 io_bio->mirror_num);
3243 if (io_bio->device)
3244 btrfs_dev_stat_inc_and_print(io_bio->device,
3245 BTRFS_DEV_STAT_CORRUPTION_ERRS);
3246 memset(kaddr + pgoff, 1, len);
3247 flush_dcache_page(page);
3248 kunmap_atomic(kaddr);
3249 return -EIO;
3250}
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264unsigned int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u32 bio_offset,
3265 struct page *page, u64 start, u64 end)
3266{
3267 struct inode *inode = page->mapping->host;
3268 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3269 struct btrfs_root *root = BTRFS_I(inode)->root;
3270 const u32 sectorsize = root->fs_info->sectorsize;
3271 u32 pg_off;
3272 unsigned int result = 0;
3273
3274 if (PageChecked(page)) {
3275 ClearPageChecked(page);
3276 return 0;
3277 }
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290 if (io_bio->csum == NULL)
3291 return 0;
3292
3293 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
3294 return 0;
3295
3296 if (!root->fs_info->csum_root)
3297 return 0;
3298
3299 ASSERT(page_offset(page) <= start &&
3300 end <= page_offset(page) + PAGE_SIZE - 1);
3301 for (pg_off = offset_in_page(start);
3302 pg_off < offset_in_page(end);
3303 pg_off += sectorsize, bio_offset += sectorsize) {
3304 u64 file_offset = pg_off + page_offset(page);
3305 int ret;
3306
3307 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
3308 test_range_bit(io_tree, file_offset,
3309 file_offset + sectorsize - 1,
3310 EXTENT_NODATASUM, 1, NULL)) {
3311
3312 clear_extent_bits(io_tree, file_offset,
3313 file_offset + sectorsize - 1,
3314 EXTENT_NODATASUM);
3315 continue;
3316 }
3317 ret = check_data_csum(inode, io_bio, bio_offset, page, pg_off,
3318 page_offset(page) + pg_off);
3319 if (ret < 0) {
3320 const int nr_bit = (pg_off - offset_in_page(start)) >>
3321 root->fs_info->sectorsize_bits;
3322
3323 result |= (1U << nr_bit);
3324 }
3325 }
3326 return result;
3327}
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339void btrfs_add_delayed_iput(struct inode *inode)
3340{
3341 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
3342 struct btrfs_inode *binode = BTRFS_I(inode);
3343
3344 if (atomic_add_unless(&inode->i_count, -1, 1))
3345 return;
3346
3347 atomic_inc(&fs_info->nr_delayed_iputs);
3348 spin_lock(&fs_info->delayed_iput_lock);
3349 ASSERT(list_empty(&binode->delayed_iput));
3350 list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs);
3351 spin_unlock(&fs_info->delayed_iput_lock);
3352 if (!test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags))
3353 wake_up_process(fs_info->cleaner_kthread);
3354}
3355
3356static void run_delayed_iput_locked(struct btrfs_fs_info *fs_info,
3357 struct btrfs_inode *inode)
3358{
3359 list_del_init(&inode->delayed_iput);
3360 spin_unlock(&fs_info->delayed_iput_lock);
3361 iput(&inode->vfs_inode);
3362 if (atomic_dec_and_test(&fs_info->nr_delayed_iputs))
3363 wake_up(&fs_info->delayed_iputs_wait);
3364 spin_lock(&fs_info->delayed_iput_lock);
3365}
3366
3367static void btrfs_run_delayed_iput(struct btrfs_fs_info *fs_info,
3368 struct btrfs_inode *inode)
3369{
3370 if (!list_empty(&inode->delayed_iput)) {
3371 spin_lock(&fs_info->delayed_iput_lock);
3372 if (!list_empty(&inode->delayed_iput))
3373 run_delayed_iput_locked(fs_info, inode);
3374 spin_unlock(&fs_info->delayed_iput_lock);
3375 }
3376}
3377
3378void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
3379{
3380
3381 spin_lock(&fs_info->delayed_iput_lock);
3382 while (!list_empty(&fs_info->delayed_iputs)) {
3383 struct btrfs_inode *inode;
3384
3385 inode = list_first_entry(&fs_info->delayed_iputs,
3386 struct btrfs_inode, delayed_iput);
3387 run_delayed_iput_locked(fs_info, inode);
3388 cond_resched_lock(&fs_info->delayed_iput_lock);
3389 }
3390 spin_unlock(&fs_info->delayed_iput_lock);
3391}
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405int btrfs_wait_on_delayed_iputs(struct btrfs_fs_info *fs_info)
3406{
3407 int ret = wait_event_killable(fs_info->delayed_iputs_wait,
3408 atomic_read(&fs_info->nr_delayed_iputs) == 0);
3409 if (ret)
3410 return -EINTR;
3411 return 0;
3412}
3413
3414
3415
3416
3417
3418int btrfs_orphan_add(struct btrfs_trans_handle *trans,
3419 struct btrfs_inode *inode)
3420{
3421 int ret;
3422
3423 ret = btrfs_insert_orphan_item(trans, inode->root, btrfs_ino(inode));
3424 if (ret && ret != -EEXIST) {
3425 btrfs_abort_transaction(trans, ret);
3426 return ret;
3427 }
3428
3429 return 0;
3430}
3431
3432
3433
3434
3435
3436static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
3437 struct btrfs_inode *inode)
3438{
3439 return btrfs_del_orphan_item(trans, inode->root, btrfs_ino(inode));
3440}
3441
3442
3443
3444
3445
3446int btrfs_orphan_cleanup(struct btrfs_root *root)
3447{
3448 struct btrfs_fs_info *fs_info = root->fs_info;
3449 struct btrfs_path *path;
3450 struct extent_buffer *leaf;
3451 struct btrfs_key key, found_key;
3452 struct btrfs_trans_handle *trans;
3453 struct inode *inode;
3454 u64 last_objectid = 0;
3455 int ret = 0, nr_unlink = 0;
3456
3457 if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
3458 return 0;
3459
3460 path = btrfs_alloc_path();
3461 if (!path) {
3462 ret = -ENOMEM;
3463 goto out;
3464 }
3465 path->reada = READA_BACK;
3466
3467 key.objectid = BTRFS_ORPHAN_OBJECTID;
3468 key.type = BTRFS_ORPHAN_ITEM_KEY;
3469 key.offset = (u64)-1;
3470
3471 while (1) {
3472 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3473 if (ret < 0)
3474 goto out;
3475
3476
3477
3478
3479
3480
3481 if (ret > 0) {
3482 ret = 0;
3483 if (path->slots[0] == 0)
3484 break;
3485 path->slots[0]--;
3486 }
3487
3488
3489 leaf = path->nodes[0];
3490 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3491
3492
3493 if (found_key.objectid != BTRFS_ORPHAN_OBJECTID)
3494 break;
3495 if (found_key.type != BTRFS_ORPHAN_ITEM_KEY)
3496 break;
3497
3498
3499 btrfs_release_path(path);
3500
3501
3502
3503
3504
3505
3506
3507 if (found_key.offset == last_objectid) {
3508 btrfs_err(fs_info,
3509 "Error removing orphan entry, stopping orphan cleanup");
3510 ret = -EINVAL;
3511 goto out;
3512 }
3513
3514 last_objectid = found_key.offset;
3515
3516 found_key.objectid = found_key.offset;
3517 found_key.type = BTRFS_INODE_ITEM_KEY;
3518 found_key.offset = 0;
3519 inode = btrfs_iget(fs_info->sb, last_objectid, root);
3520 ret = PTR_ERR_OR_ZERO(inode);
3521 if (ret && ret != -ENOENT)
3522 goto out;
3523
3524 if (ret == -ENOENT && root == fs_info->tree_root) {
3525 struct btrfs_root *dead_root;
3526 int is_dead_root = 0;
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544 spin_lock(&fs_info->fs_roots_radix_lock);
3545 dead_root = radix_tree_lookup(&fs_info->fs_roots_radix,
3546 (unsigned long)found_key.objectid);
3547 if (dead_root && btrfs_root_refs(&dead_root->root_item) == 0)
3548 is_dead_root = 1;
3549 spin_unlock(&fs_info->fs_roots_radix_lock);
3550
3551 if (is_dead_root) {
3552
3553 key.offset = found_key.objectid - 1;
3554 continue;
3555 }
3556
3557 }
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585 if (ret == -ENOENT || inode->i_nlink) {
3586 if (!ret) {
3587 ret = btrfs_drop_verity_items(BTRFS_I(inode));
3588 iput(inode);
3589 if (ret)
3590 goto out;
3591 }
3592 trans = btrfs_start_transaction(root, 1);
3593 if (IS_ERR(trans)) {
3594 ret = PTR_ERR(trans);
3595 goto out;
3596 }
3597 btrfs_debug(fs_info, "auto deleting %Lu",
3598 found_key.objectid);
3599 ret = btrfs_del_orphan_item(trans, root,
3600 found_key.objectid);
3601 btrfs_end_transaction(trans);
3602 if (ret)
3603 goto out;
3604 continue;
3605 }
3606
3607 nr_unlink++;
3608
3609
3610 iput(inode);
3611 }
3612
3613 btrfs_release_path(path);
3614
3615 root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE;
3616
3617 if (test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state)) {
3618 trans = btrfs_join_transaction(root);
3619 if (!IS_ERR(trans))
3620 btrfs_end_transaction(trans);
3621 }
3622
3623 if (nr_unlink)
3624 btrfs_debug(fs_info, "unlinked %d orphans", nr_unlink);
3625
3626out:
3627 if (ret)
3628 btrfs_err(fs_info, "could not do orphan cleanup %d", ret);
3629 btrfs_free_path(path);
3630 return ret;
3631}
3632
3633
3634
3635
3636
3637
3638
3639static noinline int acls_after_inode_item(struct extent_buffer *leaf,
3640 int slot, u64 objectid,
3641 int *first_xattr_slot)
3642{
3643 u32 nritems = btrfs_header_nritems(leaf);
3644 struct btrfs_key found_key;
3645 static u64 xattr_access = 0;
3646 static u64 xattr_default = 0;
3647 int scanned = 0;
3648
3649 if (!xattr_access) {
3650 xattr_access = btrfs_name_hash(XATTR_NAME_POSIX_ACL_ACCESS,
3651 strlen(XATTR_NAME_POSIX_ACL_ACCESS));
3652 xattr_default = btrfs_name_hash(XATTR_NAME_POSIX_ACL_DEFAULT,
3653 strlen(XATTR_NAME_POSIX_ACL_DEFAULT));
3654 }
3655
3656 slot++;
3657 *first_xattr_slot = -1;
3658 while (slot < nritems) {
3659 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3660
3661
3662 if (found_key.objectid != objectid)
3663 return 0;
3664
3665
3666 if (found_key.type == BTRFS_XATTR_ITEM_KEY) {
3667 if (*first_xattr_slot == -1)
3668 *first_xattr_slot = slot;
3669 if (found_key.offset == xattr_access ||
3670 found_key.offset == xattr_default)
3671 return 1;
3672 }
3673
3674
3675
3676
3677
3678 if (found_key.type > BTRFS_XATTR_ITEM_KEY)
3679 return 0;
3680
3681 slot++;
3682 scanned++;
3683
3684
3685
3686
3687
3688
3689
3690 if (scanned >= 8)
3691 break;
3692 }
3693
3694
3695
3696
3697 if (*first_xattr_slot == -1)
3698 *first_xattr_slot = slot;
3699 return 1;
3700}
3701
3702
3703
3704
3705static int btrfs_read_locked_inode(struct inode *inode,
3706 struct btrfs_path *in_path)
3707{
3708 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
3709 struct btrfs_path *path = in_path;
3710 struct extent_buffer *leaf;
3711 struct btrfs_inode_item *inode_item;
3712 struct btrfs_root *root = BTRFS_I(inode)->root;
3713 struct btrfs_key location;
3714 unsigned long ptr;
3715 int maybe_acls;
3716 u32 rdev;
3717 int ret;
3718 bool filled = false;
3719 int first_xattr_slot;
3720
3721 ret = btrfs_fill_inode(inode, &rdev);
3722 if (!ret)
3723 filled = true;
3724
3725 if (!path) {
3726 path = btrfs_alloc_path();
3727 if (!path)
3728 return -ENOMEM;
3729 }
3730
3731 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
3732
3733 ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
3734 if (ret) {
3735 if (path != in_path)
3736 btrfs_free_path(path);
3737 return ret;
3738 }
3739
3740 leaf = path->nodes[0];
3741
3742 if (filled)
3743 goto cache_index;
3744
3745 inode_item = btrfs_item_ptr(leaf, path->slots[0],
3746 struct btrfs_inode_item);
3747 inode->i_mode = btrfs_inode_mode(leaf, inode_item);
3748 set_nlink(inode, btrfs_inode_nlink(leaf, inode_item));
3749 i_uid_write(inode, btrfs_inode_uid(leaf, inode_item));
3750 i_gid_write(inode, btrfs_inode_gid(leaf, inode_item));
3751 btrfs_i_size_write(BTRFS_I(inode), btrfs_inode_size(leaf, inode_item));
3752 btrfs_inode_set_file_extent_range(BTRFS_I(inode), 0,
3753 round_up(i_size_read(inode), fs_info->sectorsize));
3754
3755 inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime);
3756 inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->atime);
3757
3758 inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->mtime);
3759 inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->mtime);
3760
3761 inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->ctime);
3762 inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->ctime);
3763
3764 BTRFS_I(inode)->i_otime.tv_sec =
3765 btrfs_timespec_sec(leaf, &inode_item->otime);
3766 BTRFS_I(inode)->i_otime.tv_nsec =
3767 btrfs_timespec_nsec(leaf, &inode_item->otime);
3768
3769 inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item));
3770 BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
3771 BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item);
3772
3773 inode_set_iversion_queried(inode,
3774 btrfs_inode_sequence(leaf, inode_item));
3775 inode->i_generation = BTRFS_I(inode)->generation;
3776 inode->i_rdev = 0;
3777 rdev = btrfs_inode_rdev(leaf, inode_item);
3778
3779 BTRFS_I(inode)->index_cnt = (u64)-1;
3780 btrfs_inode_split_flags(btrfs_inode_flags(leaf, inode_item),
3781 &BTRFS_I(inode)->flags, &BTRFS_I(inode)->ro_flags);
3782
3783cache_index:
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793 if (BTRFS_I(inode)->last_trans == fs_info->generation)
3794 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
3795 &BTRFS_I(inode)->runtime_flags);
3796
3797
3798
3799
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824 BTRFS_I(inode)->last_unlink_trans = BTRFS_I(inode)->last_trans;
3825
3826
3827
3828
3829
3830
3831
3832 BTRFS_I(inode)->last_reflink_trans = BTRFS_I(inode)->last_trans;
3833
3834 path->slots[0]++;
3835 if (inode->i_nlink != 1 ||
3836 path->slots[0] >= btrfs_header_nritems(leaf))
3837 goto cache_acl;
3838
3839 btrfs_item_key_to_cpu(leaf, &location, path->slots[0]);
3840 if (location.objectid != btrfs_ino(BTRFS_I(inode)))
3841 goto cache_acl;
3842
3843 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
3844 if (location.type == BTRFS_INODE_REF_KEY) {
3845 struct btrfs_inode_ref *ref;
3846
3847 ref = (struct btrfs_inode_ref *)ptr;
3848 BTRFS_I(inode)->dir_index = btrfs_inode_ref_index(leaf, ref);
3849 } else if (location.type == BTRFS_INODE_EXTREF_KEY) {
3850 struct btrfs_inode_extref *extref;
3851
3852 extref = (struct btrfs_inode_extref *)ptr;
3853 BTRFS_I(inode)->dir_index = btrfs_inode_extref_index(leaf,
3854 extref);
3855 }
3856cache_acl:
3857
3858
3859
3860
3861 maybe_acls = acls_after_inode_item(leaf, path->slots[0],
3862 btrfs_ino(BTRFS_I(inode)), &first_xattr_slot);
3863 if (first_xattr_slot != -1) {
3864 path->slots[0] = first_xattr_slot;
3865 ret = btrfs_load_inode_props(inode, path);
3866 if (ret)
3867 btrfs_err(fs_info,
3868 "error loading props for ino %llu (root %llu): %d",
3869 btrfs_ino(BTRFS_I(inode)),
3870 root->root_key.objectid, ret);
3871 }
3872 if (path != in_path)
3873 btrfs_free_path(path);
3874
3875 if (!maybe_acls)
3876 cache_no_acl(inode);
3877
3878 switch (inode->i_mode & S_IFMT) {
3879 case S_IFREG:
3880 inode->i_mapping->a_ops = &btrfs_aops;
3881 inode->i_fop = &btrfs_file_operations;
3882 inode->i_op = &btrfs_file_inode_operations;
3883 break;
3884 case S_IFDIR:
3885 inode->i_fop = &btrfs_dir_file_operations;
3886 inode->i_op = &btrfs_dir_inode_operations;
3887 break;
3888 case S_IFLNK:
3889 inode->i_op = &btrfs_symlink_inode_operations;
3890 inode_nohighmem(inode);
3891 inode->i_mapping->a_ops = &btrfs_aops;
3892 break;
3893 default:
3894 inode->i_op = &btrfs_special_inode_operations;
3895 init_special_inode(inode, inode->i_mode, rdev);
3896 break;
3897 }
3898
3899 btrfs_sync_inode_flags_to_i_flags(inode);
3900 return 0;
3901}
3902
3903
3904
3905
3906static void fill_inode_item(struct btrfs_trans_handle *trans,
3907 struct extent_buffer *leaf,
3908 struct btrfs_inode_item *item,
3909 struct inode *inode)
3910{
3911 struct btrfs_map_token token;
3912 u64 flags;
3913
3914 btrfs_init_map_token(&token, leaf);
3915
3916 btrfs_set_token_inode_uid(&token, item, i_uid_read(inode));
3917 btrfs_set_token_inode_gid(&token, item, i_gid_read(inode));
3918 btrfs_set_token_inode_size(&token, item, BTRFS_I(inode)->disk_i_size);
3919 btrfs_set_token_inode_mode(&token, item, inode->i_mode);
3920 btrfs_set_token_inode_nlink(&token, item, inode->i_nlink);
3921
3922 btrfs_set_token_timespec_sec(&token, &item->atime,
3923 inode->i_atime.tv_sec);
3924 btrfs_set_token_timespec_nsec(&token, &item->atime,
3925 inode->i_atime.tv_nsec);
3926
3927 btrfs_set_token_timespec_sec(&token, &item->mtime,
3928 inode->i_mtime.tv_sec);
3929 btrfs_set_token_timespec_nsec(&token, &item->mtime,
3930 inode->i_mtime.tv_nsec);
3931
3932 btrfs_set_token_timespec_sec(&token, &item->ctime,
3933 inode->i_ctime.tv_sec);
3934 btrfs_set_token_timespec_nsec(&token, &item->ctime,
3935 inode->i_ctime.tv_nsec);
3936
3937 btrfs_set_token_timespec_sec(&token, &item->otime,
3938 BTRFS_I(inode)->i_otime.tv_sec);
3939 btrfs_set_token_timespec_nsec(&token, &item->otime,
3940 BTRFS_I(inode)->i_otime.tv_nsec);
3941
3942 btrfs_set_token_inode_nbytes(&token, item, inode_get_bytes(inode));
3943 btrfs_set_token_inode_generation(&token, item,
3944 BTRFS_I(inode)->generation);
3945 btrfs_set_token_inode_sequence(&token, item, inode_peek_iversion(inode));
3946 btrfs_set_token_inode_transid(&token, item, trans->transid);
3947 btrfs_set_token_inode_rdev(&token, item, inode->i_rdev);
3948 flags = btrfs_inode_combine_flags(BTRFS_I(inode)->flags,
3949 BTRFS_I(inode)->ro_flags);
3950 btrfs_set_token_inode_flags(&token, item, flags);
3951 btrfs_set_token_inode_block_group(&token, item, 0);
3952}
3953
3954
3955
3956
3957static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
3958 struct btrfs_root *root,
3959 struct btrfs_inode *inode)
3960{
3961 struct btrfs_inode_item *inode_item;
3962 struct btrfs_path *path;
3963 struct extent_buffer *leaf;
3964 int ret;
3965
3966 path = btrfs_alloc_path();
3967 if (!path)
3968 return -ENOMEM;
3969
3970 ret = btrfs_lookup_inode(trans, root, path, &inode->location, 1);
3971 if (ret) {
3972 if (ret > 0)
3973 ret = -ENOENT;
3974 goto failed;
3975 }
3976
3977 leaf = path->nodes[0];
3978 inode_item = btrfs_item_ptr(leaf, path->slots[0],
3979 struct btrfs_inode_item);
3980
3981 fill_inode_item(trans, leaf, inode_item, &inode->vfs_inode);
3982 btrfs_mark_buffer_dirty(leaf);
3983 btrfs_set_inode_last_trans(trans, inode);
3984 ret = 0;
3985failed:
3986 btrfs_free_path(path);
3987 return ret;
3988}
3989
3990
3991
3992
3993noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
3994 struct btrfs_root *root,
3995 struct btrfs_inode *inode)
3996{
3997 struct btrfs_fs_info *fs_info = root->fs_info;
3998 int ret;
3999
4000
4001
4002
4003
4004
4005
4006
4007 if (!btrfs_is_free_space_inode(inode)
4008 && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
4009 && !test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
4010 btrfs_update_root_times(trans, root);
4011
4012 ret = btrfs_delayed_update_inode(trans, root, inode);
4013 if (!ret)
4014 btrfs_set_inode_last_trans(trans, inode);
4015 return ret;
4016 }
4017
4018 return btrfs_update_inode_item(trans, root, inode);
4019}
4020
4021int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
4022 struct btrfs_root *root, struct btrfs_inode *inode)
4023{
4024 int ret;
4025
4026 ret = btrfs_update_inode(trans, root, inode);
4027 if (ret == -ENOSPC)
4028 return btrfs_update_inode_item(trans, root, inode);
4029 return ret;
4030}
4031
4032
4033
4034
4035
4036
4037static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
4038 struct btrfs_root *root,
4039 struct btrfs_inode *dir,
4040 struct btrfs_inode *inode,
4041 const char *name, int name_len)
4042{
4043 struct btrfs_fs_info *fs_info = root->fs_info;
4044 struct btrfs_path *path;
4045 int ret = 0;
4046 struct btrfs_dir_item *di;
4047 u64 index;
4048 u64 ino = btrfs_ino(inode);
4049 u64 dir_ino = btrfs_ino(dir);
4050
4051 path = btrfs_alloc_path();
4052 if (!path) {
4053 ret = -ENOMEM;
4054 goto out;
4055 }
4056
4057 di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
4058 name, name_len, -1);
4059 if (IS_ERR_OR_NULL(di)) {
4060 ret = di ? PTR_ERR(di) : -ENOENT;
4061 goto err;
4062 }
4063 ret = btrfs_delete_one_dir_name(trans, root, path, di);
4064 if (ret)
4065 goto err;
4066 btrfs_release_path(path);
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078 if (inode->dir_index) {
4079 ret = btrfs_delayed_delete_inode_ref(inode);
4080 if (!ret) {
4081 index = inode->dir_index;
4082 goto skip_backref;
4083 }
4084 }
4085
4086 ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
4087 dir_ino, &index);
4088 if (ret) {
4089 btrfs_info(fs_info,
4090 "failed to delete reference to %.*s, inode %llu parent %llu",
4091 name_len, name, ino, dir_ino);
4092 btrfs_abort_transaction(trans, ret);
4093 goto err;
4094 }
4095skip_backref:
4096 ret = btrfs_delete_delayed_dir_index(trans, dir, index);
4097 if (ret) {
4098 btrfs_abort_transaction(trans, ret);
4099 goto err;
4100 }
4101
4102 ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode,
4103 dir_ino);
4104 if (ret != 0 && ret != -ENOENT) {
4105 btrfs_abort_transaction(trans, ret);
4106 goto err;
4107 }
4108
4109 ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, dir,
4110 index);
4111 if (ret == -ENOENT)
4112 ret = 0;
4113 else if (ret)
4114 btrfs_abort_transaction(trans, ret);
4115
4116
4117
4118
4119
4120
4121
4122
4123
4124
4125 btrfs_run_delayed_iput(fs_info, inode);
4126err:
4127 btrfs_free_path(path);
4128 if (ret)
4129 goto out;
4130
4131 btrfs_i_size_write(dir, dir->vfs_inode.i_size - name_len * 2);
4132 inode_inc_iversion(&inode->vfs_inode);
4133 inode_inc_iversion(&dir->vfs_inode);
4134 inode->vfs_inode.i_ctime = dir->vfs_inode.i_mtime =
4135 dir->vfs_inode.i_ctime = current_time(&inode->vfs_inode);
4136 ret = btrfs_update_inode(trans, root, dir);
4137out:
4138 return ret;
4139}
4140
4141int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
4142 struct btrfs_root *root,
4143 struct btrfs_inode *dir, struct btrfs_inode *inode,
4144 const char *name, int name_len)
4145{
4146 int ret;
4147 ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
4148 if (!ret) {
4149 drop_nlink(&inode->vfs_inode);
4150 ret = btrfs_update_inode(trans, root, inode);
4151 }
4152 return ret;
4153}
4154
4155
4156
4157
4158
4159
4160
4161
4162
4163static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir)
4164{
4165 struct btrfs_root *root = BTRFS_I(dir)->root;
4166
4167
4168
4169
4170
4171
4172
4173
4174 return btrfs_start_transaction_fallback_global_rsv(root, 5);
4175}
4176
4177static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
4178{
4179 struct btrfs_root *root = BTRFS_I(dir)->root;
4180 struct btrfs_trans_handle *trans;
4181 struct inode *inode = d_inode(dentry);
4182 int ret;
4183
4184 trans = __unlink_start_trans(dir);
4185 if (IS_ERR(trans))
4186 return PTR_ERR(trans);
4187
4188 btrfs_record_unlink_dir(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
4189 0);
4190
4191 ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
4192 BTRFS_I(d_inode(dentry)), dentry->d_name.name,
4193 dentry->d_name.len);
4194 if (ret)
4195 goto out;
4196
4197 if (inode->i_nlink == 0) {
4198 ret = btrfs_orphan_add(trans, BTRFS_I(inode));
4199 if (ret)
4200 goto out;
4201 }
4202
4203out:
4204 btrfs_end_transaction(trans);
4205 btrfs_btree_balance_dirty(root->fs_info);
4206 return ret;
4207}
4208
4209static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
4210 struct inode *dir, struct dentry *dentry)
4211{
4212 struct btrfs_root *root = BTRFS_I(dir)->root;
4213 struct btrfs_inode *inode = BTRFS_I(d_inode(dentry));
4214 struct btrfs_path *path;
4215 struct extent_buffer *leaf;
4216 struct btrfs_dir_item *di;
4217 struct btrfs_key key;
4218 const char *name = dentry->d_name.name;
4219 int name_len = dentry->d_name.len;
4220 u64 index;
4221 int ret;
4222 u64 objectid;
4223 u64 dir_ino = btrfs_ino(BTRFS_I(dir));
4224
4225 if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) {
4226 objectid = inode->root->root_key.objectid;
4227 } else if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) {
4228 objectid = inode->location.objectid;
4229 } else {
4230 WARN_ON(1);
4231 return -EINVAL;
4232 }
4233
4234 path = btrfs_alloc_path();
4235 if (!path)
4236 return -ENOMEM;
4237
4238 di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
4239 name, name_len, -1);
4240 if (IS_ERR_OR_NULL(di)) {
4241 ret = di ? PTR_ERR(di) : -ENOENT;
4242 goto out;
4243 }
4244
4245 leaf = path->nodes[0];
4246 btrfs_dir_item_key_to_cpu(leaf, di, &key);
4247 WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
4248 ret = btrfs_delete_one_dir_name(trans, root, path, di);
4249 if (ret) {
4250 btrfs_abort_transaction(trans, ret);
4251 goto out;
4252 }
4253 btrfs_release_path(path);
4254
4255
4256
4257
4258
4259
4260
4261
4262
4263
4264 if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) {
4265 di = btrfs_search_dir_index_item(root, path, dir_ino,
4266 name, name_len);
4267 if (IS_ERR_OR_NULL(di)) {
4268 if (!di)
4269 ret = -ENOENT;
4270 else
4271 ret = PTR_ERR(di);
4272 btrfs_abort_transaction(trans, ret);
4273 goto out;
4274 }
4275
4276 leaf = path->nodes[0];
4277 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4278 index = key.offset;
4279 btrfs_release_path(path);
4280 } else {
4281 ret = btrfs_del_root_ref(trans, objectid,
4282 root->root_key.objectid, dir_ino,
4283 &index, name, name_len);
4284 if (ret) {
4285 btrfs_abort_transaction(trans, ret);
4286 goto out;
4287 }
4288 }
4289
4290 ret = btrfs_delete_delayed_dir_index(trans, BTRFS_I(dir), index);
4291 if (ret) {
4292 btrfs_abort_transaction(trans, ret);
4293 goto out;
4294 }
4295
4296 btrfs_i_size_write(BTRFS_I(dir), dir->i_size - name_len * 2);
4297 inode_inc_iversion(dir);
4298 dir->i_mtime = dir->i_ctime = current_time(dir);
4299 ret = btrfs_update_inode_fallback(trans, root, BTRFS_I(dir));
4300 if (ret)
4301 btrfs_abort_transaction(trans, ret);
4302out:
4303 btrfs_free_path(path);
4304 return ret;
4305}
4306
4307
4308
4309
4310
4311static noinline int may_destroy_subvol(struct btrfs_root *root)
4312{
4313 struct btrfs_fs_info *fs_info = root->fs_info;
4314 struct btrfs_path *path;
4315 struct btrfs_dir_item *di;
4316 struct btrfs_key key;
4317 u64 dir_id;
4318 int ret;
4319
4320 path = btrfs_alloc_path();
4321 if (!path)
4322 return -ENOMEM;
4323
4324
4325 dir_id = btrfs_super_root_dir(fs_info->super_copy);
4326 di = btrfs_lookup_dir_item(NULL, fs_info->tree_root, path,
4327 dir_id, "default", 7, 0);
4328 if (di && !IS_ERR(di)) {
4329 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
4330 if (key.objectid == root->root_key.objectid) {
4331 ret = -EPERM;
4332 btrfs_err(fs_info,
4333 "deleting default subvolume %llu is not allowed",
4334 key.objectid);
4335 goto out;
4336 }
4337 btrfs_release_path(path);
4338 }
4339
4340 key.objectid = root->root_key.objectid;
4341 key.type = BTRFS_ROOT_REF_KEY;
4342 key.offset = (u64)-1;
4343
4344 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
4345 if (ret < 0)
4346 goto out;
4347 BUG_ON(ret == 0);
4348
4349 ret = 0;
4350 if (path->slots[0] > 0) {
4351 path->slots[0]--;
4352 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
4353 if (key.objectid == root->root_key.objectid &&
4354 key.type == BTRFS_ROOT_REF_KEY)
4355 ret = -ENOTEMPTY;
4356 }
4357out:
4358 btrfs_free_path(path);
4359 return ret;
4360}
4361
4362
4363static void btrfs_prune_dentries(struct btrfs_root *root)
4364{
4365 struct btrfs_fs_info *fs_info = root->fs_info;
4366 struct rb_node *node;
4367 struct rb_node *prev;
4368 struct btrfs_inode *entry;
4369 struct inode *inode;
4370 u64 objectid = 0;
4371
4372 if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
4373 WARN_ON(btrfs_root_refs(&root->root_item) != 0);
4374
4375 spin_lock(&root->inode_lock);
4376again:
4377 node = root->inode_tree.rb_node;
4378 prev = NULL;
4379 while (node) {
4380 prev = node;
4381 entry = rb_entry(node, struct btrfs_inode, rb_node);
4382
4383 if (objectid < btrfs_ino(entry))
4384 node = node->rb_left;
4385 else if (objectid > btrfs_ino(entry))
4386 node = node->rb_right;
4387 else
4388 break;
4389 }
4390 if (!node) {
4391 while (prev) {
4392 entry = rb_entry(prev, struct btrfs_inode, rb_node);
4393 if (objectid <= btrfs_ino(entry)) {
4394 node = prev;
4395 break;
4396 }
4397 prev = rb_next(prev);
4398 }
4399 }
4400 while (node) {
4401 entry = rb_entry(node, struct btrfs_inode, rb_node);
4402 objectid = btrfs_ino(entry) + 1;
4403 inode = igrab(&entry->vfs_inode);
4404 if (inode) {
4405 spin_unlock(&root->inode_lock);
4406 if (atomic_read(&inode->i_count) > 1)
4407 d_prune_aliases(inode);
4408
4409
4410
4411
4412 iput(inode);
4413 cond_resched();
4414 spin_lock(&root->inode_lock);
4415 goto again;
4416 }
4417
4418 if (cond_resched_lock(&root->inode_lock))
4419 goto again;
4420
4421 node = rb_next(node);
4422 }
4423 spin_unlock(&root->inode_lock);
4424}
4425
4426int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
4427{
4428 struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
4429 struct btrfs_root *root = BTRFS_I(dir)->root;
4430 struct inode *inode = d_inode(dentry);
4431 struct btrfs_root *dest = BTRFS_I(inode)->root;
4432 struct btrfs_trans_handle *trans;
4433 struct btrfs_block_rsv block_rsv;
4434 u64 root_flags;
4435 int ret;
4436
4437
4438
4439
4440
4441
4442 spin_lock(&dest->root_item_lock);
4443 if (dest->send_in_progress) {
4444 spin_unlock(&dest->root_item_lock);
4445 btrfs_warn(fs_info,
4446 "attempt to delete subvolume %llu during send",
4447 dest->root_key.objectid);
4448 return -EPERM;
4449 }
4450 root_flags = btrfs_root_flags(&dest->root_item);
4451 btrfs_set_root_flags(&dest->root_item,
4452 root_flags | BTRFS_ROOT_SUBVOL_DEAD);
4453 spin_unlock(&dest->root_item_lock);
4454
4455 down_write(&fs_info->subvol_sem);
4456
4457 ret = may_destroy_subvol(dest);
4458 if (ret)
4459 goto out_up_write;
4460
4461 btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
4462
4463
4464
4465
4466
4467 ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
4468 if (ret)
4469 goto out_up_write;
4470
4471 trans = btrfs_start_transaction(root, 0);
4472 if (IS_ERR(trans)) {
4473 ret = PTR_ERR(trans);
4474 goto out_release;
4475 }
4476 trans->block_rsv = &block_rsv;
4477 trans->bytes_reserved = block_rsv.size;
4478
4479 btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));
4480
4481 ret = btrfs_unlink_subvol(trans, dir, dentry);
4482 if (ret) {
4483 btrfs_abort_transaction(trans, ret);
4484 goto out_end_trans;
4485 }
4486
4487 ret = btrfs_record_root_in_trans(trans, dest);
4488 if (ret) {
4489 btrfs_abort_transaction(trans, ret);
4490 goto out_end_trans;
4491 }
4492
4493 memset(&dest->root_item.drop_progress, 0,
4494 sizeof(dest->root_item.drop_progress));
4495 btrfs_set_root_drop_level(&dest->root_item, 0);
4496 btrfs_set_root_refs(&dest->root_item, 0);
4497
4498 if (!test_and_set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &dest->state)) {
4499 ret = btrfs_insert_orphan_item(trans,
4500 fs_info->tree_root,
4501 dest->root_key.objectid);
4502 if (ret) {
4503 btrfs_abort_transaction(trans, ret);
4504 goto out_end_trans;
4505 }
4506 }
4507
4508 ret = btrfs_uuid_tree_remove(trans, dest->root_item.uuid,
4509 BTRFS_UUID_KEY_SUBVOL,
4510 dest->root_key.objectid);
4511 if (ret && ret != -ENOENT) {
4512 btrfs_abort_transaction(trans, ret);
4513 goto out_end_trans;
4514 }
4515 if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) {
4516 ret = btrfs_uuid_tree_remove(trans,
4517 dest->root_item.received_uuid,
4518 BTRFS_UUID_KEY_RECEIVED_SUBVOL,
4519 dest->root_key.objectid);
4520 if (ret && ret != -ENOENT) {
4521 btrfs_abort_transaction(trans, ret);
4522 goto out_end_trans;
4523 }
4524 }
4525
4526 free_anon_bdev(dest->anon_dev);
4527 dest->anon_dev = 0;
4528out_end_trans:
4529 trans->block_rsv = NULL;
4530 trans->bytes_reserved = 0;
4531 ret = btrfs_end_transaction(trans);
4532 inode->i_flags |= S_DEAD;
4533out_release:
4534 btrfs_subvolume_release_metadata(root, &block_rsv);
4535out_up_write:
4536 up_write(&fs_info->subvol_sem);
4537 if (ret) {
4538 spin_lock(&dest->root_item_lock);
4539 root_flags = btrfs_root_flags(&dest->root_item);
4540 btrfs_set_root_flags(&dest->root_item,
4541 root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
4542 spin_unlock(&dest->root_item_lock);
4543 } else {
4544 d_invalidate(dentry);
4545 btrfs_prune_dentries(dest);
4546 ASSERT(dest->send_in_progress == 0);
4547 }
4548
4549 return ret;
4550}
4551
4552static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
4553{
4554 struct inode *inode = d_inode(dentry);
4555 int err = 0;
4556 struct btrfs_root *root = BTRFS_I(dir)->root;
4557 struct btrfs_trans_handle *trans;
4558 u64 last_unlink_trans;
4559
4560 if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
4561 return -ENOTEMPTY;
4562 if (btrfs_ino(BTRFS_I(inode)) == BTRFS_FIRST_FREE_OBJECTID)
4563 return btrfs_delete_subvolume(dir, dentry);
4564
4565 trans = __unlink_start_trans(dir);
4566 if (IS_ERR(trans))
4567 return PTR_ERR(trans);
4568
4569 if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
4570 err = btrfs_unlink_subvol(trans, dir, dentry);
4571 goto out;
4572 }
4573
4574 err = btrfs_orphan_add(trans, BTRFS_I(inode));
4575 if (err)
4576 goto out;
4577
4578 last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
4579
4580
4581 err = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
4582 BTRFS_I(d_inode(dentry)), dentry->d_name.name,
4583 dentry->d_name.len);
4584 if (!err) {
4585 btrfs_i_size_write(BTRFS_I(inode), 0);
4586
4587
4588
4589
4590
4591
4592
4593
4594
4595
4596
4597 if (last_unlink_trans >= trans->transid)
4598 BTRFS_I(dir)->last_unlink_trans = last_unlink_trans;
4599 }
4600out:
4601 btrfs_end_transaction(trans);
4602 btrfs_btree_balance_dirty(root->fs_info);
4603
4604 return err;
4605}
4606
4607
4608
4609
4610
4611#define NEED_TRUNCATE_BLOCK 1
4612
4613
4614
4615
4616
4617
4618
4619
4620
4621
4622
4623
4624
4625
4626
4627
4628
4629
4630
4631
4632
4633
4634
4635
4636
4637
4638
4639int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
4640 struct btrfs_root *root,
4641 struct btrfs_inode *inode,
4642 u64 new_size, u32 min_type,
4643 u64 *extents_found)
4644{
4645 struct btrfs_fs_info *fs_info = root->fs_info;
4646 struct btrfs_path *path;
4647 struct extent_buffer *leaf;
4648 struct btrfs_file_extent_item *fi;
4649 struct btrfs_key key;
4650 struct btrfs_key found_key;
4651 u64 extent_start = 0;
4652 u64 extent_num_bytes = 0;
4653 u64 extent_offset = 0;
4654 u64 item_end = 0;
4655 u64 last_size = new_size;
4656 u32 found_type = (u8)-1;
4657 int found_extent;
4658 int del_item;
4659 int pending_del_nr = 0;
4660 int pending_del_slot = 0;
4661 int extent_type = -1;
4662 int ret;
4663 u64 ino = btrfs_ino(inode);
4664 u64 bytes_deleted = 0;
4665 bool be_nice = false;
4666 bool should_throttle = false;
4667 const u64 lock_start = ALIGN_DOWN(new_size, fs_info->sectorsize);
4668 struct extent_state *cached_state = NULL;
4669
4670 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
4671
4672
4673
4674
4675
4676
4677 if (!btrfs_is_free_space_inode(inode) &&
4678 test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
4679 be_nice = true;
4680
4681 path = btrfs_alloc_path();
4682 if (!path)
4683 return -ENOMEM;
4684 path->reada = READA_BACK;
4685
4686 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
4687 lock_extent_bits(&inode->io_tree, lock_start, (u64)-1,
4688 &cached_state);
4689
4690
4691
4692
4693
4694
4695 btrfs_drop_extent_cache(inode, ALIGN(new_size,
4696 fs_info->sectorsize),
4697 (u64)-1, 0);
4698 }
4699
4700
4701
4702
4703
4704
4705
4706 if (min_type == 0 && root == inode->root)
4707 btrfs_kill_delayed_inode_items(inode);
4708
4709 key.objectid = ino;
4710 key.offset = (u64)-1;
4711 key.type = (u8)-1;
4712
4713search_again:
4714
4715
4716
4717
4718
4719 if (be_nice && bytes_deleted > SZ_32M &&
4720 btrfs_should_end_transaction(trans)) {
4721 ret = -EAGAIN;
4722 goto out;
4723 }
4724
4725 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
4726 if (ret < 0)
4727 goto out;
4728
4729 if (ret > 0) {
4730 ret = 0;
4731
4732
4733
4734 if (path->slots[0] == 0)
4735 goto out;
4736 path->slots[0]--;
4737 }
4738
4739 while (1) {
4740 u64 clear_start = 0, clear_len = 0;
4741
4742 fi = NULL;
4743 leaf = path->nodes[0];
4744 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4745 found_type = found_key.type;
4746
4747 if (found_key.objectid != ino)
4748 break;
4749
4750 if (found_type < min_type)
4751 break;
4752
4753 item_end = found_key.offset;
4754 if (found_type == BTRFS_EXTENT_DATA_KEY) {
4755 fi = btrfs_item_ptr(leaf, path->slots[0],
4756 struct btrfs_file_extent_item);
4757 extent_type = btrfs_file_extent_type(leaf, fi);
4758 if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
4759 item_end +=
4760 btrfs_file_extent_num_bytes(leaf, fi);
4761
4762 trace_btrfs_truncate_show_fi_regular(
4763 inode, leaf, fi, found_key.offset);
4764 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4765 item_end += btrfs_file_extent_ram_bytes(leaf,
4766 fi);
4767
4768 trace_btrfs_truncate_show_fi_inline(
4769 inode, leaf, fi, path->slots[0],
4770 found_key.offset);
4771 }
4772 item_end--;
4773 }
4774 if (found_type > min_type) {
4775 del_item = 1;
4776 } else {
4777 if (item_end < new_size)
4778 break;
4779 if (found_key.offset >= new_size)
4780 del_item = 1;
4781 else
4782 del_item = 0;
4783 }
4784 found_extent = 0;
4785
4786 if (found_type != BTRFS_EXTENT_DATA_KEY)
4787 goto delete;
4788
4789 if (extents_found != NULL)
4790 (*extents_found)++;
4791
4792 if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
4793 u64 num_dec;
4794
4795 clear_start = found_key.offset;
4796 extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
4797 if (!del_item) {
4798 u64 orig_num_bytes =
4799 btrfs_file_extent_num_bytes(leaf, fi);
4800 extent_num_bytes = ALIGN(new_size -
4801 found_key.offset,
4802 fs_info->sectorsize);
4803 clear_start = ALIGN(new_size, fs_info->sectorsize);
4804 btrfs_set_file_extent_num_bytes(leaf, fi,
4805 extent_num_bytes);
4806 num_dec = (orig_num_bytes -
4807 extent_num_bytes);
4808 if (test_bit(BTRFS_ROOT_SHAREABLE,
4809 &root->state) &&
4810 extent_start != 0)
4811 inode_sub_bytes(&inode->vfs_inode,
4812 num_dec);
4813 btrfs_mark_buffer_dirty(leaf);
4814 } else {
4815 extent_num_bytes =
4816 btrfs_file_extent_disk_num_bytes(leaf,
4817 fi);
4818 extent_offset = found_key.offset -
4819 btrfs_file_extent_offset(leaf, fi);
4820
4821
4822 num_dec = btrfs_file_extent_num_bytes(leaf, fi);
4823 if (extent_start != 0) {
4824 found_extent = 1;
4825 if (test_bit(BTRFS_ROOT_SHAREABLE,
4826 &root->state))
4827 inode_sub_bytes(&inode->vfs_inode,
4828 num_dec);
4829 }
4830 }
4831 clear_len = num_dec;
4832 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4833
4834
4835
4836
4837 if (!del_item &&
4838 btrfs_file_extent_encryption(leaf, fi) == 0 &&
4839 btrfs_file_extent_other_encoding(leaf, fi) == 0 &&
4840 btrfs_file_extent_compression(leaf, fi) == 0) {
4841 u32 size = (u32)(new_size - found_key.offset);
4842
4843 btrfs_set_file_extent_ram_bytes(leaf, fi, size);
4844 size = btrfs_file_extent_calc_inline_size(size);
4845 btrfs_truncate_item(path, size, 1);
4846 } else if (!del_item) {
4847
4848
4849
4850
4851 ret = NEED_TRUNCATE_BLOCK;
4852 break;
4853 } else {
4854
4855
4856
4857
4858
4859 clear_len = fs_info->sectorsize;
4860 }
4861
4862 if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
4863 inode_sub_bytes(&inode->vfs_inode,
4864 item_end + 1 - new_size);
4865 }
4866delete:
4867
4868
4869
4870
4871
4872 if (root == inode->root) {
4873 ret = btrfs_inode_clear_file_extent_range(inode,
4874 clear_start, clear_len);
4875 if (ret) {
4876 btrfs_abort_transaction(trans, ret);
4877 break;
4878 }
4879 }
4880
4881 if (del_item)
4882 last_size = found_key.offset;
4883 else
4884 last_size = new_size;
4885 if (del_item) {
4886 if (!pending_del_nr) {
4887
4888 pending_del_slot = path->slots[0];
4889 pending_del_nr = 1;
4890 } else if (pending_del_nr &&
4891 path->slots[0] + 1 == pending_del_slot) {
4892
4893 pending_del_nr++;
4894 pending_del_slot = path->slots[0];
4895 } else {
4896 BUG();
4897 }
4898 } else {
4899 break;
4900 }
4901 should_throttle = false;
4902
4903 if (found_extent &&
4904 root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
4905 struct btrfs_ref ref = { 0 };
4906
4907 bytes_deleted += extent_num_bytes;
4908
4909 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF,
4910 extent_start, extent_num_bytes, 0);
4911 ref.real_root = root->root_key.objectid;
4912 btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
4913 ino, extent_offset);
4914 ret = btrfs_free_extent(trans, &ref);
4915 if (ret) {
4916 btrfs_abort_transaction(trans, ret);
4917 break;
4918 }
4919 if (be_nice) {
4920 if (btrfs_should_throttle_delayed_refs(trans))
4921 should_throttle = true;
4922 }
4923 }
4924
4925 if (found_type == BTRFS_INODE_ITEM_KEY)
4926 break;
4927
4928 if (path->slots[0] == 0 ||
4929 path->slots[0] != pending_del_slot ||
4930 should_throttle) {
4931 if (pending_del_nr) {
4932 ret = btrfs_del_items(trans, root, path,
4933 pending_del_slot,
4934 pending_del_nr);
4935 if (ret) {
4936 btrfs_abort_transaction(trans, ret);
4937 break;
4938 }
4939 pending_del_nr = 0;
4940 }
4941 btrfs_release_path(path);
4942
4943
4944
4945
4946
4947
4948
4949
4950
4951
4952
4953 if (should_throttle) {
4954 ret = btrfs_delayed_refs_rsv_refill(fs_info,
4955 BTRFS_RESERVE_NO_FLUSH);
4956 if (ret) {
4957 ret = -EAGAIN;
4958 break;
4959 }
4960 }
4961 goto search_again;
4962 } else {
4963 path->slots[0]--;
4964 }
4965 }
4966out:
4967 if (ret >= 0 && pending_del_nr) {
4968 int err;
4969
4970 err = btrfs_del_items(trans, root, path, pending_del_slot,
4971 pending_del_nr);
4972 if (err) {
4973 btrfs_abort_transaction(trans, err);
4974 ret = err;
4975 }
4976 }
4977 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
4978 ASSERT(last_size >= new_size);
4979 if (!ret && last_size > new_size)
4980 last_size = new_size;
4981 btrfs_inode_safe_disk_i_size_write(inode, last_size);
4982 unlock_extent_cached(&inode->io_tree, lock_start, (u64)-1,
4983 &cached_state);
4984 }
4985
4986 btrfs_free_path(path);
4987 return ret;
4988}
4989
4990
4991
4992
4993
4994
4995
4996
4997
4998
4999
5000
5001int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
5002 int front)
5003{
5004 struct btrfs_fs_info *fs_info = inode->root->fs_info;
5005 struct address_space *mapping = inode->vfs_inode.i_mapping;
5006 struct extent_io_tree *io_tree = &inode->io_tree;
5007 struct btrfs_ordered_extent *ordered;
5008 struct extent_state *cached_state = NULL;
5009 struct extent_changeset *data_reserved = NULL;
5010 bool only_release_metadata = false;
5011 u32 blocksize = fs_info->sectorsize;
5012 pgoff_t index = from >> PAGE_SHIFT;
5013 unsigned offset = from & (blocksize - 1);
5014 struct page *page;
5015 gfp_t mask = btrfs_alloc_write_mask(mapping);
5016 size_t write_bytes = blocksize;
5017 int ret = 0;
5018 u64 block_start;
5019 u64 block_end;
5020
5021 if (IS_ALIGNED(offset, blocksize) &&
5022 (!len || IS_ALIGNED(len, blocksize)))
5023 goto out;
5024
5025 block_start = round_down(from, blocksize);
5026 block_end = block_start + blocksize - 1;
5027
5028 ret = btrfs_check_data_free_space(inode, &data_reserved, block_start,
5029 blocksize);
5030 if (ret < 0) {
5031 if (btrfs_check_nocow_lock(inode, block_start, &write_bytes) > 0) {
5032
5033 only_release_metadata = true;
5034 } else {
5035 goto out;
5036 }
5037 }
5038 ret = btrfs_delalloc_reserve_metadata(inode, blocksize);
5039 if (ret < 0) {
5040 if (!only_release_metadata)
5041 btrfs_free_reserved_data_space(inode, data_reserved,
5042 block_start, blocksize);
5043 goto out;
5044 }
5045again:
5046 page = find_or_create_page(mapping, index, mask);
5047 if (!page) {
5048 btrfs_delalloc_release_space(inode, data_reserved, block_start,
5049 blocksize, true);
5050 btrfs_delalloc_release_extents(inode, blocksize);
5051 ret = -ENOMEM;
5052 goto out;
5053 }
5054 ret = set_page_extent_mapped(page);
5055 if (ret < 0)
5056 goto out_unlock;
5057
5058 if (!PageUptodate(page)) {
5059 ret = btrfs_readpage(NULL, page);
5060 lock_page(page);
5061 if (page->mapping != mapping) {
5062 unlock_page(page);
5063 put_page(page);
5064 goto again;
5065 }
5066 if (!PageUptodate(page)) {
5067 ret = -EIO;
5068 goto out_unlock;
5069 }
5070 }
5071 wait_on_page_writeback(page);
5072
5073 lock_extent_bits(io_tree, block_start, block_end, &cached_state);
5074
5075 ordered = btrfs_lookup_ordered_extent(inode, block_start);
5076 if (ordered) {
5077 unlock_extent_cached(io_tree, block_start, block_end,
5078 &cached_state);
5079 unlock_page(page);
5080 put_page(page);
5081 btrfs_start_ordered_extent(ordered, 1);
5082 btrfs_put_ordered_extent(ordered);
5083 goto again;
5084 }
5085
5086 clear_extent_bit(&inode->io_tree, block_start, block_end,
5087 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
5088 0, 0, &cached_state);
5089
5090 ret = btrfs_set_extent_delalloc(inode, block_start, block_end, 0,
5091 &cached_state);
5092 if (ret) {
5093 unlock_extent_cached(io_tree, block_start, block_end,
5094 &cached_state);
5095 goto out_unlock;
5096 }
5097
5098 if (offset != blocksize) {
5099 if (!len)
5100 len = blocksize - offset;
5101 if (front)
5102 memzero_page(page, (block_start - page_offset(page)),
5103 offset);
5104 else
5105 memzero_page(page, (block_start - page_offset(page)) + offset,
5106 len);
5107 flush_dcache_page(page);
5108 }
5109 ClearPageChecked(page);
5110 btrfs_page_set_dirty(fs_info, page, block_start, block_end + 1 - block_start);
5111 unlock_extent_cached(io_tree, block_start, block_end, &cached_state);
5112
5113 if (only_release_metadata)
5114 set_extent_bit(&inode->io_tree, block_start, block_end,
5115 EXTENT_NORESERVE, 0, NULL, NULL, GFP_NOFS, NULL);
5116
5117out_unlock:
5118 if (ret) {
5119 if (only_release_metadata)
5120 btrfs_delalloc_release_metadata(inode, blocksize, true);
5121 else
5122 btrfs_delalloc_release_space(inode, data_reserved,
5123 block_start, blocksize, true);
5124 }
5125 btrfs_delalloc_release_extents(inode, blocksize);
5126 unlock_page(page);
5127 put_page(page);
5128out:
5129 if (only_release_metadata)
5130 btrfs_check_nocow_unlock(inode);
5131 extent_changeset_free(data_reserved);
5132 return ret;
5133}
5134
5135static int maybe_insert_hole(struct btrfs_root *root, struct btrfs_inode *inode,
5136 u64 offset, u64 len)
5137{
5138 struct btrfs_fs_info *fs_info = root->fs_info;
5139 struct btrfs_trans_handle *trans;
5140 struct btrfs_drop_extents_args drop_args = { 0 };
5141 int ret;
5142
5143
5144
5145
5146
5147
5148
5149 if (btrfs_fs_incompat(fs_info, NO_HOLES))
5150 return 0;
5151
5152
5153
5154
5155
5156
5157 trans = btrfs_start_transaction(root, 3);
5158 if (IS_ERR(trans))
5159 return PTR_ERR(trans);
5160
5161 drop_args.start = offset;
5162 drop_args.end = offset + len;
5163 drop_args.drop_cache = true;
5164
5165 ret = btrfs_drop_extents(trans, root, inode, &drop_args);
5166 if (ret) {
5167 btrfs_abort_transaction(trans, ret);
5168 btrfs_end_transaction(trans);
5169 return ret;
5170 }
5171
5172 ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode),
5173 offset, 0, 0, len, 0, len, 0, 0, 0);
5174 if (ret) {
5175 btrfs_abort_transaction(trans, ret);
5176 } else {
5177 btrfs_update_inode_bytes(inode, 0, drop_args.bytes_found);
5178 btrfs_update_inode(trans, root, inode);
5179 }
5180 btrfs_end_transaction(trans);
5181 return ret;
5182}
5183
5184
5185
5186
5187
5188
5189
5190int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size)
5191{
5192 struct btrfs_root *root = inode->root;
5193 struct btrfs_fs_info *fs_info = root->fs_info;
5194 struct extent_io_tree *io_tree = &inode->io_tree;
5195 struct extent_map *em = NULL;
5196 struct extent_state *cached_state = NULL;
5197 struct extent_map_tree *em_tree = &inode->extent_tree;
5198 u64 hole_start = ALIGN(oldsize, fs_info->sectorsize);
5199 u64 block_end = ALIGN(size, fs_info->sectorsize);
5200 u64 last_byte;
5201 u64 cur_offset;
5202 u64 hole_size;
5203 int err = 0;
5204
5205
5206
5207
5208
5209
5210 err = btrfs_truncate_block(inode, oldsize, 0, 0);
5211 if (err)
5212 return err;
5213
5214 if (size <= hole_start)
5215 return 0;
5216
5217 btrfs_lock_and_flush_ordered_range(inode, hole_start, block_end - 1,
5218 &cached_state);
5219 cur_offset = hole_start;
5220 while (1) {
5221 em = btrfs_get_extent(inode, NULL, 0, cur_offset,
5222 block_end - cur_offset);
5223 if (IS_ERR(em)) {
5224 err = PTR_ERR(em);
5225 em = NULL;
5226 break;
5227 }
5228 last_byte = min(extent_map_end(em), block_end);
5229 last_byte = ALIGN(last_byte, fs_info->sectorsize);
5230 hole_size = last_byte - cur_offset;
5231
5232 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
5233 struct extent_map *hole_em;
5234
5235 err = maybe_insert_hole(root, inode, cur_offset,
5236 hole_size);
5237 if (err)
5238 break;
5239
5240 err = btrfs_inode_set_file_extent_range(inode,
5241 cur_offset, hole_size);
5242 if (err)
5243 break;
5244
5245 btrfs_drop_extent_cache(inode, cur_offset,
5246 cur_offset + hole_size - 1, 0);
5247 hole_em = alloc_extent_map();
5248 if (!hole_em) {
5249 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
5250 &inode->runtime_flags);
5251 goto next;
5252 }
5253 hole_em->start = cur_offset;
5254 hole_em->len = hole_size;
5255 hole_em->orig_start = cur_offset;
5256
5257 hole_em->block_start = EXTENT_MAP_HOLE;
5258 hole_em->block_len = 0;
5259 hole_em->orig_block_len = 0;
5260 hole_em->ram_bytes = hole_size;
5261 hole_em->compress_type = BTRFS_COMPRESS_NONE;
5262 hole_em->generation = fs_info->generation;
5263
5264 while (1) {
5265 write_lock(&em_tree->lock);
5266 err = add_extent_mapping(em_tree, hole_em, 1);
5267 write_unlock(&em_tree->lock);
5268 if (err != -EEXIST)
5269 break;
5270 btrfs_drop_extent_cache(inode, cur_offset,
5271 cur_offset +
5272 hole_size - 1, 0);
5273 }
5274 free_extent_map(hole_em);
5275 } else {
5276 err = btrfs_inode_set_file_extent_range(inode,
5277 cur_offset, hole_size);
5278 if (err)
5279 break;
5280 }
5281next:
5282 free_extent_map(em);
5283 em = NULL;
5284 cur_offset = last_byte;
5285 if (cur_offset >= block_end)
5286 break;
5287 }
5288 free_extent_map(em);
5289 unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state);
5290 return err;
5291}
5292
/*
 * Apply the size change requested in @attr to @inode.
 *
 * A larger size is handled by btrfs_cont_expand() followed by an i_size
 * update inside a transaction.  A smaller (or equal) size goes through
 * truncate_setsize() and btrfs_truncate().
 *
 * Returns 0 on success or a negative errno.
 */
static int btrfs_setsize(struct inode *inode, struct iattr *attr)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_trans_handle *trans;
	loff_t oldsize = i_size_read(inode);
	loff_t newsize = attr->ia_size;
	int mask = attr->ia_valid;
	int ret;

	/*
	 * Only a real size change bumps the inode version.  ctime/mtime are
	 * stamped here only when the caller set neither ATTR_CTIME nor
	 * ATTR_MTIME in ia_valid.
	 */
	if (newsize != oldsize) {
		inode_inc_iversion(inode);
		if (!(mask & (ATTR_CTIME | ATTR_MTIME)))
			inode->i_ctime = inode->i_mtime =
				current_time(inode);
	}

	if (newsize > oldsize) {
		/*
		 * The root's snapshot_lock is write-held across the expansion
		 * and the inode update, and dropped on every exit path.
		 * NOTE(review): presumably this keeps a concurrent snapshot
		 * from capturing a half-finished expanding truncate - confirm.
		 */
		btrfs_drew_write_lock(&root->snapshot_lock);
		ret = btrfs_cont_expand(BTRFS_I(inode), oldsize, newsize);
		if (ret) {
			btrfs_drew_write_unlock(&root->snapshot_lock);
			return ret;
		}

		trans = btrfs_start_transaction(root, 1);
		if (IS_ERR(trans)) {
			btrfs_drew_write_unlock(&root->snapshot_lock);
			return PTR_ERR(trans);
		}

		i_size_write(inode, newsize);
		/*
		 * NOTE(review): the 0 argument presumably asks the helper to
		 * derive disk_i_size from the current i_size - confirm
		 * against its definition.
		 */
		btrfs_inode_safe_disk_i_size_write(BTRFS_I(inode), 0);
		pagecache_isize_extended(inode, oldsize, newsize);
		ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
		btrfs_drew_write_unlock(&root->snapshot_lock);
		btrfs_end_transaction(trans);
	} else {
		struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);

		/*
		 * On zoned filesystems, wait for ordered extents from the
		 * sector-aligned new size to the end of the file before
		 * truncating.
		 */
		if (btrfs_is_zoned(fs_info)) {
			ret = btrfs_wait_ordered_range(inode,
					ALIGN(newsize, fs_info->sectorsize),
					(u64)-1);
			if (ret)
				return ret;
		}

		/*
		 * Truncating down to zero flags the inode FLUSH_ON_CLOSE.
		 * NOTE(review): presumably so data written after the truncate
		 * is flushed when the file is closed - confirm.
		 */
		if (newsize == 0)
			set_bit(BTRFS_INODE_FLUSH_ON_CLOSE,
				&BTRFS_I(inode)->runtime_flags);

		truncate_setsize(inode, newsize);

		/* Wait for in-flight direct I/O before removing file items. */
		inode_dio_wait(inode);

		/* The second argument is true when the size did not change. */
		ret = btrfs_truncate(inode, newsize == oldsize);
		if (ret && inode->i_nlink) {
			int err;

			/*
			 * The truncate failed on a still-linked inode: wait
			 * for all ordered extents, then reset the in-memory
			 * size to disk_i_size.  The original truncate error
			 * is still returned unless the wait itself fails.
			 */
			err = btrfs_wait_ordered_range(inode, 0, (u64)-1);
			if (err)
				return err;
			i_size_write(inode, BTRFS_I(inode)->disk_i_size);
		}
	}

	return ret;
}
5385
5386static int btrfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
5387 struct iattr *attr)
5388{
5389 struct inode *inode = d_inode(dentry);
5390 struct btrfs_root *root = BTRFS_I(inode)->root;
5391 int err;
5392
5393 if (btrfs_root_readonly(root))
5394 return -EROFS;
5395
5396 err = setattr_prepare(mnt_userns, dentry, attr);
5397 if (err)
5398 return err;
5399
5400 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
5401 err = btrfs_setsize(inode, attr);
5402 if (err)
5403 return err;
5404 }
5405
5406 if (attr->ia_valid) {
5407 setattr_copy(mnt_userns, inode, attr);
5408 inode_inc_iversion(inode);
5409 err = btrfs_dirty_inode(inode);
5410
5411 if (!err && attr->ia_valid & ATTR_MODE)
5412 err = posix_acl_chmod(mnt_userns, inode, inode->i_mode);
5413 }
5414
5415 return err;
5416}
5417
5418
5419
5420
5421
5422
5423
5424
5425
5426
5427
5428
5429
/*
 * Drop all cached state of an inode that is being freed: its page cache,
 * every extent map in its extent map tree and every extent state record in
 * its io tree.
 *
 * The inode must already be marked I_FREEING (asserted below).
 */
static void evict_inode_truncate_pages(struct inode *inode)
{
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_map_tree *map_tree = &BTRFS_I(inode)->extent_tree;
	struct rb_node *node;

	ASSERT(inode->i_state & I_FREEING);
	truncate_inode_pages_final(&inode->i_data);

	/*
	 * Remove extent maps one at a time, always taking the first one.
	 * The PINNED and LOGGING flags are cleared before removal.  The
	 * tree lock is dropped around cond_resched() when a reschedule is
	 * pending.
	 */
	write_lock(&map_tree->lock);
	while (!RB_EMPTY_ROOT(&map_tree->map.rb_root)) {
		struct extent_map *em;

		node = rb_first_cached(&map_tree->map);
		em = rb_entry(node, struct extent_map, rb_node);
		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
		clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
		remove_extent_mapping(map_tree, em);
		free_extent_map(em);
		if (need_resched()) {
			write_unlock(&map_tree->lock);
			cond_resched();
			write_lock(&map_tree->lock);
		}
	}
	write_unlock(&map_tree->lock);

	/*
	 * Keep looping until the io tree has no ranges left.  Each pass
	 * snapshots the first state's range and flags under the spinlock,
	 * drops the spinlock, then locks the extent range and clears it.
	 * NOTE(review): the range is locked rather than cleared directly,
	 * presumably because some other context (e.g. an I/O completion)
	 * may still hold it - confirm.
	 */
	spin_lock(&io_tree->lock);
	while (!RB_EMPTY_ROOT(&io_tree->state)) {
		struct extent_state *state;
		struct extent_state *cached_state = NULL;
		u64 start;
		u64 end;
		unsigned state_flags;

		node = rb_first(&io_tree->state);
		state = rb_entry(node, struct extent_state, rb_node);
		start = state->start;
		end = state->end;
		state_flags = state->state;
		spin_unlock(&io_tree->lock);

		lock_extent_bits(io_tree, start, end, &cached_state);

		/*
		 * A range still marked DELALLOC gets its qgroup data
		 * reservation released here.
		 * NOTE(review): presumably because such data never reached
		 * disk and nothing else will free the reservation - confirm.
		 */
		if (state_flags & EXTENT_DELALLOC)
			btrfs_qgroup_free_data(BTRFS_I(inode), NULL, start,
					       end - start + 1);

		clear_extent_bit(io_tree, start, end,
				 EXTENT_LOCKED | EXTENT_DELALLOC |
				 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
				 &cached_state);

		cond_resched();
		spin_lock(&io_tree->lock);
	}
	spin_unlock(&io_tree->lock);
}
5512
5513static struct btrfs_trans_handle *evict_refill_and_join(struct btrfs_root *root,
5514 struct btrfs_block_rsv *rsv)
5515{
5516 struct btrfs_fs_info *fs_info = root->fs_info;
5517 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
5518 struct btrfs_trans_handle *trans;
5519 u64 delayed_refs_extra = btrfs_calc_insert_metadata_size(fs_info, 1);
5520 int ret;
5521
5522
5523
5524
5525
5526
5527
5528
5529
5530
5531
5532
5533
5534
5535 ret = btrfs_block_rsv_refill(root, rsv, rsv->size + delayed_refs_extra,
5536 BTRFS_RESERVE_FLUSH_EVICT);
5537 if (ret) {
5538
5539
5540
5541
5542 if (btrfs_check_space_for_delayed_refs(fs_info) ||
5543 btrfs_block_rsv_migrate(global_rsv, rsv, rsv->size, 0)) {
5544 btrfs_warn(fs_info,
5545 "could not allocate space for delete; will truncate on mount");
5546 return ERR_PTR(-ENOSPC);
5547 }
5548 delayed_refs_extra = 0;
5549 }
5550
5551 trans = btrfs_join_transaction(root);
5552 if (IS_ERR(trans))
5553 return trans;
5554
5555 if (delayed_refs_extra) {
5556 trans->block_rsv = &fs_info->trans_block_rsv;
5557 trans->bytes_reserved = delayed_refs_extra;
5558 btrfs_block_rsv_migrate(rsv, trans->block_rsv,
5559 delayed_refs_extra, 1);
5560 }
5561 return trans;
5562}
5563
/*
 * Evict @inode from memory and, when it has no links left, delete its items
 * from the tree.
 *
 * Every path ends with fsverity_cleanup_inode() and clear_inode().  All
 * paths except the early "no root" one also remove the delayed node.
 */
void btrfs_evict_inode(struct inode *inode)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_block_rsv *rsv;
	int ret;

	trace_btrfs_inode_evict(inode);

	/* An inode that never got a root has nothing btrfs-specific to drop. */
	if (!root) {
		fsverity_cleanup_inode(inode);
		clear_inode(inode);
		return;
	}

	evict_inode_truncate_pages(inode);

	/*
	 * Still linked, and either in a root that has references and is
	 * not the root tree, or a free space inode: nothing to delete.
	 */
	if (inode->i_nlink &&
	    ((btrfs_root_refs(&root->root_item) != 0 &&
	      root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID) ||
	     btrfs_is_free_space_inode(BTRFS_I(inode))))
		goto no_delete;

	if (is_bad_inode(inode))
		goto no_delete;

	btrfs_free_io_failure_record(BTRFS_I(inode), 0, (u64)-1);

	/* No deletion while log recovery is in progress. */
	if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
		goto no_delete;

	/*
	 * A linked inode reaching this point must be in a root with no
	 * references or in the root tree; the first check above already
	 * caught the other case.
	 */
	if (inode->i_nlink > 0) {
		BUG_ON(btrfs_root_refs(&root->root_item) != 0 &&
		       root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID);
		goto no_delete;
	}

	ret = btrfs_commit_inode_delayed_inode(BTRFS_I(inode));
	if (ret)
		goto no_delete;

	/* Temporary reserve sized for one metadata item, with failfast set. */
	rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
	if (!rsv)
		goto no_delete;
	rsv->size = btrfs_calc_metadata_size(fs_info, 1);
	rsv->failfast = 1;

	btrfs_i_size_write(BTRFS_I(inode), 0);

	/*
	 * Truncate all inode items, one transaction per pass.  -ENOSPC and
	 * -EAGAIN mean "refill the reserve and try again"; any other error
	 * gives up on the deletion.
	 */
	while (1) {
		trans = evict_refill_and_join(root, rsv);
		if (IS_ERR(trans))
			goto free_rsv;

		trans->block_rsv = rsv;

		ret = btrfs_truncate_inode_items(trans, root, BTRFS_I(inode),
						 0, 0, NULL);
		trans->block_rsv = &fs_info->trans_block_rsv;
		btrfs_end_transaction(trans);
		btrfs_btree_balance_dirty(fs_info);
		if (ret && ret != -ENOSPC && ret != -EAGAIN)
			goto free_rsv;
		else if (!ret)
			break;
	}

	/*
	 * Remove the orphan item.  Failing to get a transaction here is
	 * deliberately ignored.
	 * NOTE(review): presumably a leftover orphan item is handled by a
	 * later orphan cleanup - confirm.
	 */
	trans = evict_refill_and_join(root, rsv);
	if (!IS_ERR(trans)) {
		trans->block_rsv = rsv;
		btrfs_orphan_del(trans, BTRFS_I(inode));
		trans->block_rsv = &fs_info->trans_block_rsv;
		btrfs_end_transaction(trans);
	}

free_rsv:
	btrfs_free_block_rsv(fs_info, rsv);
no_delete:
	/*
	 * Reached both after a successful delete and on every bail-out
	 * above.
	 */
	btrfs_remove_delayed_node(BTRFS_I(inode));
	fsverity_cleanup_inode(inode);
	clear_inode(inode);
}
5661
5662
5663
5664
5665
5666
5667
5668
5669static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
5670 struct btrfs_key *location, u8 *type)
5671{
5672 const char *name = dentry->d_name.name;
5673 int namelen = dentry->d_name.len;
5674 struct btrfs_dir_item *di;
5675 struct btrfs_path *path;
5676 struct btrfs_root *root = BTRFS_I(dir)->root;
5677 int ret = 0;
5678
5679 path = btrfs_alloc_path();
5680 if (!path)
5681 return -ENOMEM;
5682
5683 di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(BTRFS_I(dir)),
5684 name, namelen, 0);
5685 if (IS_ERR_OR_NULL(di)) {
5686 ret = di ? PTR_ERR(di) : -ENOENT;
5687 goto out;
5688 }
5689
5690 btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
5691 if (location->type != BTRFS_INODE_ITEM_KEY &&
5692 location->type != BTRFS_ROOT_ITEM_KEY) {
5693 ret = -EUCLEAN;
5694 btrfs_warn(root->fs_info,
5695"%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))",
5696 __func__, name, btrfs_ino(BTRFS_I(dir)),
5697 location->objectid, location->type, location->offset);
5698 }
5699 if (!ret)
5700 *type = btrfs_dir_type(path->nodes[0], di);
5701out:
5702 btrfs_free_path(path);
5703 return ret;
5704}
5705
5706
5707
5708
5709
5710
5711static int fixup_tree_root_location(struct btrfs_fs_info *fs_info,
5712 struct inode *dir,
5713 struct dentry *dentry,
5714 struct btrfs_key *location,
5715 struct btrfs_root **sub_root)
5716{
5717 struct btrfs_path *path;
5718 struct btrfs_root *new_root;
5719 struct btrfs_root_ref *ref;
5720 struct extent_buffer *leaf;
5721 struct btrfs_key key;
5722 int ret;
5723 int err = 0;
5724
5725 path = btrfs_alloc_path();
5726 if (!path) {
5727 err = -ENOMEM;
5728 goto out;
5729 }
5730
5731 err = -ENOENT;
5732 key.objectid = BTRFS_I(dir)->root->root_key.objectid;
5733 key.type = BTRFS_ROOT_REF_KEY;
5734 key.offset = location->objectid;
5735
5736 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
5737 if (ret) {
5738 if (ret < 0)
5739 err = ret;
5740 goto out;
5741 }
5742
5743 leaf = path->nodes[0];
5744 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
5745 if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(BTRFS_I(dir)) ||
5746 btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len)
5747 goto out;
5748
5749 ret = memcmp_extent_buffer(leaf, dentry->d_name.name,
5750 (unsigned long)(ref + 1),
5751 dentry->d_name.len);
5752 if (ret)
5753 goto out;
5754
5755 btrfs_release_path(path);
5756
5757 new_root = btrfs_get_fs_root(fs_info, location->objectid, true);
5758 if (IS_ERR(new_root)) {
5759 err = PTR_ERR(new_root);
5760 goto out;
5761 }
5762
5763 *sub_root = new_root;
5764 location->objectid = btrfs_root_dirid(&new_root->root_item);
5765 location->type = BTRFS_INODE_ITEM_KEY;
5766 location->offset = 0;
5767 err = 0;
5768out:
5769 btrfs_free_path(path);
5770 return err;
5771}
5772
5773static void inode_tree_add(struct inode *inode)
5774{
5775 struct btrfs_root *root = BTRFS_I(inode)->root;
5776 struct btrfs_inode *entry;
5777 struct rb_node **p;
5778 struct rb_node *parent;
5779 struct rb_node *new = &BTRFS_I(inode)->rb_node;
5780 u64 ino = btrfs_ino(BTRFS_I(inode));
5781
5782 if (inode_unhashed(inode))
5783 return;
5784 parent = NULL;
5785 spin_lock(&root->inode_lock);
5786 p = &root->inode_tree.rb_node;
5787 while (*p) {
5788 parent = *p;
5789 entry = rb_entry(parent, struct btrfs_inode, rb_node);
5790
5791 if (ino < btrfs_ino(entry))
5792 p = &parent->rb_left;
5793 else if (ino > btrfs_ino(entry))
5794 p = &parent->rb_right;
5795 else {
5796 WARN_ON(!(entry->vfs_inode.i_state &
5797 (I_WILL_FREE | I_FREEING)));
5798 rb_replace_node(parent, new, &root->inode_tree);
5799 RB_CLEAR_NODE(parent);
5800 spin_unlock(&root->inode_lock);
5801 return;
5802 }
5803 }
5804 rb_link_node(new, parent, p);
5805 rb_insert_color(new, &root->inode_tree);
5806 spin_unlock(&root->inode_lock);
5807}
5808
5809static void inode_tree_del(struct btrfs_inode *inode)
5810{
5811 struct btrfs_root *root = inode->root;
5812 int empty = 0;
5813
5814 spin_lock(&root->inode_lock);
5815 if (!RB_EMPTY_NODE(&inode->rb_node)) {
5816 rb_erase(&inode->rb_node, &root->inode_tree);
5817 RB_CLEAR_NODE(&inode->rb_node);
5818 empty = RB_EMPTY_ROOT(&root->inode_tree);
5819 }
5820 spin_unlock(&root->inode_lock);
5821
5822 if (empty && btrfs_root_refs(&root->root_item) == 0) {
5823 spin_lock(&root->inode_lock);
5824 empty = RB_EMPTY_ROOT(&root->inode_tree);
5825 spin_unlock(&root->inode_lock);
5826 if (empty)
5827 btrfs_add_dead_root(root);
5828 }
5829}
5830
5831
5832static int btrfs_init_locked_inode(struct inode *inode, void *p)
5833{
5834 struct btrfs_iget_args *args = p;
5835
5836 inode->i_ino = args->ino;
5837 BTRFS_I(inode)->location.objectid = args->ino;
5838 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
5839 BTRFS_I(inode)->location.offset = 0;
5840 BTRFS_I(inode)->root = btrfs_grab_root(args->root);
5841 BUG_ON(args->root && !BTRFS_I(inode)->root);
5842 return 0;
5843}
5844
5845static int btrfs_find_actor(struct inode *inode, void *opaque)
5846{
5847 struct btrfs_iget_args *args = opaque;
5848
5849 return args->ino == BTRFS_I(inode)->location.objectid &&
5850 args->root == BTRFS_I(inode)->root;
5851}
5852
5853static struct inode *btrfs_iget_locked(struct super_block *s, u64 ino,
5854 struct btrfs_root *root)
5855{
5856 struct inode *inode;
5857 struct btrfs_iget_args args;
5858 unsigned long hashval = btrfs_inode_hash(ino, root);
5859
5860 args.ino = ino;
5861 args.root = root;
5862
5863 inode = iget5_locked(s, hashval, btrfs_find_actor,
5864 btrfs_init_locked_inode,
5865 (void *)&args);
5866 return inode;
5867}
5868
5869
5870
5871
5872
5873
5874
5875struct inode *btrfs_iget_path(struct super_block *s, u64 ino,
5876 struct btrfs_root *root, struct btrfs_path *path)
5877{
5878 struct inode *inode;
5879
5880 inode = btrfs_iget_locked(s, ino, root);
5881 if (!inode)
5882 return ERR_PTR(-ENOMEM);
5883
5884 if (inode->i_state & I_NEW) {
5885 int ret;
5886
5887 ret = btrfs_read_locked_inode(inode, path);
5888 if (!ret) {
5889 inode_tree_add(inode);
5890 unlock_new_inode(inode);
5891 } else {
5892 iget_failed(inode);
5893
5894
5895
5896
5897
5898 if (ret > 0)
5899 ret = -ENOENT;
5900 inode = ERR_PTR(ret);
5901 }
5902 }
5903
5904 return inode;
5905}
5906
/*
 * Get the inode (@root, @ino) on superblock @s.
 *
 * Thin wrapper around btrfs_iget_path() that passes NULL as the path.
 * Returns whatever btrfs_iget_path() returns.
 */
struct inode *btrfs_iget(struct super_block *s, u64 ino, struct btrfs_root *root)
{
	return btrfs_iget_path(s, ino, root, NULL);
}
5911
5912static struct inode *new_simple_dir(struct super_block *s,
5913 struct btrfs_key *key,
5914 struct btrfs_root *root)
5915{
5916 struct inode *inode = new_inode(s);
5917
5918 if (!inode)
5919 return ERR_PTR(-ENOMEM);
5920
5921 BTRFS_I(inode)->root = btrfs_grab_root(root);
5922 memcpy(&BTRFS_I(inode)->location, key, sizeof(*key));
5923 set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags);
5924
5925 inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID;
5926
5927
5928
5929
5930 inode->i_op = &simple_dir_inode_operations;
5931 inode->i_opflags &= ~IOP_XATTR;
5932 inode->i_fop = &simple_dir_operations;
5933 inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
5934 inode->i_mtime = current_time(inode);
5935 inode->i_atime = inode->i_mtime;
5936 inode->i_ctime = inode->i_mtime;
5937 BTRFS_I(inode)->i_otime = inode->i_mtime;
5938
5939 return inode;
5940}
5941
5942static inline u8 btrfs_inode_type(struct inode *inode)
5943{
5944
5945
5946
5947
5948 BUILD_BUG_ON(BTRFS_FT_UNKNOWN != FT_UNKNOWN);
5949 BUILD_BUG_ON(BTRFS_FT_REG_FILE != FT_REG_FILE);
5950 BUILD_BUG_ON(BTRFS_FT_DIR != FT_DIR);
5951 BUILD_BUG_ON(BTRFS_FT_CHRDEV != FT_CHRDEV);
5952 BUILD_BUG_ON(BTRFS_FT_BLKDEV != FT_BLKDEV);
5953 BUILD_BUG_ON(BTRFS_FT_FIFO != FT_FIFO);
5954 BUILD_BUG_ON(BTRFS_FT_SOCK != FT_SOCK);
5955 BUILD_BUG_ON(BTRFS_FT_SYMLINK != FT_SYMLINK);
5956
5957 return fs_umode_to_ftype(inode->i_mode);
5958}
5959
5960struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
5961{
5962 struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
5963 struct inode *inode;
5964 struct btrfs_root *root = BTRFS_I(dir)->root;
5965 struct btrfs_root *sub_root = root;
5966 struct btrfs_key location;
5967 u8 di_type = 0;
5968 int ret = 0;
5969
5970 if (dentry->d_name.len > BTRFS_NAME_LEN)
5971 return ERR_PTR(-ENAMETOOLONG);
5972
5973 ret = btrfs_inode_by_name(dir, dentry, &location, &di_type);
5974 if (ret < 0)
5975 return ERR_PTR(ret);
5976
5977 if (location.type == BTRFS_INODE_ITEM_KEY) {
5978 inode = btrfs_iget(dir->i_sb, location.objectid, root);
5979 if (IS_ERR(inode))
5980 return inode;
5981
5982
5983 if (btrfs_inode_type(inode) != di_type) {
5984 btrfs_crit(fs_info,
5985"inode mode mismatch with dir: inode mode=0%o btrfs type=%u dir type=%u",
5986 inode->i_mode, btrfs_inode_type(inode),
5987 di_type);
5988 iput(inode);
5989 return ERR_PTR(-EUCLEAN);
5990 }
5991 return inode;
5992 }
5993
5994 ret = fixup_tree_root_location(fs_info, dir, dentry,
5995 &location, &sub_root);
5996 if (ret < 0) {
5997 if (ret != -ENOENT)
5998 inode = ERR_PTR(ret);
5999 else
6000 inode = new_simple_dir(dir->i_sb, &location, sub_root);
6001 } else {
6002 inode = btrfs_iget(dir->i_sb, location.objectid, sub_root);
6003 }
6004 if (root != sub_root)
6005 btrfs_put_root(sub_root);
6006
6007 if (!IS_ERR(inode) && root != sub_root) {
6008 down_read(&fs_info->cleanup_work_sem);
6009 if (!sb_rdonly(inode->i_sb))
6010 ret = btrfs_orphan_cleanup(sub_root);
6011 up_read(&fs_info->cleanup_work_sem);
6012 if (ret) {
6013 iput(inode);
6014 inode = ERR_PTR(ret);
6015 }
6016 }
6017
6018 return inode;
6019}
6020
6021static int btrfs_dentry_delete(const struct dentry *dentry)
6022{
6023 struct btrfs_root *root;
6024 struct inode *inode = d_inode(dentry);
6025
6026 if (!inode && !IS_ROOT(dentry))
6027 inode = d_inode(dentry->d_parent);
6028
6029 if (inode) {
6030 root = BTRFS_I(inode)->root;
6031 if (btrfs_root_refs(&root->root_item) == 0)
6032 return 1;
6033
6034 if (btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
6035 return 1;
6036 }
6037 return 0;
6038}
6039
6040static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
6041 unsigned int flags)
6042{
6043 struct inode *inode = btrfs_lookup_dentry(dir, dentry);
6044
6045 if (inode == ERR_PTR(-ENOENT))
6046 inode = NULL;
6047 return d_splice_alias(inode, dentry);
6048}
6049
6050
6051
6052
6053
6054
6055
6056
6057
6058
6059static int btrfs_opendir(struct inode *inode, struct file *file)
6060{
6061 struct btrfs_file_private *private;
6062
6063 private = kzalloc(sizeof(struct btrfs_file_private), GFP_KERNEL);
6064 if (!private)
6065 return -ENOMEM;
6066 private->filldir_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
6067 if (!private->filldir_buf) {
6068 kfree(private);
6069 return -ENOMEM;
6070 }
6071 file->private_data = private;
6072 return 0;
6073}
6074
/*
 * Header of one record in the readdir staging buffer.
 *
 * btrfs_real_readdir() packs records back to back, each header followed
 * immediately by name_len bytes of name, and btrfs_filldir() walks them.
 * Both sides use get_unaligned()/put_unaligned() for every field.
 */
struct dir_entry {
	u64 ino;		/* objectid of the key stored in the dir item */
	u64 offset;		/* key offset of the dir index item */
	unsigned type;		/* dtype derived from the dir item's type */
	int name_len;		/* length of the name that follows */
};
6081
6082static int btrfs_filldir(void *addr, int entries, struct dir_context *ctx)
6083{
6084 while (entries--) {
6085 struct dir_entry *entry = addr;
6086 char *name = (char *)(entry + 1);
6087
6088 ctx->pos = get_unaligned(&entry->offset);
6089 if (!dir_emit(ctx, name, get_unaligned(&entry->name_len),
6090 get_unaligned(&entry->ino),
6091 get_unaligned(&entry->type)))
6092 return 1;
6093 addr += sizeof(struct dir_entry) +
6094 get_unaligned(&entry->name_len);
6095 ctx->pos++;
6096 }
6097 return 0;
6098}
6099
/*
 * readdir implementation.
 *
 * Walks the DIR_INDEX items of the directory starting at ctx->pos, stages
 * them as struct dir_entry records in the per-file filldir buffer and emits
 * them in batches through btrfs_filldir().  Delayed deletions are skipped
 * and delayed insertions are emitted after the on-disk entries.
 *
 * Returns 0 (also when the consumer stopped accepting entries) or a
 * negative errno.
 */
static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
{
	struct inode *inode = file_inode(file);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_file_private *private = file->private_data;
	struct btrfs_dir_item *di;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_path *path;
	void *addr;
	struct list_head ins_list;
	struct list_head del_list;
	int ret;
	struct extent_buffer *leaf;
	int slot;
	char *name_ptr;
	int name_len;
	int entries = 0;
	int total_len = 0;
	bool put = false;
	struct btrfs_key location;

	if (!dir_emit_dots(file, ctx))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	addr = private->filldir_buf;
	path->reada = READA_FORWARD;

	INIT_LIST_HEAD(&ins_list);
	INIT_LIST_HEAD(&del_list);
	put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list);

	/*
	 * (Re)start the tree search at the current position.  Jumped back
	 * to after a full buffer has been flushed, since the path is
	 * released before emitting.
	 */
again:
	key.type = BTRFS_DIR_INDEX_KEY;
	key.offset = ctx->pos;
	key.objectid = btrfs_ino(BTRFS_I(inode));

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto err;

	while (1) {
		struct dir_entry *entry;

		leaf = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto err;
			else if (ret > 0)
				break;
			continue;
		}

		btrfs_item_key_to_cpu(leaf, &found_key, slot);

		/* Stop at the first item that is not a dir index of ours. */
		if (found_key.objectid != key.objectid)
			break;
		if (found_key.type != BTRFS_DIR_INDEX_KEY)
			break;
		/* Skip entries before the position and delayed deletions. */
		if (found_key.offset < ctx->pos)
			goto next;
		if (btrfs_should_delete_dir_index(&del_list, found_key.offset))
			goto next;
		di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
		name_len = btrfs_dir_name_len(leaf, di);
		/*
		 * The next record would not fit: release the path, emit
		 * what is staged, reset the buffer and search again.
		 */
		if ((total_len + sizeof(struct dir_entry) + name_len) >=
		    PAGE_SIZE) {
			btrfs_release_path(path);
			ret = btrfs_filldir(private->filldir_buf, entries, ctx);
			if (ret)
				goto nopos;
			addr = private->filldir_buf;
			entries = 0;
			total_len = 0;
			goto again;
		}

		/* Stage one record: header followed by the name bytes. */
		entry = addr;
		put_unaligned(name_len, &entry->name_len);
		name_ptr = (char *)(entry + 1);
		read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1),
				   name_len);
		put_unaligned(fs_ftype_to_dtype(btrfs_dir_type(leaf, di)),
				&entry->type);
		btrfs_dir_item_key_to_cpu(leaf, di, &location);
		put_unaligned(location.objectid, &entry->ino);
		put_unaligned(found_key.offset, &entry->offset);
		entries++;
		addr += sizeof(struct dir_entry) + name_len;
		total_len += sizeof(struct dir_entry) + name_len;
next:
		path->slots[0]++;
	}
	btrfs_release_path(path);

	/* Emit whatever is still staged, then the delayed insertions. */
	ret = btrfs_filldir(private->filldir_buf, entries, ctx);
	if (ret)
		goto nopos;

	ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list);
	if (ret)
		goto nopos;

	/*
	 * Everything was emitted: park the position at INT_MAX, or at
	 * LLONG_MAX when it is already at or beyond INT_MAX.
	 * NOTE(review): presumably this stops later calls from returning
	 * entries added after this pass - confirm.
	 */
	if (ctx->pos >= INT_MAX)
		ctx->pos = LLONG_MAX;
	else
		ctx->pos = INT_MAX;
nopos:
	/* A consumer that stopped accepting entries is not an error. */
	ret = 0;
err:
	if (put)
		btrfs_readdir_put_delayed_items(inode, &ins_list, &del_list);
	btrfs_free_path(path);
	return ret;
}
6238
6239
6240
6241
6242
6243
6244
6245static int btrfs_dirty_inode(struct inode *inode)
6246{
6247 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
6248 struct btrfs_root *root = BTRFS_I(inode)->root;
6249 struct btrfs_trans_handle *trans;
6250 int ret;
6251
6252 if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
6253 return 0;
6254
6255 trans = btrfs_join_transaction(root);
6256 if (IS_ERR(trans))
6257 return PTR_ERR(trans);
6258
6259 ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
6260 if (ret && (ret == -ENOSPC || ret == -EDQUOT)) {
6261
6262 btrfs_end_transaction(trans);
6263 trans = btrfs_start_transaction(root, 1);
6264 if (IS_ERR(trans))
6265 return PTR_ERR(trans);
6266
6267 ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
6268 }
6269 btrfs_end_transaction(trans);
6270 if (BTRFS_I(inode)->delayed_node)
6271 btrfs_balance_delayed_items(fs_info);
6272
6273 return ret;
6274}
6275
6276
6277
6278
6279
6280static int btrfs_update_time(struct inode *inode, struct timespec64 *now,
6281 int flags)
6282{
6283 struct btrfs_root *root = BTRFS_I(inode)->root;
6284 bool dirty = flags & ~S_VERSION;
6285
6286 if (btrfs_root_readonly(root))
6287 return -EROFS;
6288
6289 if (flags & S_VERSION)
6290 dirty |= inode_maybe_inc_iversion(inode, dirty);
6291 if (flags & S_CTIME)
6292 inode->i_ctime = *now;
6293 if (flags & S_MTIME)
6294 inode->i_mtime = *now;
6295 if (flags & S_ATIME)
6296 inode->i_atime = *now;
6297 return dirty ? btrfs_dirty_inode(inode) : 0;
6298}
6299
6300
6301
6302
6303
6304
6305static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
6306{
6307 struct btrfs_root *root = inode->root;
6308 struct btrfs_key key, found_key;
6309 struct btrfs_path *path;
6310 struct extent_buffer *leaf;
6311 int ret;
6312
6313 key.objectid = btrfs_ino(inode);
6314 key.type = BTRFS_DIR_INDEX_KEY;
6315 key.offset = (u64)-1;
6316
6317 path = btrfs_alloc_path();
6318 if (!path)
6319 return -ENOMEM;
6320
6321 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6322 if (ret < 0)
6323 goto out;
6324
6325 if (ret == 0)
6326 goto out;
6327 ret = 0;
6328
6329
6330
6331
6332
6333
6334
6335 if (path->slots[0] == 0) {
6336 inode->index_cnt = 2;
6337 goto out;
6338 }
6339
6340 path->slots[0]--;
6341
6342 leaf = path->nodes[0];
6343 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
6344
6345 if (found_key.objectid != btrfs_ino(inode) ||
6346 found_key.type != BTRFS_DIR_INDEX_KEY) {
6347 inode->index_cnt = 2;
6348 goto out;
6349 }
6350
6351 inode->index_cnt = found_key.offset + 1;
6352out:
6353 btrfs_free_path(path);
6354 return ret;
6355}
6356
6357
6358
6359
6360
6361int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index)
6362{
6363 int ret = 0;
6364
6365 if (dir->index_cnt == (u64)-1) {
6366 ret = btrfs_inode_delayed_dir_index_count(dir);
6367 if (ret) {
6368 ret = btrfs_set_inode_index_count(dir);
6369 if (ret)
6370 return ret;
6371 }
6372 }
6373
6374 *index = dir->index_cnt;
6375 dir->index_cnt++;
6376
6377 return ret;
6378}
6379
6380static int btrfs_insert_inode_locked(struct inode *inode)
6381{
6382 struct btrfs_iget_args args;
6383
6384 args.ino = BTRFS_I(inode)->location.objectid;
6385 args.root = BTRFS_I(inode)->root;
6386
6387 return insert_inode_locked4(inode,
6388 btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root),
6389 btrfs_find_actor, &args);
6390}
6391
6392
6393
6394
6395
6396
6397static void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
6398{
6399 unsigned int flags;
6400
6401 if (!dir)
6402 return;
6403
6404 flags = BTRFS_I(dir)->flags;
6405
6406 if (flags & BTRFS_INODE_NOCOMPRESS) {
6407 BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
6408 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
6409 } else if (flags & BTRFS_INODE_COMPRESS) {
6410 BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
6411 BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
6412 }
6413
6414 if (flags & BTRFS_INODE_NODATACOW) {
6415 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
6416 if (S_ISREG(inode->i_mode))
6417 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
6418 }
6419
6420 btrfs_sync_inode_flags_to_i_flags(inode);
6421}
6422
/*
 * Allocate a new inode and insert its inode item (and, when @name is given,
 * its first inode ref) into @root.
 *
 * @trans:        running transaction
 * @root:         root the inode is created in
 * @mnt_userns:   user namespace passed to inode_init_owner()
 * @dir:          parent directory, may be NULL
 * @name:         name for the initial inode ref; NULL creates the inode with
 *                a link count of 0 and no ref item
 * @name_len:     length of @name
 * @ref_objectid: key offset of the inode ref item
 * @objectid:     inode number of the new inode
 * @mode:         file mode
 * @index:        receives the directory index when @dir and @name are set,
 *                0 when only @dir is set.  It is read unconditionally
 *                afterwards, so it must point at initialised storage.
 *
 * Returns the new inode, still locked from btrfs_insert_inode_locked(), or
 * an ERR_PTR.
 */
static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     struct user_namespace *mnt_userns,
				     struct inode *dir,
				     const char *name, int name_len,
				     u64 ref_objectid, u64 objectid,
				     umode_t mode, u64 *index)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct inode *inode;
	struct btrfs_inode_item *inode_item;
	struct btrfs_key *location;
	struct btrfs_path *path;
	struct btrfs_inode_ref *ref;
	struct btrfs_key key[2];
	u32 sizes[2];
	int nitems = name ? 2 : 1;
	unsigned long ptr;
	unsigned int nofs_flag;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return ERR_PTR(-ENOMEM);

	/* The inode allocation runs inside a NOFS allocation scope. */
	nofs_flag = memalloc_nofs_save();
	inode = new_inode(fs_info->sb);
	memalloc_nofs_restore(nofs_flag);
	if (!inode) {
		btrfs_free_path(path);
		return ERR_PTR(-ENOMEM);
	}

	/*
	 * A nameless inode starts with a link count of 0, so the inode
	 * item filled in below carries that count.
	 */
	if (!name)
		set_nlink(inode, 0);

	/*
	 * Set the inode number before anything can fail.
	 * NOTE(review): presumably so that the iput() on the error paths
	 * below sees a valid number - confirm.
	 */
	inode->i_ino = objectid;

	if (dir && name) {
		trace_btrfs_inode_request(dir);

		ret = btrfs_set_inode_index(BTRFS_I(dir), index);
		if (ret) {
			btrfs_free_path(path);
			iput(inode);
			return ERR_PTR(ret);
		}
	} else if (dir) {
		*index = 0;
	}

	/*
	 * index_cnt starts at 2, the same value
	 * btrfs_set_inode_index_count() uses for a directory without any
	 * dir index items.
	 */
	BTRFS_I(inode)->index_cnt = 2;
	BTRFS_I(inode)->dir_index = *index;
	BTRFS_I(inode)->root = btrfs_grab_root(root);
	BTRFS_I(inode)->generation = trans->transid;
	inode->i_generation = BTRFS_I(inode)->generation;

	/*
	 * New inodes are always flagged as needing a full sync.
	 * NOTE(review): presumably to cope with an inode number that was
	 * reused within the same transaction - confirm.
	 */
	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);

	key[0].objectid = objectid;
	key[0].type = BTRFS_INODE_ITEM_KEY;
	key[0].offset = 0;

	sizes[0] = sizeof(struct btrfs_inode_item);

	if (name) {
		/*
		 * The inode ref item is inserted together with the inode
		 * item; its payload is the ref header followed by the name.
		 */
		key[1].objectid = objectid;
		key[1].type = BTRFS_INODE_REF_KEY;
		key[1].offset = ref_objectid;

		sizes[1] = name_len + sizeof(*ref);
	}

	location = &BTRFS_I(inode)->location;
	location->objectid = objectid;
	location->offset = 0;
	location->type = BTRFS_INODE_ITEM_KEY;

	ret = btrfs_insert_inode_locked(inode);
	if (ret < 0) {
		iput(inode);
		goto fail;
	}

	ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems);
	if (ret != 0)
		goto fail_unlock;

	inode_init_owner(mnt_userns, inode, dir, mode);
	inode_set_bytes(inode, 0);

	inode->i_mtime = current_time(inode);
	inode->i_atime = inode->i_mtime;
	inode->i_ctime = inode->i_mtime;
	BTRFS_I(inode)->i_otime = inode->i_mtime;

	/* Zero the new inode item, then fill it from the in-memory inode. */
	inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
				  struct btrfs_inode_item);
	memzero_extent_buffer(path->nodes[0], (unsigned long)inode_item,
			     sizeof(*inode_item));
	fill_inode_item(trans, path->nodes[0], inode_item, inode);

	if (name) {
		/* The ref item sits in the slot right after the inode item. */
		ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
				     struct btrfs_inode_ref);
		btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
		btrfs_set_inode_ref_index(path->nodes[0], ref, *index);
		ptr = (unsigned long)(ref + 1);
		write_extent_buffer(path->nodes[0], name, ptr, name_len);
	}

	btrfs_mark_buffer_dirty(path->nodes[0]);
	btrfs_free_path(path);

	btrfs_inherit_iflags(inode, dir);

	/* Mount options can force NODATASUM / NODATACOW on regular files. */
	if (S_ISREG(mode)) {
		if (btrfs_test_opt(fs_info, NODATASUM))
			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
		if (btrfs_test_opt(fs_info, NODATACOW))
			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW |
				BTRFS_INODE_NODATASUM;
	}

	inode_tree_add(inode);

	trace_btrfs_inode_new(inode);
	btrfs_set_inode_last_trans(trans, BTRFS_I(inode));

	btrfs_update_root_times(trans, root);

	/* A failure to inherit properties is logged but not fatal. */
	ret = btrfs_inode_inherit_props(trans, inode, dir);
	if (ret)
		btrfs_err(fs_info,
			  "error inheriting props for ino %llu (root %llu): %d",
			btrfs_ino(BTRFS_I(inode)), root->root_key.objectid, ret);

	return inode;

fail_unlock:
	discard_new_inode(inode);
fail:
	/* Give back the directory index taken above. */
	if (dir && name)
		BTRFS_I(dir)->index_cnt--;
	btrfs_free_path(path);
	return ERR_PTR(ret);
}
6594
6595
6596
6597
6598
6599
6600
6601int btrfs_add_link(struct btrfs_trans_handle *trans,
6602 struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
6603 const char *name, int name_len, int add_backref, u64 index)
6604{
6605 int ret = 0;
6606 struct btrfs_key key;
6607 struct btrfs_root *root = parent_inode->root;
6608 u64 ino = btrfs_ino(inode);
6609 u64 parent_ino = btrfs_ino(parent_inode);
6610
6611 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
6612 memcpy(&key, &inode->root->root_key, sizeof(key));
6613 } else {
6614 key.objectid = ino;
6615 key.type = BTRFS_INODE_ITEM_KEY;
6616 key.offset = 0;
6617 }
6618
6619 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
6620 ret = btrfs_add_root_ref(trans, key.objectid,
6621 root->root_key.objectid, parent_ino,
6622 index, name, name_len);
6623 } else if (add_backref) {
6624 ret = btrfs_insert_inode_ref(trans, root, name, name_len, ino,
6625 parent_ino, index);
6626 }
6627
6628
6629 if (ret)
6630 return ret;
6631
6632 ret = btrfs_insert_dir_item(trans, name, name_len, parent_inode, &key,
6633 btrfs_inode_type(&inode->vfs_inode), index);
6634 if (ret == -EEXIST || ret == -EOVERFLOW)
6635 goto fail_dir_item;
6636 else if (ret) {
6637 btrfs_abort_transaction(trans, ret);
6638 return ret;
6639 }
6640
6641 btrfs_i_size_write(parent_inode, parent_inode->vfs_inode.i_size +
6642 name_len * 2);
6643 inode_inc_iversion(&parent_inode->vfs_inode);
6644
6645
6646
6647
6648
6649
6650 if (!test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags)) {
6651 struct timespec64 now = current_time(&parent_inode->vfs_inode);
6652
6653 parent_inode->vfs_inode.i_mtime = now;
6654 parent_inode->vfs_inode.i_ctime = now;
6655 }
6656 ret = btrfs_update_inode(trans, root, parent_inode);
6657 if (ret)
6658 btrfs_abort_transaction(trans, ret);
6659 return ret;
6660
6661fail_dir_item:
6662 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
6663 u64 local_index;
6664 int err;
6665 err = btrfs_del_root_ref(trans, key.objectid,
6666 root->root_key.objectid, parent_ino,
6667 &local_index, name, name_len);
6668 if (err)
6669 btrfs_abort_transaction(trans, err);
6670 } else if (add_backref) {
6671 u64 local_index;
6672 int err;
6673
6674 err = btrfs_del_inode_ref(trans, root, name, name_len,
6675 ino, parent_ino, &local_index);
6676 if (err)
6677 btrfs_abort_transaction(trans, err);
6678 }
6679
6680
6681 return ret;
6682}
6683
6684static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
6685 struct btrfs_inode *dir, struct dentry *dentry,
6686 struct btrfs_inode *inode, int backref, u64 index)
6687{
6688 int err = btrfs_add_link(trans, dir, inode,
6689 dentry->d_name.name, dentry->d_name.len,
6690 backref, index);
6691 if (err > 0)
6692 err = -EEXIST;
6693 return err;
6694}
6695
6696static int btrfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
6697 struct dentry *dentry, umode_t mode, dev_t rdev)
6698{
6699 struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
6700 struct btrfs_trans_handle *trans;
6701 struct btrfs_root *root = BTRFS_I(dir)->root;
6702 struct inode *inode = NULL;
6703 int err;
6704 u64 objectid;
6705 u64 index = 0;
6706
6707
6708
6709
6710
6711
6712 trans = btrfs_start_transaction(root, 5);
6713 if (IS_ERR(trans))
6714 return PTR_ERR(trans);
6715
6716 err = btrfs_get_free_objectid(root, &objectid);
6717 if (err)
6718 goto out_unlock;
6719
6720 inode = btrfs_new_inode(trans, root, mnt_userns, dir,
6721 dentry->d_name.name, dentry->d_name.len,
6722 btrfs_ino(BTRFS_I(dir)), objectid, mode, &index);
6723 if (IS_ERR(inode)) {
6724 err = PTR_ERR(inode);
6725 inode = NULL;
6726 goto out_unlock;
6727 }
6728
6729
6730
6731
6732
6733
6734
6735 inode->i_op = &btrfs_special_inode_operations;
6736 init_special_inode(inode, inode->i_mode, rdev);
6737
6738 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
6739 if (err)
6740 goto out_unlock;
6741
6742 err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode),
6743 0, index);
6744 if (err)
6745 goto out_unlock;
6746
6747 btrfs_update_inode(trans, root, BTRFS_I(inode));
6748 d_instantiate_new(dentry, inode);
6749
6750out_unlock:
6751 btrfs_end_transaction(trans);
6752 btrfs_btree_balance_dirty(fs_info);
6753 if (err && inode) {
6754 inode_dec_link_count(inode);
6755 discard_new_inode(inode);
6756 }
6757 return err;
6758}
6759
6760static int btrfs_create(struct user_namespace *mnt_userns, struct inode *dir,
6761 struct dentry *dentry, umode_t mode, bool excl)
6762{
6763 struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
6764 struct btrfs_trans_handle *trans;
6765 struct btrfs_root *root = BTRFS_I(dir)->root;
6766 struct inode *inode = NULL;
6767 int err;
6768 u64 objectid;
6769 u64 index = 0;
6770
6771
6772
6773
6774
6775
6776 trans = btrfs_start_transaction(root, 5);
6777 if (IS_ERR(trans))
6778 return PTR_ERR(trans);
6779
6780 err = btrfs_get_free_objectid(root, &objectid);
6781 if (err)
6782 goto out_unlock;
6783
6784 inode = btrfs_new_inode(trans, root, mnt_userns, dir,
6785 dentry->d_name.name, dentry->d_name.len,
6786 btrfs_ino(BTRFS_I(dir)), objectid, mode, &index);
6787 if (IS_ERR(inode)) {
6788 err = PTR_ERR(inode);
6789 inode = NULL;
6790 goto out_unlock;
6791 }
6792
6793
6794
6795
6796
6797
6798 inode->i_fop = &btrfs_file_operations;
6799 inode->i_op = &btrfs_file_inode_operations;
6800 inode->i_mapping->a_ops = &btrfs_aops;
6801
6802 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
6803 if (err)
6804 goto out_unlock;
6805
6806 err = btrfs_update_inode(trans, root, BTRFS_I(inode));
6807 if (err)
6808 goto out_unlock;
6809
6810 err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode),
6811 0, index);
6812 if (err)
6813 goto out_unlock;
6814
6815 d_instantiate_new(dentry, inode);
6816
6817out_unlock:
6818 btrfs_end_transaction(trans);
6819 if (err && inode) {
6820 inode_dec_link_count(inode);
6821 discard_new_inode(inode);
6822 }
6823 btrfs_btree_balance_dirty(fs_info);
6824 return err;
6825}
6826
/*
 * Create a new hard link @dentry in directory @dir to the inode that
 * @old_dentry refers to.
 *
 * Returns 0 on success. Returns -EXDEV when the directory's root and the
 * inode's root have different objectids, -EMLINK when the inode already has
 * BTRFS_LINK_MAX or more links, and otherwise the error of the step that
 * failed.
 */
static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
		      struct dentry *dentry)
{
	struct btrfs_trans_handle *trans = NULL;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct inode *inode = d_inode(old_dentry);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	u64 index;
	int err;
	int drop_inode = 0;

	/* Both ends of the link must belong to a root with the same objectid. */
	if (root->root_key.objectid != BTRFS_I(inode)->root->root_key.objectid)
		return -EXDEV;

	if (inode->i_nlink >= BTRFS_LINK_MAX)
		return -EMLINK;

	/* Obtain the directory index used for the new entry below. */
	err = btrfs_set_inode_index(BTRFS_I(dir), &index);
	if (err)
		goto fail;

	/*
	 * Reserve 5 units, or 6 when the inode currently has no links. In the
	 * latter case the link count becomes 1 below and the inode's orphan
	 * item is deleted as well (presumably what the extra unit is for -
	 * TODO confirm).
	 */
	trans = btrfs_start_transaction(root, inode->i_nlink ? 5 : 6);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		/* "fail" only ends the transaction for a non-NULL handle. */
		trans = NULL;
		goto fail;
	}

	/* Reset the cached dir index: the inode is about to gain another name. */
	BTRFS_I(inode)->dir_index = 0ULL;
	inc_nlink(inode);
	inode_inc_iversion(inode);
	inode->i_ctime = current_time(inode);
	/* Extra reference for the new dentry; dropped again if linking fails. */
	ihold(inode);
	set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);

	err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode),
			1, index);

	if (err) {
		/* Undo inc_nlink() and ihold() at "fail". */
		drop_inode = 1;
	} else {
		struct dentry *parent = dentry->d_parent;

		/*
		 * NOTE(review): the two "goto fail" paths below leave
		 * drop_inode at 0, so the link count bump and the extra
		 * reference taken above are not undone there - confirm this
		 * is intended.
		 */
		err = btrfs_update_inode(trans, root, BTRFS_I(inode));
		if (err)
			goto fail;
		if (inode->i_nlink == 1) {
			/*
			 * The link count was 0 before this call, so remove
			 * the inode's orphan item.
			 */
			err = btrfs_orphan_del(trans, BTRFS_I(inode));
			if (err)
				goto fail;
		}
		d_instantiate(dentry, inode);
		btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent);
	}

fail:
	if (trans)
		btrfs_end_transaction(trans);
	if (drop_inode) {
		inode_dec_link_count(inode);
		iput(inode);
	}
	btrfs_btree_balance_dirty(fs_info);
	return err;
}
6904
6905static int btrfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
6906 struct dentry *dentry, umode_t mode)
6907{
6908 struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
6909 struct inode *inode = NULL;
6910 struct btrfs_trans_handle *trans;
6911 struct btrfs_root *root = BTRFS_I(dir)->root;
6912 int err = 0;
6913 u64 objectid = 0;
6914 u64 index = 0;
6915
6916
6917
6918
6919
6920
6921 trans = btrfs_start_transaction(root, 5);
6922 if (IS_ERR(trans))
6923 return PTR_ERR(trans);
6924
6925 err = btrfs_get_free_objectid(root, &objectid);
6926 if (err)
6927 goto out_fail;
6928
6929 inode = btrfs_new_inode(trans, root, mnt_userns, dir,
6930 dentry->d_name.name, dentry->d_name.len,
6931 btrfs_ino(BTRFS_I(dir)), objectid,
6932 S_IFDIR | mode, &index);
6933 if (IS_ERR(inode)) {
6934 err = PTR_ERR(inode);
6935 inode = NULL;
6936 goto out_fail;
6937 }
6938
6939
6940 inode->i_op = &btrfs_dir_inode_operations;
6941 inode->i_fop = &btrfs_dir_file_operations;
6942
6943 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
6944 if (err)
6945 goto out_fail;
6946
6947 btrfs_i_size_write(BTRFS_I(inode), 0);
6948 err = btrfs_update_inode(trans, root, BTRFS_I(inode));
6949 if (err)
6950 goto out_fail;
6951
6952 err = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
6953 dentry->d_name.name,
6954 dentry->d_name.len, 0, index);
6955 if (err)
6956 goto out_fail;
6957
6958 d_instantiate_new(dentry, inode);
6959
6960out_fail:
6961 btrfs_end_transaction(trans);
6962 if (err && inode) {
6963 inode_dec_link_count(inode);
6964 discard_new_inode(inode);
6965 }
6966 btrfs_btree_balance_dirty(fs_info);
6967 return err;
6968}
6969
6970static noinline int uncompress_inline(struct btrfs_path *path,
6971 struct page *page,
6972 size_t pg_offset, u64 extent_offset,
6973 struct btrfs_file_extent_item *item)
6974{
6975 int ret;
6976 struct extent_buffer *leaf = path->nodes[0];
6977 char *tmp;
6978 size_t max_size;
6979 unsigned long inline_size;
6980 unsigned long ptr;
6981 int compress_type;
6982
6983 WARN_ON(pg_offset != 0);
6984 compress_type = btrfs_file_extent_compression(leaf, item);
6985 max_size = btrfs_file_extent_ram_bytes(leaf, item);
6986 inline_size = btrfs_file_extent_inline_item_len(leaf,
6987 btrfs_item_nr(path->slots[0]));
6988 tmp = kmalloc(inline_size, GFP_NOFS);
6989 if (!tmp)
6990 return -ENOMEM;
6991 ptr = btrfs_file_extent_inline_start(item);
6992
6993 read_extent_buffer(leaf, tmp, ptr, inline_size);
6994
6995 max_size = min_t(unsigned long, PAGE_SIZE, max_size);
6996 ret = btrfs_decompress(compress_type, tmp, page,
6997 extent_offset, inline_size, max_size);
6998
6999
7000
7001
7002
7003
7004
7005
7006
7007 if (max_size + pg_offset < PAGE_SIZE)
7008 memzero_page(page, pg_offset + max_size,
7009 PAGE_SIZE - max_size - pg_offset);
7010 kfree(tmp);
7011 return ret;
7012}
7013
7014
7015
7016
7017
7018
7019
7020
7021
7022
7023
7024
7025
7026
7027
7028
7029
7030
7031
/*
 * Return the extent map that covers file offset @start of @inode.
 *
 * The inode's extent map tree is consulted first; on a miss the map is built
 * from the file extent item found in the inode's root and then added to the
 * tree. When no item covers @start a hole map is returned instead.
 *
 * @page and @pg_offset are only used for inline extents: with a non-NULL
 * @page that is not yet uptodate, the inline data is copied (or decompressed)
 * into the page and the remainder of the page is zeroed. With a NULL @page an
 * inline extent is returned without being added to the tree.
 *
 * Returns the extent map, or an ERR_PTR() on failure.
 */
struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
				    struct page *page, size_t pg_offset,
				    u64 start, u64 len)
{
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	int ret = 0;
	u64 extent_start = 0;
	u64 extent_end = 0;
	u64 objectid = btrfs_ino(inode);
	int extent_type = -1;
	struct btrfs_path *path = NULL;
	struct btrfs_root *root = inode->root;
	struct btrfs_file_extent_item *item;
	struct extent_buffer *leaf;
	struct btrfs_key found_key;
	struct extent_map *em = NULL;
	struct extent_map_tree *em_tree = &inode->extent_tree;
	struct extent_io_tree *io_tree = &inode->io_tree;

	/* Fast path: look the range up in the in-memory extent map tree. */
	read_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, start, len);
	read_unlock(&em_tree->lock);

	if (em) {
		/*
		 * Discard the result if it does not contain @start, or if it
		 * is an inline extent and the caller passed a page; both
		 * cases fall through to a fresh lookup.
		 */
		if (em->start > start || em->start + em->len <= start)
			free_extent_map(em);
		else if (em->block_start == EXTENT_MAP_INLINE && page)
			free_extent_map(em);
		else
			goto out;
	}
	em = alloc_extent_map();
	if (!em) {
		ret = -ENOMEM;
		goto out;
	}
	/* Placeholder values until the real range is known. */
	em->start = EXTENT_MAP_HOLE;
	em->orig_start = EXTENT_MAP_HOLE;
	em->len = (u64)-1;
	em->block_len = (u64)-1;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	/* Request forward readahead for the search. */
	path->reada = READA_FORWARD;

	/*
	 * For the free space inode, search the commit root and skip tree
	 * locking.
	 */
	if (btrfs_is_free_space_inode(inode)) {
		path->search_commit_root = 1;
		path->skip_locking = 1;
	}

	ret = btrfs_lookup_file_extent(NULL, root, path, objectid, start, 0);
	if (ret < 0) {
		goto out;
	} else if (ret > 0) {
		/* No exact match: step back to the preceding item, if any. */
		if (path->slots[0] == 0)
			goto not_found;
		path->slots[0]--;
		ret = 0;
	}

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0],
			      struct btrfs_file_extent_item);
	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
	if (found_key.objectid != objectid ||
	    found_key.type != BTRFS_EXTENT_DATA_KEY) {
		/*
		 * The item we landed on is not a file extent of this inode.
		 * Setting extent_end to @start makes the "next" block below
		 * advance to the following item instead of reporting a hole
		 * for the whole range right away.
		 */
		extent_end = start;
		goto next;
	}

	extent_type = btrfs_file_extent_type(leaf, item);
	extent_start = found_key.offset;
	extent_end = btrfs_file_extent_end(path);
	if (extent_type == BTRFS_FILE_EXTENT_REG ||
	    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
		/* Regular/prealloc extents are only accepted on regular files. */
		if (!S_ISREG(inode->vfs_inode.i_mode)) {
			ret = -EUCLEAN;
			btrfs_crit(fs_info,
		"regular/prealloc extent found for non-regular inode %llu",
				   btrfs_ino(inode));
			goto out;
		}
		trace_btrfs_get_extent_show_fi_regular(inode, leaf, item,
						       extent_start);
	} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
		trace_btrfs_get_extent_show_fi_inline(inode, leaf, item,
						      path->slots[0],
						      extent_start);
	}
next:
	if (start >= extent_end) {
		/* The current item ends at or before @start: look at the next one. */
		path->slots[0]++;
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out;
			else if (ret > 0)
				goto not_found;

			leaf = path->nodes[0];
		}
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != objectid ||
		    found_key.type != BTRFS_EXTENT_DATA_KEY)
			goto not_found;
		if (start + len <= found_key.offset)
			goto not_found;
		if (start > found_key.offset)
			goto next;

		/*
		 * The next extent begins inside the requested range: report a
		 * hole from @start up to where that extent begins.
		 */
		em->start = start;
		em->orig_start = start;
		em->len = found_key.offset - start;
		em->block_start = EXTENT_MAP_HOLE;
		goto insert;
	}

	/* The item covers @start: translate it into the extent map. */
	btrfs_extent_item_to_extent_map(inode, path, item, !page, em);

	if (extent_type == BTRFS_FILE_EXTENT_REG ||
	    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
		goto insert;
	} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
		unsigned long ptr;
		char *map;
		size_t size;
		size_t extent_offset;
		size_t copy_size;

		/* Without a page the inline map is returned as is, uncached. */
		if (!page)
			goto out;

		size = btrfs_file_extent_ram_bytes(leaf, item);
		extent_offset = page_offset(page) + pg_offset - extent_start;
		copy_size = min_t(u64, PAGE_SIZE - pg_offset,
				  size - extent_offset);
		em->start = extent_start + extent_offset;
		em->len = ALIGN(copy_size, fs_info->sectorsize);
		em->orig_block_len = em->len;
		em->orig_start = em->start;
		ptr = btrfs_file_extent_inline_start(item) + extent_offset;

		if (!PageUptodate(page)) {
			if (btrfs_file_extent_compression(leaf, item) !=
			    BTRFS_COMPRESS_NONE) {
				ret = uncompress_inline(path, page, pg_offset,
							extent_offset, item);
				if (ret)
					goto out;
			} else {
				/* Copy the raw bytes and zero the page tail. */
				map = kmap_local_page(page);
				read_extent_buffer(leaf, map + pg_offset, ptr,
						   copy_size);
				if (pg_offset + copy_size < PAGE_SIZE) {
					memset(map + pg_offset + copy_size, 0,
					       PAGE_SIZE - pg_offset -
					       copy_size);
				}
				kunmap_local(map);
			}
			flush_dcache_page(page);
		}
		set_extent_uptodate(io_tree, em->start,
				    extent_map_end(em) - 1, NULL, GFP_NOFS);
		goto insert;
	}
not_found:
	/* Nothing on disk covers the range: describe it as one hole. */
	em->start = start;
	em->orig_start = start;
	em->len = len;
	em->block_start = EXTENT_MAP_HOLE;
insert:
	ret = 0;
	btrfs_release_path(path);
	/* Sanity check: the map we built must contain @start. */
	if (em->start > start || extent_map_end(em) <= start) {
		btrfs_err(fs_info,
			  "bad extent! em: [%llu %llu] passed [%llu %llu]",
			  em->start, em->len, start, len);
		ret = -EIO;
		goto out;
	}

	/* May replace em with a different map; hence the &em argument. */
	write_lock(&em_tree->lock);
	ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
	write_unlock(&em_tree->lock);
out:
	btrfs_free_path(path);

	trace_btrfs_get_extent(root, inode, em);

	if (ret) {
		free_extent_map(em);
		return ERR_PTR(ret);
	}
	return em;
}
7246
/*
 * Variant of btrfs_get_extent() that also accounts for ranges carrying the
 * EXTENT_DELALLOC bit in the inode's io_tree.
 *
 * If btrfs_get_extent() returns something other than a hole or a
 * preallocated extent, that map is returned unchanged. Otherwise the range
 * [@start, @start + @len) is scanned for delalloc bits and, if any are
 * found, a newly allocated map describes either the leading part of the
 * hole/prealloc extent up to the delalloc range, or the delalloc range
 * itself (block_start == EXTENT_MAP_DELALLOC).
 *
 * Returns an extent map or an ERR_PTR().
 */
struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
					   u64 start, u64 len)
{
	struct extent_map *em;
	struct extent_map *hole_em = NULL;
	u64 delalloc_start = start;
	u64 end;
	u64 delalloc_len;
	u64 delalloc_end;
	int err = 0;

	em = btrfs_get_extent(inode, NULL, 0, start, len);
	if (IS_ERR(em))
		return em;

	/*
	 * Only a hole or a preallocated extent needs the delalloc check
	 * below; any other map is final.
	 */
	if (em->block_start != EXTENT_MAP_HOLE &&
	    !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
		return em;
	else
		hole_em = em;

	/* Compute the inclusive end, clamping if start + len wrapped around. */
	end = start + len;
	if (end < start)
		end = (u64)-1;
	else
		end -= 1;

	em = NULL;

	/* Look for delalloc bits inside the requested range. */
	delalloc_len = count_range_bits(&inode->io_tree, &delalloc_start,
				 end, len, EXTENT_DELALLOC, 1);
	delalloc_end = delalloc_start + delalloc_len;
	if (delalloc_end < delalloc_start)
		delalloc_end = (u64)-1;

	/*
	 * The delalloc range does not intersect the request: hand back the
	 * original map (ownership moves from hole_em to em).
	 */
	if (delalloc_start > end || delalloc_end <= start) {
		em = hole_em;
		hole_em = NULL;
		goto out;
	}

	/* Never report a delalloc range that begins before @start. */
	delalloc_start = max(start, delalloc_start);
	delalloc_len = delalloc_end - delalloc_start;

	if (delalloc_len > 0) {
		u64 hole_start;
		u64 hole_len;
		const u64 hole_end = extent_map_end(hole_em);

		em = alloc_extent_map();
		if (!em) {
			err = -ENOMEM;
			goto out;
		}

		/*
		 * NOTE(review): hole_em was already dereferenced for hole_end
		 * above, so this assertion comes after the first use.
		 */
		ASSERT(hole_em);

		/*
		 * Drop the hole map if it lies entirely outside the request;
		 * otherwise clip its start to @start. hole_start and hole_len
		 * are only set, and only read, while hole_em is non-NULL.
		 */
		if (hole_end <= start || hole_em->start > end) {
		       free_extent_map(hole_em);
		       hole_em = NULL;
		} else {
		       hole_start = max(hole_em->start, start);
		       hole_len = hole_end - hole_start;
		}

		if (hole_em && delalloc_start > hole_start) {
			/*
			 * The hole begins before the delalloc range: return
			 * only the part of the hole in front of it.
			 */
			em->len = min(hole_len, delalloc_start - hole_start);
			em->start = hole_start;
			em->orig_start = hole_start;

			/* block_start is copied unadjusted from the hole map. */
			em->block_start = hole_em->block_start;
			em->block_len = hole_len;
			if (test_bit(EXTENT_FLAG_PREALLOC, &hole_em->flags))
				set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
		} else {
			/*
			 * No usable hole, or it does not start before the
			 * delalloc range: report the delalloc range itself.
			 */
			em->start = delalloc_start;
			em->len = delalloc_len;
			em->orig_start = delalloc_start;
			em->block_start = EXTENT_MAP_DELALLOC;
			em->block_len = delalloc_len;
		}
	} else {
		return hole_em;
	}
out:

	/* hole_em is NULL here whenever it was handed over or already freed. */
	free_extent_map(hole_em);
	if (err) {
		free_extent_map(em);
		return ERR_PTR(err);
	}
	return em;
}
7373
7374static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode,
7375 const u64 start,
7376 const u64 len,
7377 const u64 orig_start,
7378 const u64 block_start,
7379 const u64 block_len,
7380 const u64 orig_block_len,
7381 const u64 ram_bytes,
7382 const int type)
7383{
7384 struct extent_map *em = NULL;
7385 int ret;
7386
7387 if (type != BTRFS_ORDERED_NOCOW) {
7388 em = create_io_em(inode, start, len, orig_start, block_start,
7389 block_len, orig_block_len, ram_bytes,
7390 BTRFS_COMPRESS_NONE,
7391 type);
7392 if (IS_ERR(em))
7393 goto out;
7394 }
7395 ret = btrfs_add_ordered_extent_dio(inode, start, block_start, len,
7396 block_len, type);
7397 if (ret) {
7398 if (em) {
7399 free_extent_map(em);
7400 btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
7401 }
7402 em = ERR_PTR(ret);
7403 }
7404 out:
7405
7406 return em;
7407}
7408
7409static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode,
7410 u64 start, u64 len)
7411{
7412 struct btrfs_root *root = inode->root;
7413 struct btrfs_fs_info *fs_info = root->fs_info;
7414 struct extent_map *em;
7415 struct btrfs_key ins;
7416 u64 alloc_hint;
7417 int ret;
7418
7419 alloc_hint = get_extent_allocation_hint(inode, start, len);
7420 ret = btrfs_reserve_extent(root, len, len, fs_info->sectorsize,
7421 0, alloc_hint, &ins, 1, 1);
7422 if (ret)
7423 return ERR_PTR(ret);
7424
7425 em = btrfs_create_dio_extent(inode, start, ins.offset, start,
7426 ins.objectid, ins.offset, ins.offset,
7427 ins.offset, BTRFS_ORDERED_REGULAR);
7428 btrfs_dec_block_group_reservations(fs_info, ins.objectid);
7429 if (IS_ERR(em))
7430 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset,
7431 1);
7432
7433 return em;
7434}
7435
7436static bool btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr)
7437{
7438 struct btrfs_block_group *block_group;
7439 bool readonly = false;
7440
7441 block_group = btrfs_lookup_block_group(fs_info, bytenr);
7442 if (!block_group || block_group->ro)
7443 readonly = true;
7444 if (block_group)
7445 btrfs_put_block_group(block_group);
7446 return readonly;
7447}
7448
7449
7450
7451
7452
7453
7454
7455
7456
7457
7458
7459
7460
7461
7462
7463
7464
7465
7466
7467
7468
/*
 * Check whether the range starting at @offset of @inode can be written in
 * place, i.e. without allocating a new extent.
 *
 * @len:            in: length of the range to check; out (only when 1 is
 *                  returned): the length that can be written in place, cut
 *                  at the end of the extent found. Always dereferenced.
 * @orig_start:     optional; when non-NULL, @orig_start, @orig_block_len and
 *                  @ram_bytes are all filled from the file extent item. They
 *                  may be written even if the function later returns 0.
 * @orig_block_len: see @orig_start
 * @ram_bytes:      see @orig_start
 * @strict:         passed on to btrfs_cross_ref_exist(); when false, an
 *                  extent whose generation is not newer than the root's last
 *                  snapshot is rejected up front.
 *
 * Returns 1 if all checks pass, 0 if any check fails, -EAGAIN if a
 * preallocated extent of an inode without NODATACOW has delalloc bits set
 * in the range, -ENOMEM if the path cannot be allocated, or the negative
 * value returned by the file extent lookup.
 */
noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
			      u64 *orig_start, u64 *orig_block_len,
			      u64 *ram_bytes, bool strict)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_path *path;
	int ret;
	struct extent_buffer *leaf;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 disk_bytenr;
	u64 backref_offset;
	u64 extent_end;
	u64 num_bytes;
	int slot;
	int found_type;
	bool nocow = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_lookup_file_extent(NULL, root, path,
			btrfs_ino(BTRFS_I(inode)), offset, 0);
	if (ret < 0)
		goto out;

	slot = path->slots[0];
	if (ret == 1) {
		if (slot == 0) {
			/* No exact match and no preceding item: cannot nocow. */
			ret = 0;
			goto out;
		}
		/* No exact match: examine the preceding item instead. */
		slot--;
	}
	/* From here on every "goto out" without a new ret means "cannot nocow". */
	ret = 0;
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, slot);
	if (key.objectid != btrfs_ino(BTRFS_I(inode)) ||
	    key.type != BTRFS_EXTENT_DATA_KEY) {
		/* Not a file extent item of this inode. */
		goto out;
	}

	if (key.offset > offset) {
		/* The extent starts after the offset we were asked about. */
		goto out;
	}

	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
	found_type = btrfs_file_extent_type(leaf, fi);
	if (found_type != BTRFS_FILE_EXTENT_REG &&
	    found_type != BTRFS_FILE_EXTENT_PREALLOC) {
		/* Neither a regular nor a preallocated extent. */
		goto out;
	}

	/* Without NODATACOW only preallocated extents qualify. */
	if (!nocow && found_type == BTRFS_FILE_EXTENT_REG)
		goto out;

	/* The extent must reach past @offset. */
	extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
	if (extent_end <= offset)
		goto out;

	/* A disk_bytenr of 0 is rejected. */
	disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
	if (disk_bytenr == 0)
		goto out;

	/* Compressed, encrypted or otherwise encoded extents are rejected. */
	if (btrfs_file_extent_compression(leaf, fi) ||
	    btrfs_file_extent_encryption(leaf, fi) ||
	    btrfs_file_extent_other_encoding(leaf, fi))
		goto out;

	/*
	 * In non-strict mode, reject extents whose generation is not newer
	 * than the root's last snapshot, before the cross reference search
	 * further down.
	 */
	if (!strict &&
	    (btrfs_file_extent_generation(leaf, fi) <=
	     btrfs_root_last_snapshot(&root->root_item)))
		goto out;

	backref_offset = btrfs_file_extent_offset(leaf, fi);

	if (orig_start) {
		*orig_start = key.offset - backref_offset;
		*orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
		*ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
	}

	if (btrfs_extent_readonly(fs_info, disk_bytenr))
		goto out;

	/* Clip the length to the end of this extent. */
	num_bytes = min(offset + *len, extent_end) - offset;
	if (!nocow && found_type == BTRFS_FILE_EXTENT_PREALLOC) {
		u64 range_end;

		/* Delalloc bits in the sector-aligned range make us return -EAGAIN. */
		range_end = round_up(offset + num_bytes,
				     root->fs_info->sectorsize) - 1;
		ret = test_range_bit(io_tree, offset, range_end,
				     EXTENT_DELALLOC, 0, NULL);
		if (ret) {
			ret = -EAGAIN;
			goto out;
		}
	}

	/* leaf and fi must not be used past this point. */
	btrfs_release_path(path);

	/*
	 * Any non-zero result of the cross reference check (a reference
	 * found, or an error) means the extent cannot be written in place.
	 */
	ret = btrfs_cross_ref_exist(root, btrfs_ino(BTRFS_I(inode)),
				    key.offset - backref_offset, disk_bytenr,
				    strict);
	if (ret) {
		ret = 0;
		goto out;
	}

	/*
	 * Move disk_bytenr to the disk location matching @offset and reject
	 * the range if checksums exist for it.
	 */
	disk_bytenr += backref_offset;
	disk_bytenr += offset - key.offset;
	if (csum_exist_in_range(fs_info, disk_bytenr, num_bytes))
		goto out;

	/* All checks passed: report the usable length. */
	*len = num_bytes;
	ret = 1;
out:
	btrfs_free_path(path);
	return ret;
}
7614
7615static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
7616 struct extent_state **cached_state, bool writing)
7617{
7618 struct btrfs_ordered_extent *ordered;
7619 int ret = 0;
7620
7621 while (1) {
7622 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
7623 cached_state);
7624
7625
7626
7627
7628
7629 ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), lockstart,
7630 lockend - lockstart + 1);
7631
7632
7633
7634
7635
7636
7637
7638
7639 if (!ordered &&
7640 (!writing || !filemap_range_has_page(inode->i_mapping,
7641 lockstart, lockend)))
7642 break;
7643
7644 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
7645 cached_state);
7646
7647 if (ordered) {
7648
7649
7650
7651
7652
7653
7654
7655
7656
7657
7658
7659
7660
7661
7662
7663 if (writing ||
7664 test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags))
7665 btrfs_start_ordered_extent(ordered, 1);
7666 else
7667 ret = -ENOTBLK;
7668 btrfs_put_ordered_extent(ordered);
7669 } else {
7670
7671
7672
7673
7674
7675
7676
7677
7678
7679
7680
7681
7682
7683 ret = -ENOTBLK;
7684 }
7685
7686 if (ret)
7687 break;
7688
7689 cond_resched();
7690 }
7691
7692 return ret;
7693}
7694
7695
7696static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
7697 u64 len, u64 orig_start, u64 block_start,
7698 u64 block_len, u64 orig_block_len,
7699 u64 ram_bytes, int compress_type,
7700 int type)
7701{
7702 struct extent_map_tree *em_tree;
7703 struct extent_map *em;
7704 int ret;
7705
7706 ASSERT(type == BTRFS_ORDERED_PREALLOC ||
7707 type == BTRFS_ORDERED_COMPRESSED ||
7708 type == BTRFS_ORDERED_NOCOW ||
7709 type == BTRFS_ORDERED_REGULAR);
7710
7711 em_tree = &inode->extent_tree;
7712 em = alloc_extent_map();
7713 if (!em)
7714 return ERR_PTR(-ENOMEM);
7715
7716 em->start = start;
7717 em->orig_start = orig_start;
7718 em->len = len;
7719 em->block_len = block_len;
7720 em->block_start = block_start;
7721 em->orig_block_len = orig_block_len;
7722 em->ram_bytes = ram_bytes;
7723 em->generation = -1;
7724 set_bit(EXTENT_FLAG_PINNED, &em->flags);
7725 if (type == BTRFS_ORDERED_PREALLOC) {
7726 set_bit(EXTENT_FLAG_FILLING, &em->flags);
7727 } else if (type == BTRFS_ORDERED_COMPRESSED) {
7728 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
7729 em->compress_type = compress_type;
7730 }
7731
7732 do {
7733 btrfs_drop_extent_cache(inode, em->start,
7734 em->start + em->len - 1, 0);
7735 write_lock(&em_tree->lock);
7736 ret = add_extent_mapping(em_tree, em, 1);
7737 write_unlock(&em_tree->lock);
7738
7739
7740
7741
7742 } while (ret == -EEXIST);
7743
7744 if (ret) {
7745 free_extent_map(em);
7746 return ERR_PTR(ret);
7747 }
7748
7749
7750 return em;
7751}
7752
7753
/*
 * Prepare the extent for a direct I/O write of @len bytes at @start.
 *
 * @map points at the extent map covering @start. On return *map may have
 * been replaced: by the map of a newly allocated extent, by the map created
 * for a preallocated extent, or by an ERR_PTR() when that creation failed
 * (in which case a negative errno is returned as well).
 *
 * On success the inode size is raised to @start + @len if the write goes
 * past it, and the length handled here is subtracted from
 * dio_data->reserve.
 *
 * Returns 0 on success or a negative errno.
 */
static int btrfs_get_blocks_direct_write(struct extent_map **map,
					 struct inode *inode,
					 struct btrfs_dio_data *dio_data,
					 u64 start, u64 len)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_map *em = *map;
	int ret = 0;

	/*
	 * Writing in place is only attempted for preallocated extents and,
	 * on NODATACOW inodes, for anything that is not a hole.
	 * can_nocow_extent() below makes the final decision.
	 */
	if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
	    ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
	     em->block_start != EXTENT_MAP_HOLE)) {
		int type;
		u64 block_start, orig_start, orig_block_len, ram_bytes;

		if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
			type = BTRFS_ORDERED_PREALLOC;
		else
			type = BTRFS_ORDERED_NOCOW;
		/* Restrict the write to what this extent map covers. */
		len = min(len, em->len - (start - em->start));
		block_start = em->block_start + (start - em->start);

		if (can_nocow_extent(inode, start, &len, &orig_start,
				     &orig_block_len, &ram_bytes, false) == 1 &&
		    btrfs_inc_nocow_writers(fs_info, block_start)) {
			struct extent_map *em2;

			em2 = btrfs_create_dio_extent(BTRFS_I(inode), start, len,
						      orig_start, block_start,
						      len, orig_block_len,
						      ram_bytes, type);
			btrfs_dec_nocow_writers(fs_info, block_start);
			/*
			 * For prealloc the caller continues with the new map
			 * (or the ERR_PTR); for nocow the original map stays.
			 */
			if (type == BTRFS_ORDERED_PREALLOC) {
				free_extent_map(em);
				*map = em = em2;
			}

			if (em2 && IS_ERR(em2)) {
				ret = PTR_ERR(em2);
				goto out;
			}

			/*
			 * The existing extent is written in place, so the
			 * data space reserved for this length is released.
			 */
			btrfs_free_reserved_data_space_noquota(fs_info, len);
			goto skip_cow;
		}
	}

	/* Not writable in place: allocate a new extent for the write. */
	free_extent_map(em);
	*map = em = btrfs_new_extent_direct(BTRFS_I(inode), start, len);
	if (IS_ERR(em)) {
		ret = PTR_ERR(em);
		goto out;
	}

	/* The new extent may be shorter than requested. */
	len = min(len, em->len - (start - em->start));

skip_cow:
	/* Extend i_size here if the write ends past the current size. */
	if (start + len > i_size_read(inode))
		i_size_write(inode, start + len);

	dio_data->reserve -= len;
out:
	return ret;
}
7836
/*
 * Direct I/O mapping callback: map the file range starting at @start into
 * @iomap.
 *
 * A struct btrfs_dio_data is allocated and stored in iomap->private. For
 * writes, delalloc space for the sector-aligned @length is reserved first.
 * The range is then locked, its extent map looked up and translated into
 * @iomap. Reads are limited to one sector per call.
 *
 * @srcmap is not used.
 *
 * Returns 0 on success, -ENOTBLK when the range cannot be locked for direct
 * I/O or the extent is compressed or inline, or another negative errno. On
 * failure the dio data and any reservation made here are released.
 */
static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
		loff_t length, unsigned int flags, struct iomap *iomap,
		struct iomap *srcmap)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_map *em;
	struct extent_state *cached_state = NULL;
	struct btrfs_dio_data *dio_data = NULL;
	u64 lockstart, lockend;
	const bool write = !!(flags & IOMAP_WRITE);
	int ret = 0;
	u64 len = length;
	bool unlock_extents = false;

	/* Reads are mapped one sector at a time. */
	if (!write)
		len = min_t(u64, len, fs_info->sectorsize);

	lockstart = start;
	lockend = start + len - 1;

	/*
	 * When the inode is flagged BTRFS_INODE_HAS_ASYNC_EXTENT, start
	 * writeback for the full requested range before going on.
	 */
	if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
		     &BTRFS_I(inode)->runtime_flags)) {
		ret = filemap_fdatawrite_range(inode->i_mapping, start,
					       start + length - 1);
		if (ret)
			return ret;
	}

	dio_data = kzalloc(sizeof(*dio_data), GFP_NOFS);
	if (!dio_data)
		return -ENOMEM;

	dio_data->length = length;
	if (write) {
		dio_data->reserve = round_up(length, fs_info->sectorsize);
		ret = btrfs_delalloc_reserve_space(BTRFS_I(inode),
				&dio_data->data_reserved,
				start, dio_data->reserve);
		if (ret) {
			extent_changeset_free(dio_data->data_reserved);
			kfree(dio_data);
			return ret;
		}
	}
	iomap->private = dio_data;


	/*
	 * Any failure to lock the range is reported as -ENOTBLK, regardless
	 * of the value lock_extent_direct() returned.
	 */
	if (lock_extent_direct(inode, lockstart, lockend, &cached_state, write)) {
		ret = -ENOTBLK;
		goto err;
	}

	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len);
	if (IS_ERR(em)) {
		ret = PTR_ERR(em);
		goto unlock_err;
	}

	/* Compressed and inline extents are refused with -ENOTBLK. */
	if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
	    em->block_start == EXTENT_MAP_INLINE) {
		free_extent_map(em);
		ret = -ENOTBLK;
		goto unlock_err;
	}

	/* Do not map past the end of the extent map. */
	len = min(len, em->len - (start - em->start));
	if (write) {
		ret = btrfs_get_blocks_direct_write(&em, inode, dio_data,
						    start, len);
		if (ret < 0)
			goto unlock_err;
		unlock_extents = true;
		/* em may have been replaced; clamp len against the new map. */
		len = min(len, em->len - (start - em->start));
	} else {
		/*
		 * For reads only the tail of the locked range that is not
		 * being mapped gets unlocked here; the mapped part stays
		 * locked.
		 */
		lockstart = start + len;
		if (lockstart < lockend)
			unlock_extents = true;
	}

	if (unlock_extents)
		unlock_extent_cached(&BTRFS_I(inode)->io_tree,
				     lockstart, lockend, &cached_state);
	else
		free_extent_state(cached_state);

	/*
	 * Translate the extent map into the iomap. Holes, and preallocated
	 * extents when reading, are reported as IOMAP_HOLE.
	 */
	if ((em->block_start == EXTENT_MAP_HOLE) ||
	    (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) && !write)) {
		iomap->addr = IOMAP_NULL_ADDR;
		iomap->type = IOMAP_HOLE;
	} else {
		iomap->addr = em->block_start + (start - em->start);
		iomap->type = IOMAP_MAPPED;
	}
	iomap->offset = start;
	iomap->bdev = fs_info->fs_devices->latest_bdev;
	iomap->length = len;

	if (write && btrfs_use_zone_append(BTRFS_I(inode), em->block_start))
		iomap->flags |= IOMAP_F_ZONE_APPEND;

	free_extent_map(em);

	return 0;

unlock_err:
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			     &cached_state);
err:
	/*
	 * NOTE(review): iomap->private still points at dio_data after the
	 * kfree() below - confirm that callers do not look at it when this
	 * function fails.
	 */
	if (dio_data) {
		btrfs_delalloc_release_space(BTRFS_I(inode),
				dio_data->data_reserved, start,
				dio_data->reserve, true);
		btrfs_delalloc_release_extents(BTRFS_I(inode), dio_data->reserve);
		extent_changeset_free(dio_data->data_reserved);
		kfree(dio_data);
	}
	return ret;
}
7989
7990static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length,
7991 ssize_t written, unsigned int flags, struct iomap *iomap)
7992{
7993 int ret = 0;
7994 struct btrfs_dio_data *dio_data = iomap->private;
7995 size_t submitted = dio_data->submitted;
7996 const bool write = !!(flags & IOMAP_WRITE);
7997
7998 if (!write && (iomap->type == IOMAP_HOLE)) {
7999
8000 unlock_extent(&BTRFS_I(inode)->io_tree, pos, pos + length - 1);
8001 goto out;
8002 }
8003
8004 if (submitted < length) {
8005 pos += submitted;
8006 length -= submitted;
8007 if (write)
8008 __endio_write_update_ordered(BTRFS_I(inode), pos,
8009 length, false);
8010 else
8011 unlock_extent(&BTRFS_I(inode)->io_tree, pos,
8012 pos + length - 1);
8013 ret = -ENOTBLK;
8014 }
8015
8016 if (write) {
8017 if (dio_data->reserve)
8018 btrfs_delalloc_release_space(BTRFS_I(inode),
8019 dio_data->data_reserved, pos,
8020 dio_data->reserve, true);
8021 btrfs_delalloc_release_extents(BTRFS_I(inode), dio_data->length);
8022 extent_changeset_free(dio_data->data_reserved);
8023 }
8024out:
8025 kfree(dio_data);
8026 iomap->private = NULL;
8027
8028 return ret;
8029}
8030
8031static void btrfs_dio_private_put(struct btrfs_dio_private *dip)
8032{
8033
8034
8035
8036
8037 if (!refcount_dec_and_test(&dip->refs))
8038 return;
8039
8040 if (btrfs_op(dip->dio_bio) == BTRFS_MAP_WRITE) {
8041 __endio_write_update_ordered(BTRFS_I(dip->inode),
8042 dip->logical_offset,
8043 dip->bytes,
8044 !dip->dio_bio->bi_status);
8045 } else {
8046 unlock_extent(&BTRFS_I(dip->inode)->io_tree,
8047 dip->logical_offset,
8048 dip->logical_offset + dip->bytes - 1);
8049 }
8050
8051 bio_endio(dip->dio_bio);
8052 kfree(dip);
8053}
8054
8055static blk_status_t submit_dio_repair_bio(struct inode *inode, struct bio *bio,
8056 int mirror_num,
8057 unsigned long bio_flags)
8058{
8059 struct btrfs_dio_private *dip = bio->bi_private;
8060 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
8061 blk_status_t ret;
8062
8063 BUG_ON(bio_op(bio) == REQ_OP_WRITE);
8064
8065 ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
8066 if (ret)
8067 return ret;
8068
8069 refcount_inc(&dip->refs);
8070 ret = btrfs_map_bio(fs_info, bio, mirror_num);
8071 if (ret)
8072 refcount_dec(&dip->refs);
8073 return ret;
8074}
8075
8076static blk_status_t btrfs_check_read_dio_bio(struct inode *inode,
8077 struct btrfs_io_bio *io_bio,
8078 const bool uptodate)
8079{
8080 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
8081 const u32 sectorsize = fs_info->sectorsize;
8082 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
8083 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
8084 const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM);
8085 struct bio_vec bvec;
8086 struct bvec_iter iter;
8087 u64 start = io_bio->logical;
8088 u32 bio_offset = 0;
8089 blk_status_t err = BLK_STS_OK;
8090
8091 __bio_for_each_segment(bvec, &io_bio->bio, iter, io_bio->iter) {
8092 unsigned int i, nr_sectors, pgoff;
8093
8094 nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len);
8095 pgoff = bvec.bv_offset;
8096 for (i = 0; i < nr_sectors; i++) {
8097 ASSERT(pgoff < PAGE_SIZE);
8098 if (uptodate &&
8099 (!csum || !check_data_csum(inode, io_bio,
8100 bio_offset, bvec.bv_page,
8101 pgoff, start))) {
8102 clean_io_failure(fs_info, failure_tree, io_tree,
8103 start, bvec.bv_page,
8104 btrfs_ino(BTRFS_I(inode)),
8105 pgoff);
8106 } else {
8107 int ret;
8108
8109 ASSERT((start - io_bio->logical) < UINT_MAX);
8110 ret = btrfs_repair_one_sector(inode,
8111 &io_bio->bio,
8112 start - io_bio->logical,
8113 bvec.bv_page, pgoff,
8114 start, io_bio->mirror_num,
8115 submit_dio_repair_bio);
8116 if (ret)
8117 err = errno_to_blk_status(ret);
8118 }
8119 start += sectorsize;
8120 ASSERT(bio_offset + sectorsize > bio_offset);
8121 bio_offset += sectorsize;
8122 pgoff += sectorsize;
8123 }
8124 }
8125 return err;
8126}
8127
8128static void __endio_write_update_ordered(struct btrfs_inode *inode,
8129 const u64 offset, const u64 bytes,
8130 const bool uptodate)
8131{
8132 btrfs_mark_ordered_io_finished(inode, NULL, offset, bytes,
8133 finish_ordered_fn, uptodate);
8134}
8135
8136static blk_status_t btrfs_submit_bio_start_direct_io(struct inode *inode,
8137 struct bio *bio,
8138 u64 dio_file_offset)
8139{
8140 return btrfs_csum_one_bio(BTRFS_I(inode), bio, dio_file_offset, 1);
8141}
8142
8143static void btrfs_end_dio_bio(struct bio *bio)
8144{
8145 struct btrfs_dio_private *dip = bio->bi_private;
8146 blk_status_t err = bio->bi_status;
8147
8148 if (err)
8149 btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
8150 "direct IO failed ino %llu rw %d,%u sector %#Lx len %u err no %d",
8151 btrfs_ino(BTRFS_I(dip->inode)), bio_op(bio),
8152 bio->bi_opf, bio->bi_iter.bi_sector,
8153 bio->bi_iter.bi_size, err);
8154
8155 if (bio_op(bio) == REQ_OP_READ) {
8156 err = btrfs_check_read_dio_bio(dip->inode, btrfs_io_bio(bio),
8157 !err);
8158 }
8159
8160 if (err)
8161 dip->dio_bio->bi_status = err;
8162
8163 btrfs_record_physical_zoned(dip->inode, dip->logical_offset, bio);
8164
8165 bio_put(bio);
8166 btrfs_dio_private_put(dip);
8167}
8168
8169static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
8170 struct inode *inode, u64 file_offset, int async_submit)
8171{
8172 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
8173 struct btrfs_dio_private *dip = bio->bi_private;
8174 bool write = btrfs_op(bio) == BTRFS_MAP_WRITE;
8175 blk_status_t ret;
8176
8177
8178 if (async_submit)
8179 async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);
8180
8181 if (!write) {
8182 ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
8183 if (ret)
8184 goto err;
8185 }
8186
8187 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
8188 goto map;
8189
8190 if (write && async_submit) {
8191 ret = btrfs_wq_submit_bio(inode, bio, 0, 0, file_offset,
8192 btrfs_submit_bio_start_direct_io);
8193 goto err;
8194 } else if (write) {
8195
8196
8197
8198
8199 ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, file_offset, 1);
8200 if (ret)
8201 goto err;
8202 } else {
8203 u64 csum_offset;
8204
8205 csum_offset = file_offset - dip->logical_offset;
8206 csum_offset >>= fs_info->sectorsize_bits;
8207 csum_offset *= fs_info->csum_size;
8208 btrfs_io_bio(bio)->csum = dip->csums + csum_offset;
8209 }
8210map:
8211 ret = btrfs_map_bio(fs_info, bio, 0);
8212err:
8213 return ret;
8214}
8215
8216
8217
8218
8219
8220static struct btrfs_dio_private *btrfs_create_dio_private(struct bio *dio_bio,
8221 struct inode *inode,
8222 loff_t file_offset)
8223{
8224 const bool write = (btrfs_op(dio_bio) == BTRFS_MAP_WRITE);
8225 const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM);
8226 size_t dip_size;
8227 struct btrfs_dio_private *dip;
8228
8229 dip_size = sizeof(*dip);
8230 if (!write && csum) {
8231 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
8232 size_t nblocks;
8233
8234 nblocks = dio_bio->bi_iter.bi_size >> fs_info->sectorsize_bits;
8235 dip_size += fs_info->csum_size * nblocks;
8236 }
8237
8238 dip = kzalloc(dip_size, GFP_NOFS);
8239 if (!dip)
8240 return NULL;
8241
8242 dip->inode = inode;
8243 dip->logical_offset = file_offset;
8244 dip->bytes = dio_bio->bi_iter.bi_size;
8245 dip->disk_bytenr = dio_bio->bi_iter.bi_sector << 9;
8246 dip->dio_bio = dio_bio;
8247 refcount_set(&dip->refs, 1);
8248 return dip;
8249}
8250
8251static blk_qc_t btrfs_submit_direct(const struct iomap_iter *iter,
8252 struct bio *dio_bio, loff_t file_offset)
8253{
8254 struct inode *inode = iter->inode;
8255 const bool write = (btrfs_op(dio_bio) == BTRFS_MAP_WRITE);
8256 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
8257 const bool raid56 = (btrfs_data_alloc_profile(fs_info) &
8258 BTRFS_BLOCK_GROUP_RAID56_MASK);
8259 struct btrfs_dio_private *dip;
8260 struct bio *bio;
8261 u64 start_sector;
8262 int async_submit = 0;
8263 u64 submit_len;
8264 u64 clone_offset = 0;
8265 u64 clone_len;
8266 u64 logical;
8267 int ret;
8268 blk_status_t status;
8269 struct btrfs_io_geometry geom;
8270 struct btrfs_dio_data *dio_data = iter->iomap.private;
8271 struct extent_map *em = NULL;
8272
8273 dip = btrfs_create_dio_private(dio_bio, inode, file_offset);
8274 if (!dip) {
8275 if (!write) {
8276 unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
8277 file_offset + dio_bio->bi_iter.bi_size - 1);
8278 }
8279 dio_bio->bi_status = BLK_STS_RESOURCE;
8280 bio_endio(dio_bio);
8281 return BLK_QC_T_NONE;
8282 }
8283
8284 if (!write) {
8285
8286
8287
8288
8289
8290
8291 status = btrfs_lookup_bio_sums(inode, dio_bio, dip->csums);
8292 if (status != BLK_STS_OK)
8293 goto out_err;
8294 }
8295
8296 start_sector = dio_bio->bi_iter.bi_sector;
8297 submit_len = dio_bio->bi_iter.bi_size;
8298
8299 do {
8300 logical = start_sector << 9;
8301 em = btrfs_get_chunk_map(fs_info, logical, submit_len);
8302 if (IS_ERR(em)) {
8303 status = errno_to_blk_status(PTR_ERR(em));
8304 em = NULL;
8305 goto out_err_em;
8306 }
8307 ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(dio_bio),
8308 logical, &geom);
8309 if (ret) {
8310 status = errno_to_blk_status(ret);
8311 goto out_err_em;
8312 }
8313
8314 clone_len = min(submit_len, geom.len);
8315 ASSERT(clone_len <= UINT_MAX);
8316
8317
8318
8319
8320
8321 bio = btrfs_bio_clone_partial(dio_bio, clone_offset, clone_len);
8322 bio->bi_private = dip;
8323 bio->bi_end_io = btrfs_end_dio_bio;
8324 btrfs_io_bio(bio)->logical = file_offset;
8325
8326 if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
8327 status = extract_ordered_extent(BTRFS_I(inode), bio,
8328 file_offset);
8329 if (status) {
8330 bio_put(bio);
8331 goto out_err;
8332 }
8333 }
8334
8335 ASSERT(submit_len >= clone_len);
8336 submit_len -= clone_len;
8337
8338
8339
8340
8341
8342
8343
8344
8345
8346
8347 if (submit_len > 0) {
8348 refcount_inc(&dip->refs);
8349
8350
8351
8352
8353
8354
8355 if (!raid56)
8356 async_submit = 1;
8357 }
8358
8359 status = btrfs_submit_dio_bio(bio, inode, file_offset,
8360 async_submit);
8361 if (status) {
8362 bio_put(bio);
8363 if (submit_len > 0)
8364 refcount_dec(&dip->refs);
8365 goto out_err_em;
8366 }
8367
8368 dio_data->submitted += clone_len;
8369 clone_offset += clone_len;
8370 start_sector += clone_len >> 9;
8371 file_offset += clone_len;
8372
8373 free_extent_map(em);
8374 } while (submit_len > 0);
8375 return BLK_QC_T_NONE;
8376
8377out_err_em:
8378 free_extent_map(em);
8379out_err:
8380 dip->dio_bio->bi_status = status;
8381 btrfs_dio_private_put(dip);
8382
8383 return BLK_QC_T_NONE;
8384}
8385
/* iomap begin/end callbacks used for btrfs direct IO. */
const struct iomap_ops btrfs_dio_iomap_ops = {
	.iomap_begin            = btrfs_dio_iomap_begin,
	.iomap_end              = btrfs_dio_iomap_end,
};
8390
/* iomap direct IO hooks: btrfs submits the dio bios itself. */
const struct iomap_dio_ops btrfs_dio_ops = {
	.submit_io		= btrfs_submit_direct,
};
8394
8395static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
8396 u64 start, u64 len)
8397{
8398 int ret;
8399
8400 ret = fiemap_prep(inode, fieinfo, start, &len, 0);
8401 if (ret)
8402 return ret;
8403
8404 return extent_fiemap(BTRFS_I(inode), fieinfo, start, len);
8405}
8406
8407int btrfs_readpage(struct file *file, struct page *page)
8408{
8409 struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
8410 u64 start = page_offset(page);
8411 u64 end = start + PAGE_SIZE - 1;
8412 struct btrfs_bio_ctrl bio_ctrl = { 0 };
8413 int ret;
8414
8415 btrfs_lock_and_flush_ordered_range(inode, start, end, NULL);
8416
8417 ret = btrfs_do_readpage(page, NULL, &bio_ctrl, 0, NULL);
8418 if (bio_ctrl.bio)
8419 ret = submit_one_bio(bio_ctrl.bio, 0, bio_ctrl.bio_flags);
8420 return ret;
8421}
8422
8423static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
8424{
8425 struct inode *inode = page->mapping->host;
8426 int ret;
8427
8428 if (current->flags & PF_MEMALLOC) {
8429 redirty_page_for_writepage(wbc, page);
8430 unlock_page(page);
8431 return 0;
8432 }
8433
8434
8435
8436
8437
8438
8439 if (!igrab(inode)) {
8440 redirty_page_for_writepage(wbc, page);
8441 return AOP_WRITEPAGE_ACTIVATE;
8442 }
8443 ret = extent_write_full_page(page, wbc);
8444 btrfs_add_delayed_iput(inode);
8445 return ret;
8446}
8447
/* ->writepages address space operation; forwards to extent_writepages(). */
static int btrfs_writepages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	int err = extent_writepages(mapping, wbc);

	return err;
}
8453
/* ->readahead address space operation; forwards to extent_readahead(). */
static void btrfs_readahead(struct readahead_control *rac)
{
	extent_readahead(rac);
	return;
}
8458
8459
8460
8461
8462
8463
8464
8465
8466static void wait_subpage_spinlock(struct page *page)
8467{
8468 struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
8469 struct btrfs_subpage *subpage;
8470
8471 if (fs_info->sectorsize == PAGE_SIZE)
8472 return;
8473
8474 ASSERT(PagePrivate(page) && page->private);
8475 subpage = (struct btrfs_subpage *)page->private;
8476
8477
8478
8479
8480
8481
8482
8483
8484
8485
8486
8487
8488 spin_lock_irq(&subpage->lock);
8489 spin_unlock_irq(&subpage->lock);
8490}
8491
8492static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
8493{
8494 int ret = try_release_extent_mapping(page, gfp_flags);
8495
8496 if (ret == 1) {
8497 wait_subpage_spinlock(page);
8498 clear_page_extent_mapped(page);
8499 }
8500 return ret;
8501}
8502
8503static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
8504{
8505 if (PageWriteback(page) || PageDirty(page))
8506 return 0;
8507 return __btrfs_releasepage(page, gfp_flags);
8508}
8509
#ifdef CONFIG_MIGRATION
/*
 * ->migratepage address space operation.
 *
 * Moves the mapping entry from @page to @newpage, then carries over the
 * private data and the Ordered page flag before copying the page contents
 * (or only the page state for MIGRATE_SYNC_NO_COPY).
 *
 * Returns MIGRATEPAGE_SUCCESS, or the failure code of
 * migrate_page_move_mapping().
 */
static int btrfs_migratepage(struct address_space *mapping,
			     struct page *newpage, struct page *page,
			     enum migrate_mode mode)
{
	int rc = migrate_page_move_mapping(mapping, newpage, page, 0);

	if (rc != MIGRATEPAGE_SUCCESS)
		return rc;

	if (page_has_private(page)) {
		void *priv = detach_page_private(page);

		attach_page_private(newpage, priv);
	}

	if (PageOrdered(page)) {
		ClearPageOrdered(page);
		SetPageOrdered(newpage);
	}

	if (mode == MIGRATE_SYNC_NO_COPY)
		migrate_page_states(newpage, page);
	else
		migrate_page_copy(newpage, page);

	return MIGRATEPAGE_SUCCESS;
}
#endif
8536
/*
 * ->invalidatepage address space operation.
 *
 * For a partial invalidation (anything but offset 0 and length PAGE_SIZE)
 * only a release of the page is attempted. For a full-page invalidation the
 * page range is walked piece by piece: pieces covered by an ordered extent
 * that still has the page's Ordered bit set are accounted as truncated in
 * that ordered extent, and qgroup reserved data and extent state bits are
 * dropped for every piece.
 *
 * @page:   locked page being invalidated
 * @offset: start of the invalidated range inside the page
 * @length: length of the invalidated range
 */
static void btrfs_invalidatepage(struct page *page, unsigned int offset,
				 unsigned int length)
{
	struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	struct extent_io_tree *tree = &inode->io_tree;
	struct extent_state *cached_state = NULL;
	u64 page_start = page_offset(page);
	u64 page_end = page_start + PAGE_SIZE - 1;
	u64 cur;
	/* Non-zero while the inode is being freed (I_FREEING set). */
	int inode_evicting = inode->vfs_inode.i_state & I_FREEING;

	/*
	 * Wait for any writeback of the page to finish, and for any holder of
	 * the subpage spinlock to drop it, before touching the page state.
	 */
	wait_on_page_writeback(page);
	wait_subpage_spinlock(page);

	/*
	 * Partial invalidation: just try to release the page and return; no
	 * ordered extent or extent state handling is done.
	 */
	if (!(offset == 0 && length == PAGE_SIZE)) {
		btrfs_releasepage(page, GFP_NOFS);
		return;
	}

	/* The io tree is left untouched when the inode is being evicted. */
	if (!inode_evicting)
		lock_extent_bits(tree, page_start, page_end, &cached_state);

	cur = page_start;
	while (cur < page_end) {
		struct btrfs_ordered_extent *ordered;
		bool delete_states;
		u64 range_end;
		u32 range_len;

		ordered = btrfs_lookup_first_ordered_range(inode, cur,
							   page_end + 1 - cur);
		if (!ordered) {
			range_end = page_end;
			/*
			 * No ordered extent covers the rest of the page, so
			 * the extent states for it can be deleted.
			 */
			delete_states = true;
			goto next;
		}
		if (ordered->file_offset > cur) {
			/*
			 * There is a gap before the ordered extent; handle
			 * only [cur, ordered->file_offset - 1] this round and
			 * meet the ordered extent in the next iteration.
			 */
			range_end = ordered->file_offset - 1;
			delete_states = true;
			goto next;
		}

		range_end = min(ordered->file_offset + ordered->num_bytes - 1,
				page_end);
		ASSERT(range_end + 1 - cur < U32_MAX);
		range_len = range_end + 1 - cur;
		if (!btrfs_page_test_ordered(fs_info, page, cur, range_len)) {
			/*
			 * The Ordered bit is not set for this range, so the
			 * ordered extent accounting below is skipped and the
			 * extent states are kept.
			 * NOTE(review): presumably the range was already
			 * accounted by the end-io path - confirm.
			 */
			delete_states = false;
			goto next;
		}
		btrfs_page_clear_ordered(fs_info, page, cur, range_len);

		/*
		 * Clear the delalloc, locked, accounting and defrag bits for
		 * the range before the ordered extent is marked truncated.
		 * NOTE(review): the ordering relative to the ordered extent
		 * update below looks deliberate - confirm before changing it.
		 */
		if (!inode_evicting)
			clear_extent_bit(tree, cur, range_end,
					 EXTENT_DELALLOC |
					 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
					 EXTENT_DEFRAG, 1, 0, &cached_state);

		spin_lock_irq(&inode->ordered_tree.lock);
		set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
		ordered->truncated_len = min(ordered->truncated_len,
					     cur - ordered->file_offset);
		spin_unlock_irq(&inode->ordered_tree.lock);

		if (btrfs_dec_test_ordered_pending(inode, &ordered,
					cur, range_end + 1 - cur)) {
			btrfs_finish_ordered_io(ordered);
			/*
			 * The ordered extent was finished right here, so the
			 * extent states for the range can be deleted.
			 */
			delete_states = true;
		} else {
			/*
			 * The ordered extent still has pending bytes; keep
			 * the extent states.
			 */
			delete_states = false;
		}
next:
		if (ordered)
			btrfs_put_ordered_extent(ordered);
		/*
		 * Free the qgroup reserved data for the range and, unless the
		 * inode is being evicted, clear its extent bits; the states
		 * themselves are deleted only when delete_states was set
		 * above.
		 */
		btrfs_qgroup_free_data(inode, NULL, cur, range_end + 1 - cur);
		if (!inode_evicting) {
			clear_extent_bit(tree, cur, range_end, EXTENT_LOCKED |
				 EXTENT_DELALLOC | EXTENT_UPTODATE |
				 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1,
				 delete_states, &cached_state);
		}
		cur = range_end + 1;
	}

	/*
	 * Every ordered range of the page has been handled above, so the
	 * page must no longer carry the Ordered flag.
	 */
	ASSERT(!PageOrdered(page));
	if (!inode_evicting)
		__btrfs_releasepage(page, GFP_NOFS);
	ClearPageChecked(page);
	clear_page_extent_mapped(page);
}
8705
8706
8707
8708
8709
8710
8711
8712
8713
8714
8715
8716
8717
8718
8719
8720
/*
 * ->page_mkwrite handler: called when a page mapped through mmap is about to
 * become writable.
 *
 * Reserves delalloc space for the page, waits for writeback and ordered
 * extents covering it, marks the range delalloc, zeroes the part of the page
 * beyond i_size and marks the page dirty and uptodate.
 *
 * Returns VM_FAULT_LOCKED with the page still locked on success,
 * VM_FAULT_NOPAGE if the page no longer belongs to this mapping or lies
 * beyond i_size, and an error fault code otherwise.
 */
vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
{
	struct page *page = vmf->page;
	struct inode *inode = file_inode(vmf->vma->vm_file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	struct extent_changeset *data_reserved = NULL;
	unsigned long zero_start;
	loff_t size;
	vm_fault_t ret;
	int ret2;
	int reserved = 0;
	u64 reserved_space;
	u64 page_start;
	u64 page_end;
	u64 end;

	reserved_space = PAGE_SIZE;

	sb_start_pagefault(inode->i_sb);
	page_start = page_offset(page);
	page_end = page_start + PAGE_SIZE - 1;
	end = page_end;

	/*
	 * Reserve space for a full page up front, before any page or extent
	 * lock is taken. The reservation is trimmed further down if the page
	 * turns out to straddle i_size.
	 */
	ret2 = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
					    page_start, reserved_space);
	if (!ret2) {
		ret2 = file_update_time(vmf->vma->vm_file);
		/* From here on a failure must give the reservation back. */
		reserved = 1;
	}
	if (ret2) {
		ret = vmf_error(ret2);
		if (reserved)
			goto out;
		goto out_noreserve;
	}

	/* Result used when the page has been truncated or remapped. */
	ret = VM_FAULT_NOPAGE;
again:
	down_read(&BTRFS_I(inode)->i_mmap_lock);
	lock_page(page);
	size = i_size_read(inode);

	if ((page->mapping != inode->i_mapping) ||
	    (page_start >= size)) {
		/* The page is no longer part of this file's data. */
		goto out_unlock;
	}
	wait_on_page_writeback(page);

	lock_extent_bits(io_tree, page_start, page_end, &cached_state);
	ret2 = set_page_extent_mapped(page);
	if (ret2 < 0) {
		ret = vmf_error(ret2);
		unlock_extent_cached(io_tree, page_start, page_end, &cached_state);
		goto out_unlock;
	}

	/*
	 * If an ordered extent covers the page, drop every lock, start it and
	 * wait for it, then redo the whole locking sequence from the top.
	 */
	ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start,
			PAGE_SIZE);
	if (ordered) {
		unlock_extent_cached(io_tree, page_start, page_end,
				     &cached_state);
		unlock_page(page);
		up_read(&BTRFS_I(inode)->i_mmap_lock);
		btrfs_start_ordered_extent(ordered, 1);
		btrfs_put_ordered_extent(ordered);
		goto again;
	}

	/*
	 * Last page of the file: only the sectors up to i_size (rounded up to
	 * the sector size) need a reservation, so give the excess back.
	 */
	if (page->index == ((size - 1) >> PAGE_SHIFT)) {
		reserved_space = round_up(size - page_start,
					  fs_info->sectorsize);
		if (reserved_space < PAGE_SIZE) {
			end = page_start + reserved_space - 1;
			btrfs_delalloc_release_space(BTRFS_I(inode),
					data_reserved, page_start,
					PAGE_SIZE - reserved_space, true);
		}
	}

	/*
	 * Clear any delalloc, accounting and defrag bits already present on
	 * the range before setting delalloc again below.
	 * NOTE(review): presumably this avoids accounting a range that was
	 * already delalloc twice - confirm.
	 */
	clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end,
			  EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
			  EXTENT_DEFRAG, 0, 0, &cached_state);

	ret2 = btrfs_set_extent_delalloc(BTRFS_I(inode), page_start, end, 0,
					&cached_state);
	if (ret2) {
		unlock_extent_cached(io_tree, page_start, page_end,
				     &cached_state);
		ret = VM_FAULT_SIGBUS;
		goto out_unlock;
	}

	/* The page straddles i_size: zero everything beyond it. */
	if (page_start + PAGE_SIZE > size)
		zero_start = offset_in_page(size);
	else
		zero_start = PAGE_SIZE;

	if (zero_start != PAGE_SIZE) {
		memzero_page(page, zero_start, PAGE_SIZE - zero_start);
		flush_dcache_page(page);
	}
	ClearPageChecked(page);
	btrfs_page_set_dirty(fs_info, page, page_start, end + 1 - page_start);
	btrfs_page_set_uptodate(fs_info, page, page_start, end + 1 - page_start);

	btrfs_set_inode_last_sub_trans(BTRFS_I(inode));

	unlock_extent_cached(io_tree, page_start, page_end, &cached_state);
	up_read(&BTRFS_I(inode)->i_mmap_lock);

	btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
	sb_end_pagefault(inode->i_sb);
	extent_changeset_free(data_reserved);
	/* Success: the page is deliberately left locked for the caller. */
	return VM_FAULT_LOCKED;

out_unlock:
	unlock_page(page);
	up_read(&BTRFS_I(inode)->i_mmap_lock);
out:
	btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
	btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, page_start,
				     reserved_space, (ret != 0));
out_noreserve:
	sb_end_pagefault(inode->i_sb);
	extent_changeset_free(data_reserved);
	return ret;
}
8872
/*
 * Drop the file extent items of @inode beyond inode->i_size.
 *
 * @inode:          inode being truncated; i_size already holds the new size
 * @skip_writeback: when true, do not wait for ordered extents beyond the new
 *                  size first
 *
 * Returns 0 on success or a negative errno.
 */
static int btrfs_truncate(struct inode *inode, bool skip_writeback)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_block_rsv *rsv;
	int ret;
	struct btrfs_trans_handle *trans;
	u64 mask = fs_info->sectorsize - 1;
	u64 min_size = btrfs_calc_metadata_size(fs_info, 1);
	u64 extents_found = 0;

	if (!skip_writeback) {
		/* Wait from the sector containing i_size to the end of file. */
		ret = btrfs_wait_ordered_range(inode, inode->i_size & (~mask),
					       (u64)-1);
		if (ret)
			return ret;
	}

	/*
	 * A temporary block reserve of min_size (the metadata size computed
	 * for one item) is used for the truncation itself. It is marked
	 * failfast.
	 * NOTE(review): presumably failfast makes an exhausted reserve fail
	 * with -ENOSPC instead of falling back to another reserve, which is
	 * what the retry loop below relies on - confirm.
	 */
	rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
	if (!rsv)
		return -ENOMEM;
	rsv->size = min_size;
	rsv->failfast = 1;

	/*
	 * The transaction is started with room for 2 items: one block is
	 * migrated into rsv just below, the rest stays in the transaction
	 * reserve, which is what the inode updates further down run on.
	 */
	trans = btrfs_start_transaction(root, 2);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out;
	}

	/* Move min_size from the transaction reserve into our own reserve. */
	ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv,
				      min_size, false);
	BUG_ON(ret);

	trans->block_rsv = rsv;

	while (1) {
		ret = btrfs_truncate_inode_items(trans, root, BTRFS_I(inode),
						 inode->i_size,
						 BTRFS_EXTENT_DATA_KEY,
						 &extents_found);
		/* Switch back to the transaction reserve for the inode update. */
		trans->block_rsv = &fs_info->trans_block_rsv;
		if (ret != -ENOSPC && ret != -EAGAIN)
			break;

		/* Out of reserve or asked to retry: save progress, restart. */
		ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
		if (ret)
			break;

		btrfs_end_transaction(trans);
		btrfs_btree_balance_dirty(fs_info);

		trans = btrfs_start_transaction(root, 2);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			/* NULL tells the tail below there is nothing to end. */
			trans = NULL;
			break;
		}

		/* Empty the old reserve and refill it from the new transaction. */
		btrfs_block_rsv_release(fs_info, rsv, -1, NULL);
		ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv,
					      rsv, min_size, false);
		BUG_ON(ret);
		trans->block_rsv = rsv;
	}

	/*
	 * btrfs_truncate_inode_items() asked for the block at i_size to be
	 * truncated. That is done outside of a transaction, and a new
	 * transaction is started afterwards for the final inode update.
	 */
	if (ret == NEED_TRUNCATE_BLOCK) {
		btrfs_end_transaction(trans);
		btrfs_btree_balance_dirty(fs_info);

		ret = btrfs_truncate_block(BTRFS_I(inode), inode->i_size, 0, 0);
		if (ret)
			goto out;
		trans = btrfs_start_transaction(root, 1);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			goto out;
		}
		btrfs_inode_safe_disk_i_size_write(BTRFS_I(inode), 0);
	}

	if (trans) {
		int ret2;

		/* Keep the first error; later ones must not overwrite it. */
		trans->block_rsv = &fs_info->trans_block_rsv;
		ret2 = btrfs_update_inode(trans, root, BTRFS_I(inode));
		if (ret2 && !ret)
			ret = ret2;

		ret2 = btrfs_end_transaction(trans);
		if (ret2 && !ret)
			ret = ret2;
		btrfs_btree_balance_dirty(fs_info);
	}
out:
	btrfs_free_block_rsv(fs_info, rsv);

	/*
	 * If any extent was found by the truncation, flag the inode as
	 * needing a full sync. This is done even when an error is returned.
	 */
	if (extents_found > 0)
		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);

	return ret;
}
9027
9028
9029
9030
9031int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
9032 struct btrfs_root *new_root,
9033 struct btrfs_root *parent_root,
9034 struct user_namespace *mnt_userns)
9035{
9036 struct inode *inode;
9037 int err;
9038 u64 index = 0;
9039 u64 ino;
9040
9041 err = btrfs_get_free_objectid(new_root, &ino);
9042 if (err < 0)
9043 return err;
9044
9045 inode = btrfs_new_inode(trans, new_root, mnt_userns, NULL, "..", 2,
9046 ino, ino,
9047 S_IFDIR | (~current_umask() & S_IRWXUGO),
9048 &index);
9049 if (IS_ERR(inode))
9050 return PTR_ERR(inode);
9051 inode->i_op = &btrfs_dir_inode_operations;
9052 inode->i_fop = &btrfs_dir_file_operations;
9053
9054 set_nlink(inode, 1);
9055 btrfs_i_size_write(BTRFS_I(inode), 0);
9056 unlock_new_inode(inode);
9057
9058 err = btrfs_subvol_inherit_props(trans, new_root, parent_root);
9059 if (err)
9060 btrfs_err(new_root->fs_info,
9061 "error inheriting subvolume %llu properties: %d",
9062 new_root->root_key.objectid, err);
9063
9064 err = btrfs_update_inode(trans, new_root, BTRFS_I(inode));
9065
9066 iput(inode);
9067 return err;
9068}
9069
9070struct inode *btrfs_alloc_inode(struct super_block *sb)
9071{
9072 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
9073 struct btrfs_inode *ei;
9074 struct inode *inode;
9075
9076 ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_KERNEL);
9077 if (!ei)
9078 return NULL;
9079
9080 ei->root = NULL;
9081 ei->generation = 0;
9082 ei->last_trans = 0;
9083 ei->last_sub_trans = 0;
9084 ei->logged_trans = 0;
9085 ei->delalloc_bytes = 0;
9086 ei->new_delalloc_bytes = 0;
9087 ei->defrag_bytes = 0;
9088 ei->disk_i_size = 0;
9089 ei->flags = 0;
9090 ei->ro_flags = 0;
9091 ei->csum_bytes = 0;
9092 ei->index_cnt = (u64)-1;
9093 ei->dir_index = 0;
9094 ei->last_unlink_trans = 0;
9095 ei->last_reflink_trans = 0;
9096 ei->last_log_commit = 0;
9097
9098 spin_lock_init(&ei->lock);
9099 ei->outstanding_extents = 0;
9100 if (sb->s_magic != BTRFS_TEST_MAGIC)
9101 btrfs_init_metadata_block_rsv(fs_info, &ei->block_rsv,
9102 BTRFS_BLOCK_RSV_DELALLOC);
9103 ei->runtime_flags = 0;
9104 ei->prop_compress = BTRFS_COMPRESS_NONE;
9105 ei->defrag_compress = BTRFS_COMPRESS_NONE;
9106
9107 ei->delayed_node = NULL;
9108
9109 ei->i_otime.tv_sec = 0;
9110 ei->i_otime.tv_nsec = 0;
9111
9112 inode = &ei->vfs_inode;
9113 extent_map_tree_init(&ei->extent_tree);
9114 extent_io_tree_init(fs_info, &ei->io_tree, IO_TREE_INODE_IO, inode);
9115 extent_io_tree_init(fs_info, &ei->io_failure_tree,
9116 IO_TREE_INODE_IO_FAILURE, inode);
9117 extent_io_tree_init(fs_info, &ei->file_extent_tree,
9118 IO_TREE_INODE_FILE_EXTENT, inode);
9119 ei->io_tree.track_uptodate = true;
9120 ei->io_failure_tree.track_uptodate = true;
9121 atomic_set(&ei->sync_writers, 0);
9122 mutex_init(&ei->log_mutex);
9123 btrfs_ordered_inode_tree_init(&ei->ordered_tree);
9124 INIT_LIST_HEAD(&ei->delalloc_inodes);
9125 INIT_LIST_HEAD(&ei->delayed_iput);
9126 RB_CLEAR_NODE(&ei->rb_node);
9127 init_rwsem(&ei->i_mmap_lock);
9128
9129 return inode;
9130}
9131
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/*
 * Sanity-test teardown of an inode: drop its whole extent cache and return
 * it to the inode slab cache.
 */
void btrfs_test_destroy_inode(struct inode *inode)
{
	struct btrfs_inode *binode = BTRFS_I(inode);

	btrfs_drop_extent_cache(binode, 0, (u64)-1, 0);
	kmem_cache_free(btrfs_inode_cachep, binode);
}
#endif
9139
9140void btrfs_free_inode(struct inode *inode)
9141{
9142 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
9143}
9144
9145void btrfs_destroy_inode(struct inode *vfs_inode)
9146{
9147 struct btrfs_ordered_extent *ordered;
9148 struct btrfs_inode *inode = BTRFS_I(vfs_inode);
9149 struct btrfs_root *root = inode->root;
9150
9151 WARN_ON(!hlist_empty(&vfs_inode->i_dentry));
9152 WARN_ON(vfs_inode->i_data.nrpages);
9153 WARN_ON(inode->block_rsv.reserved);
9154 WARN_ON(inode->block_rsv.size);
9155 WARN_ON(inode->outstanding_extents);
9156 WARN_ON(inode->delalloc_bytes);
9157 WARN_ON(inode->new_delalloc_bytes);
9158 WARN_ON(inode->csum_bytes);
9159 WARN_ON(inode->defrag_bytes);
9160
9161
9162
9163
9164
9165
9166 if (!root)
9167 return;
9168
9169 while (1) {
9170 ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
9171 if (!ordered)
9172 break;
9173 else {
9174 btrfs_err(root->fs_info,
9175 "found ordered extent %llu %llu on inode cleanup",
9176 ordered->file_offset, ordered->num_bytes);
9177 btrfs_remove_ordered_extent(inode, ordered);
9178 btrfs_put_ordered_extent(ordered);
9179 btrfs_put_ordered_extent(ordered);
9180 }
9181 }
9182 btrfs_qgroup_check_reserved_leak(inode);
9183 inode_tree_del(inode);
9184 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
9185 btrfs_inode_clear_file_extent_range(inode, 0, (u64)-1);
9186 btrfs_put_root(inode->root);
9187}
9188
9189int btrfs_drop_inode(struct inode *inode)
9190{
9191 struct btrfs_root *root = BTRFS_I(inode)->root;
9192
9193 if (root == NULL)
9194 return 1;
9195
9196
9197 if (btrfs_root_refs(&root->root_item) == 0)
9198 return 1;
9199 else
9200 return generic_drop_inode(inode);
9201}
9202
9203static void init_once(void *foo)
9204{
9205 struct btrfs_inode *ei = (struct btrfs_inode *) foo;
9206
9207 inode_init_once(&ei->vfs_inode);
9208}
9209
9210void __cold btrfs_destroy_cachep(void)
9211{
9212
9213
9214
9215
9216 rcu_barrier();
9217 kmem_cache_destroy(btrfs_inode_cachep);
9218 kmem_cache_destroy(btrfs_trans_handle_cachep);
9219 kmem_cache_destroy(btrfs_path_cachep);
9220 kmem_cache_destroy(btrfs_free_space_cachep);
9221 kmem_cache_destroy(btrfs_free_space_bitmap_cachep);
9222}
9223
9224int __init btrfs_init_cachep(void)
9225{
9226 btrfs_inode_cachep = kmem_cache_create("btrfs_inode",
9227 sizeof(struct btrfs_inode), 0,
9228 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT,
9229 init_once);
9230 if (!btrfs_inode_cachep)
9231 goto fail;
9232
9233 btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle",
9234 sizeof(struct btrfs_trans_handle), 0,
9235 SLAB_TEMPORARY | SLAB_MEM_SPREAD, NULL);
9236 if (!btrfs_trans_handle_cachep)
9237 goto fail;
9238
9239 btrfs_path_cachep = kmem_cache_create("btrfs_path",
9240 sizeof(struct btrfs_path), 0,
9241 SLAB_MEM_SPREAD, NULL);
9242 if (!btrfs_path_cachep)
9243 goto fail;
9244
9245 btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space",
9246 sizeof(struct btrfs_free_space), 0,
9247 SLAB_MEM_SPREAD, NULL);
9248 if (!btrfs_free_space_cachep)
9249 goto fail;
9250
9251 btrfs_free_space_bitmap_cachep = kmem_cache_create("btrfs_free_space_bitmap",
9252 PAGE_SIZE, PAGE_SIZE,
9253 SLAB_MEM_SPREAD, NULL);
9254 if (!btrfs_free_space_bitmap_cachep)
9255 goto fail;
9256
9257 return 0;
9258fail:
9259 btrfs_destroy_cachep();
9260 return -ENOMEM;
9261}
9262
9263static int btrfs_getattr(struct user_namespace *mnt_userns,
9264 const struct path *path, struct kstat *stat,
9265 u32 request_mask, unsigned int flags)
9266{
9267 u64 delalloc_bytes;
9268 u64 inode_bytes;
9269 struct inode *inode = d_inode(path->dentry);
9270 u32 blocksize = inode->i_sb->s_blocksize;
9271 u32 bi_flags = BTRFS_I(inode)->flags;
9272 u32 bi_ro_flags = BTRFS_I(inode)->ro_flags;
9273
9274 stat->result_mask |= STATX_BTIME;
9275 stat->btime.tv_sec = BTRFS_I(inode)->i_otime.tv_sec;
9276 stat->btime.tv_nsec = BTRFS_I(inode)->i_otime.tv_nsec;
9277 if (bi_flags & BTRFS_INODE_APPEND)
9278 stat->attributes |= STATX_ATTR_APPEND;
9279 if (bi_flags & BTRFS_INODE_COMPRESS)
9280 stat->attributes |= STATX_ATTR_COMPRESSED;
9281 if (bi_flags & BTRFS_INODE_IMMUTABLE)
9282 stat->attributes |= STATX_ATTR_IMMUTABLE;
9283 if (bi_flags & BTRFS_INODE_NODUMP)
9284 stat->attributes |= STATX_ATTR_NODUMP;
9285 if (bi_ro_flags & BTRFS_INODE_RO_VERITY)
9286 stat->attributes |= STATX_ATTR_VERITY;
9287
9288 stat->attributes_mask |= (STATX_ATTR_APPEND |
9289 STATX_ATTR_COMPRESSED |
9290 STATX_ATTR_IMMUTABLE |
9291 STATX_ATTR_NODUMP);
9292
9293 generic_fillattr(mnt_userns, inode, stat);
9294 stat->dev = BTRFS_I(inode)->root->anon_dev;
9295
9296 spin_lock(&BTRFS_I(inode)->lock);
9297 delalloc_bytes = BTRFS_I(inode)->new_delalloc_bytes;
9298 inode_bytes = inode_get_bytes(inode);
9299 spin_unlock(&BTRFS_I(inode)->lock);
9300 stat->blocks = (ALIGN(inode_bytes, blocksize) +
9301 ALIGN(delalloc_bytes, blocksize)) >> 9;
9302 return 0;
9303}
9304
/*
 * Exchange two existing directory entries inside a single transaction: on
 * success the name of @old_dentry (in @old_dir) refers to the inode that
 * @new_dentry referred to, and the name of @new_dentry (in @new_dir) refers
 * to the inode that @old_dentry referred to.  The inodes of both dentries are
 * dereferenced unconditionally, so both dentries must have an inode attached.
 *
 * Returns 0 on success or a negative errno.  When the transaction was started
 * the first failure is returned; if nothing failed the result of
 * btrfs_end_transaction() is returned.
 */
static int btrfs_rename_exchange(struct inode *old_dir,
			      struct dentry *old_dentry,
			      struct inode *new_dir,
			      struct dentry *new_dentry)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(old_dir->i_sb);
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(old_dir)->root;
	struct btrfs_root *dest = BTRFS_I(new_dir)->root;
	struct inode *new_inode = new_dentry->d_inode;
	struct inode *old_inode = old_dentry->d_inode;
	struct timespec64 ctime = current_time(old_inode);
	u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
	u64 new_ino = btrfs_ino(BTRFS_I(new_inode));
	u64 old_idx = 0;
	u64 new_idx = 0;
	int ret;
	int ret2;
	bool root_log_pinned = false;
	bool dest_log_pinned = false;
	bool need_abort = false;

	/*
	 * Entries that live in different roots may only be exchanged when
	 * both inodes are numbered BTRFS_FIRST_FREE_OBJECTID.  Inodes with
	 * that number are handled through btrfs_unlink_subvol() below, i.e.
	 * they are treated as subvolume links.
	 */
	if (root != dest &&
	    (old_ino != BTRFS_FIRST_FREE_OBJECTID ||
	     new_ino != BTRFS_FIRST_FREE_OBJECTID))
		return -EXDEV;

	/* Hold subvol_sem (shared) while a subvolume link is being moved. */
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID ||
	    new_ino == BTRFS_FIRST_FREE_OBJECTID)
		down_read(&fs_info->subvol_sem);

	/*
	 * Reserve room for 12 items.
	 * NOTE(review): the breakdown of this count is not visible here;
	 * presumably it is the worst case of item modifications for unlinking
	 * and re-linking both entries - confirm before changing it.
	 */
	trans = btrfs_start_transaction(root, 12);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_notrans;
	}

	/* The transaction was started on @root only; also record @dest. */
	if (dest != root) {
		ret = btrfs_record_root_in_trans(trans, dest);
		if (ret)
			goto out_fail;
	}

	/*
	 * Get the directory index each inode will use in its new parent:
	 * old_idx for old_inode inside new_dir, new_idx for new_inode inside
	 * old_dir.
	 */
	ret = btrfs_set_inode_index(BTRFS_I(new_dir), &old_idx);
	if (ret)
		goto out_fail;
	ret = btrfs_set_inode_index(BTRFS_I(old_dir), &new_idx);
	if (ret)
		goto out_fail;

	/*
	 * Clear the cached dir_index of both inodes; it is set again at the
	 * end, and only for inodes with a single link.
	 */
	BTRFS_I(old_inode)->dir_index = 0ULL;
	BTRFS_I(new_inode)->dir_index = 0ULL;

	/* Reference for the old inode under its new name / new parent. */
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
		/* A subvolume link is involved: force a full log commit. */
		btrfs_set_log_full_commit(trans);
	} else {
		ret = btrfs_insert_inode_ref(trans, dest,
					     new_dentry->d_name.name,
					     new_dentry->d_name.len,
					     old_ino,
					     btrfs_ino(BTRFS_I(new_dir)),
					     old_idx);
		if (ret)
			goto out_fail;
		/*
		 * A ref has been inserted, so from here on a failure has to
		 * abort the transaction instead of simply ending it.
		 */
		need_abort = true;
	}

	/* Reference for the new inode under the old name / old parent. */
	if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
		/* A subvolume link is involved: force a full log commit. */
		btrfs_set_log_full_commit(trans);
	} else {
		ret = btrfs_insert_inode_ref(trans, root,
					     old_dentry->d_name.name,
					     old_dentry->d_name.len,
					     new_ino,
					     btrfs_ino(BTRFS_I(old_dir)),
					     new_idx);
		if (ret) {
			if (need_abort)
				btrfs_abort_transaction(trans, ret);
			goto out_fail;
		}
	}

	/* Bump versions and timestamps of both directories and both inodes. */
	inode_inc_iversion(old_dir);
	inode_inc_iversion(new_dir);
	inode_inc_iversion(old_inode);
	inode_inc_iversion(new_inode);
	old_dir->i_ctime = old_dir->i_mtime = ctime;
	new_dir->i_ctime = new_dir->i_mtime = ctime;
	old_inode->i_ctime = ctime;
	new_inode->i_ctime = ctime;

	/* Only when the two entries live in different parent directories. */
	if (old_dentry->d_parent != new_dentry->d_parent) {
		btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
				BTRFS_I(old_inode), 1);
		btrfs_record_unlink_dir(trans, BTRFS_I(new_dir),
				BTRFS_I(new_inode), 1);
	}

	/*
	 * Pin the log of a root before unlinking a regular (non-subvolume)
	 * inode from it.  Each pin taken here is dropped by
	 * btrfs_end_log_trans(), either after btrfs_log_new_name() below or
	 * in the error path at out_fail.
	 * NOTE(review): presumably the pin keeps a concurrent log sync from
	 * persisting a log that reflects only part of the exchange - confirm
	 * against tree-log.c.
	 */
	if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
		btrfs_pin_log_trans(root);
		root_log_pinned = true;
	}
	if (new_ino != BTRFS_FIRST_FREE_OBJECTID) {
		btrfs_pin_log_trans(dest);
		dest_log_pinned = true;
	}

	/* Remove the old entry from old_dir. */
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
		ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
	} else {
		ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),
					   BTRFS_I(old_dentry->d_inode),
					   old_dentry->d_name.name,
					   old_dentry->d_name.len);
		if (!ret)
			ret = btrfs_update_inode(trans, root, BTRFS_I(old_inode));
	}
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	/* Remove the new entry from new_dir. */
	if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
		ret = btrfs_unlink_subvol(trans, new_dir, new_dentry);
	} else {
		ret = __btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir),
					   BTRFS_I(new_dentry->d_inode),
					   new_dentry->d_name.name,
					   new_dentry->d_name.len);
		if (!ret)
			ret = btrfs_update_inode(trans, dest, BTRFS_I(new_inode));
	}
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	/* Link old_inode into new_dir under the new name ... */
	ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode),
			     new_dentry->d_name.name,
			     new_dentry->d_name.len, 0, old_idx);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	/* ... and new_inode into old_dir under the old name. */
	ret = btrfs_add_link(trans, BTRFS_I(old_dir), BTRFS_I(new_inode),
			     old_dentry->d_name.name,
			     old_dentry->d_name.len, 0, new_idx);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	/* Cache the new directory index only for single-link inodes. */
	if (old_inode->i_nlink == 1)
		BTRFS_I(old_inode)->dir_index = old_idx;
	if (new_inode->i_nlink == 1)
		BTRFS_I(new_inode)->dir_index = new_idx;

	/* Log the new names and release the log pins taken above. */
	if (root_log_pinned) {
		btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir),
				   new_dentry->d_parent);
		btrfs_end_log_trans(root);
		root_log_pinned = false;
	}
	if (dest_log_pinned) {
		btrfs_log_new_name(trans, BTRFS_I(new_inode), BTRFS_I(new_dir),
				   old_dentry->d_parent);
		btrfs_end_log_trans(dest);
		dest_log_pinned = false;
	}
out_fail:
	/*
	 * Error path with a log still pinned: if any of the four inodes is in
	 * the log for the current generation, force a full transaction commit
	 * on the next sync, then drop the pins.
	 * NOTE(review): presumably the logged state could otherwise be
	 * replayed although the exchange did not complete - confirm.
	 */
	if (ret && (root_log_pinned || dest_log_pinned)) {
		if (btrfs_inode_in_log(BTRFS_I(old_dir), fs_info->generation) ||
		    btrfs_inode_in_log(BTRFS_I(new_dir), fs_info->generation) ||
		    btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
		    btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation))
			btrfs_set_log_full_commit(trans);

		if (root_log_pinned) {
			btrfs_end_log_trans(root);
			root_log_pinned = false;
		}
		if (dest_log_pinned) {
			btrfs_end_log_trans(dest);
			dest_log_pinned = false;
		}
	}
	/* Keep the first error; otherwise report the end-transaction result. */
	ret2 = btrfs_end_transaction(trans);
	ret = ret ? ret : ret2;
out_notrans:
	/* Mirror of the down_read() taken before starting the transaction. */
	if (new_ino == BTRFS_FIRST_FREE_OBJECTID ||
	    old_ino == BTRFS_FIRST_FREE_OBJECTID)
		up_read(&fs_info->subvol_sem);

	return ret;
}
9552
9553static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans,
9554 struct btrfs_root *root,
9555 struct user_namespace *mnt_userns,
9556 struct inode *dir,
9557 struct dentry *dentry)
9558{
9559 int ret;
9560 struct inode *inode;
9561 u64 objectid;
9562 u64 index;
9563
9564 ret = btrfs_get_free_objectid(root, &objectid);
9565 if (ret)
9566 return ret;
9567
9568 inode = btrfs_new_inode(trans, root, mnt_userns, dir,
9569 dentry->d_name.name,
9570 dentry->d_name.len,
9571 btrfs_ino(BTRFS_I(dir)),
9572 objectid,
9573 S_IFCHR | WHITEOUT_MODE,
9574 &index);
9575
9576 if (IS_ERR(inode)) {
9577 ret = PTR_ERR(inode);
9578 return ret;
9579 }
9580
9581 inode->i_op = &btrfs_special_inode_operations;
9582 init_special_inode(inode, inode->i_mode,
9583 WHITEOUT_DEV);
9584
9585 ret = btrfs_init_inode_security(trans, inode, dir,
9586 &dentry->d_name);
9587 if (ret)
9588 goto out;
9589
9590 ret = btrfs_add_nondir(trans, BTRFS_I(dir), dentry,
9591 BTRFS_I(inode), 0, index);
9592 if (ret)
9593 goto out;
9594
9595 ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
9596out:
9597 unlock_new_inode(inode);
9598 if (ret)
9599 inode_dec_link_count(inode);
9600 iput(inode);
9601
9602 return ret;
9603}
9604
/*
 * Rename @old_dentry (in @old_dir) to @new_dentry (in @new_dir).  If
 * @new_dentry already has an inode, that entry is unlinked and replaced.
 * With RENAME_WHITEOUT in @flags a whiteout is created at the old name
 * afterwards.
 *
 * Returns 0 on success or a negative errno.  When the transaction was started
 * the first failure is returned; if nothing failed the result of
 * btrfs_end_transaction() is returned.
 */
static int btrfs_rename(struct user_namespace *mnt_userns,
			struct inode *old_dir, struct dentry *old_dentry,
			struct inode *new_dir, struct dentry *new_dentry,
			unsigned int flags)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(old_dir->i_sb);
	struct btrfs_trans_handle *trans;
	unsigned int trans_num_items;
	struct btrfs_root *root = BTRFS_I(old_dir)->root;
	struct btrfs_root *dest = BTRFS_I(new_dir)->root;
	struct inode *new_inode = d_inode(new_dentry);
	struct inode *old_inode = d_inode(old_dentry);
	u64 index = 0;
	int ret;
	int ret2;
	u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
	bool log_pinned = false;

	/* Nothing may be moved into an "empty subvolume" placeholder dir. */
	if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
		return -EPERM;

	/*
	 * Only an inode numbered BTRFS_FIRST_FREE_OBJECTID (handled as a
	 * subvolume link below) may be renamed across roots.
	 */
	if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
		return -EXDEV;

	/*
	 * Refuse to rename an "empty subvolume" placeholder and refuse to
	 * replace an inode numbered BTRFS_FIRST_FREE_OBJECTID.
	 */
	if (old_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID ||
	    (new_inode && btrfs_ino(BTRFS_I(new_inode)) == BTRFS_FIRST_FREE_OBJECTID))
		return -ENOTEMPTY;

	/* A directory may only replace an empty directory. */
	if (S_ISDIR(old_inode->i_mode) && new_inode &&
	    new_inode->i_size > BTRFS_EMPTY_DIR_SIZE)
		return -ENOTEMPTY;


	/* Check the new name for a dir item collision, even without a target. */
	ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino,
			     new_dentry->d_name.name,
			     new_dentry->d_name.len);

	if (ret) {
		if (ret == -EEXIST) {
			/*
			 * -EEXIST is only expected when there is a target
			 * inode to replace; warn and fail otherwise.
			 */
			if (WARN_ON(!new_inode)) {
				return ret;
			}
		} else {
			/* Any other error is returned as is. */
			return ret;
		}
	}
	ret = 0;

	/*
	 * A regular file is replacing an existing, non-empty target: start
	 * writeback of the source now.
	 * NOTE(review): presumably to keep that I/O out of the end of the
	 * transaction - confirm.
	 */
	if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size)
		filemap_flush(old_inode->i_mapping);

	/* Hold subvol_sem (shared) while a subvolume link is being moved. */
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
		down_read(&fs_info->subvol_sem);

	/*
	 * Reserve 11 items for the rename itself, plus 5 more when a whiteout
	 * inode has to be created.
	 * NOTE(review): the per-item breakdown of these counts is not visible
	 * here - confirm before changing them.
	 */
	trans_num_items = 11;
	if (flags & RENAME_WHITEOUT)
		trans_num_items += 5;
	trans = btrfs_start_transaction(root, trans_num_items);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_notrans;
	}

	/* The transaction was started on @root only; also record @dest. */
	if (dest != root) {
		ret = btrfs_record_root_in_trans(trans, dest);
		if (ret)
			goto out_fail;
	}

	/* Directory index the inode will get inside new_dir. */
	ret = btrfs_set_inode_index(BTRFS_I(new_dir), &index);
	if (ret)
		goto out_fail;

	/* Cleared here; set again at the end for single-link inodes only. */
	BTRFS_I(old_inode)->dir_index = 0ULL;
	if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
		/* A subvolume link is involved: force a full log commit. */
		btrfs_set_log_full_commit(trans);
	} else {
		ret = btrfs_insert_inode_ref(trans, dest,
					     new_dentry->d_name.name,
					     new_dentry->d_name.len,
					     old_ino,
					     btrfs_ino(BTRFS_I(new_dir)), index);
		if (ret)
			goto out_fail;
	}

	/* One timestamp is shared by both directories and the moved inode. */
	inode_inc_iversion(old_dir);
	inode_inc_iversion(new_dir);
	inode_inc_iversion(old_inode);
	old_dir->i_ctime = old_dir->i_mtime =
	new_dir->i_ctime = new_dir->i_mtime =
	old_inode->i_ctime = current_time(old_dir);

	/* Only when the entry moves to a different parent directory. */
	if (old_dentry->d_parent != new_dentry->d_parent)
		btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
				BTRFS_I(old_inode), 1);

	if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
		ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
	} else {
		/*
		 * Pin the log of @root before unlinking the old name.  The
		 * pin is dropped by btrfs_end_log_trans(), either after
		 * btrfs_log_new_name() below or in the error path at
		 * out_fail.
		 * NOTE(review): presumably the pin keeps a concurrent log
		 * sync from persisting a log in which the old name is gone
		 * but the new one has not been logged yet - confirm against
		 * tree-log.c.
		 */
		btrfs_pin_log_trans(root);
		log_pinned = true;
		ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),
					BTRFS_I(d_inode(old_dentry)),
					old_dentry->d_name.name,
					old_dentry->d_name.len);
		if (!ret)
			ret = btrfs_update_inode(trans, root, BTRFS_I(old_inode));
	}
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	/* Replace an existing target: unlink it, orphan it if it lost its last link. */
	if (new_inode) {
		inode_inc_iversion(new_inode);
		new_inode->i_ctime = current_time(new_inode);
		if (unlikely(btrfs_ino(BTRFS_I(new_inode)) ==
			     BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
			ret = btrfs_unlink_subvol(trans, new_dir, new_dentry);
			BUG_ON(new_inode->i_nlink == 0);
		} else {
			ret = btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir),
						 BTRFS_I(d_inode(new_dentry)),
						 new_dentry->d_name.name,
						 new_dentry->d_name.len);
		}
		if (!ret && new_inode->i_nlink == 0)
			ret = btrfs_orphan_add(trans,
					BTRFS_I(d_inode(new_dentry)));
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out_fail;
		}
	}

	/* Link the moved inode into new_dir under the new name. */
	ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode),
			     new_dentry->d_name.name,
			     new_dentry->d_name.len, 0, index);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	/* Cache the new directory index only for single-link inodes. */
	if (old_inode->i_nlink == 1)
		BTRFS_I(old_inode)->dir_index = index;

	/* Log the new name and release the log pin taken above. */
	if (log_pinned) {
		btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir),
				   new_dentry->d_parent);
		btrfs_end_log_trans(root);
		log_pinned = false;
	}

	/* Leave a whiteout at the old name if requested. */
	if (flags & RENAME_WHITEOUT) {
		ret = btrfs_whiteout_for_rename(trans, root, mnt_userns,
						old_dir, old_dentry);

		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out_fail;
		}
	}
out_fail:
	/*
	 * Error path with the log still pinned: if any involved inode is in
	 * the log for the current generation, force a full transaction commit
	 * on the next sync, then drop the pin.
	 * NOTE(review): presumably the logged state could otherwise be
	 * replayed although the rename did not complete - confirm.
	 */
	if (ret && log_pinned) {
		if (btrfs_inode_in_log(BTRFS_I(old_dir), fs_info->generation) ||
		    btrfs_inode_in_log(BTRFS_I(new_dir), fs_info->generation) ||
		    btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
		    (new_inode &&
		     btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation)))
			btrfs_set_log_full_commit(trans);

		btrfs_end_log_trans(root);
		log_pinned = false;
	}
	/* Keep the first error; otherwise report the end-transaction result. */
	ret2 = btrfs_end_transaction(trans);
	ret = ret ? ret : ret2;
out_notrans:
	/* Mirror of the down_read() taken before starting the transaction. */
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
		up_read(&fs_info->subvol_sem);

	return ret;
}
9837
9838static int btrfs_rename2(struct user_namespace *mnt_userns, struct inode *old_dir,
9839 struct dentry *old_dentry, struct inode *new_dir,
9840 struct dentry *new_dentry, unsigned int flags)
9841{
9842 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
9843 return -EINVAL;
9844
9845 if (flags & RENAME_EXCHANGE)
9846 return btrfs_rename_exchange(old_dir, old_dentry, new_dir,
9847 new_dentry);
9848
9849 return btrfs_rename(mnt_userns, old_dir, old_dentry, new_dir,
9850 new_dentry, flags);
9851}
9852
/*
 * One queued request to flush the dirty pages of a single inode from a
 * worker (see btrfs_run_delalloc_work() and start_delalloc_inodes()).
 */
struct btrfs_delalloc_work {
	/* Inode to flush; the worker drops this reference with iput(). */
	struct inode *inode;
	/* Completed by the worker; the submitter waits on it before freeing. */
	struct completion completion;
	/* Links the request into the submitter's local list of pending works. */
	struct list_head list;
	/* Work item handed to btrfs_queue_work(). */
	struct btrfs_work work;
};
9859
9860static void btrfs_run_delalloc_work(struct btrfs_work *work)
9861{
9862 struct btrfs_delalloc_work *delalloc_work;
9863 struct inode *inode;
9864
9865 delalloc_work = container_of(work, struct btrfs_delalloc_work,
9866 work);
9867 inode = delalloc_work->inode;
9868 filemap_flush(inode->i_mapping);
9869 if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
9870 &BTRFS_I(inode)->runtime_flags))
9871 filemap_flush(inode->i_mapping);
9872
9873 iput(inode);
9874 complete(&delalloc_work->completion);
9875}
9876
9877static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode)
9878{
9879 struct btrfs_delalloc_work *work;
9880
9881 work = kmalloc(sizeof(*work), GFP_NOFS);
9882 if (!work)
9883 return NULL;
9884
9885 init_completion(&work->completion);
9886 INIT_LIST_HEAD(&work->list);
9887 work->inode = inode;
9888 btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL);
9889
9890 return work;
9891}
9892
9893
9894
9895
9896
/*
 * Walk the list of inodes with delayed allocation in @root and start
 * writeback on them.
 *
 * @wbc:                writeback control; nr_to_write == LONG_MAX selects a
 *                      full flush, which queues one work item per inode on
 *                      the flush workers and waits for all of them.  Any
 *                      other value writes inodes inline through
 *                      filemap_fdatawrite_wbc() until the budget is used up.
 * @snapshot:           set BTRFS_INODE_SNAPSHOT_FLUSH on every inode visited.
 * @in_reclaim_context: skip inodes flagged BTRFS_INODE_NO_DELALLOC_FLUSH.
 *
 * Returns 0 on success, -ENOMEM if a work item could not be allocated, or
 * the error returned by filemap_fdatawrite_wbc().
 */
static int start_delalloc_inodes(struct btrfs_root *root,
				 struct writeback_control *wbc, bool snapshot,
				 bool in_reclaim_context)
{
	struct btrfs_inode *binode;
	struct inode *inode;
	struct btrfs_delalloc_work *work, *next;
	struct list_head works;
	struct list_head splice;
	int ret = 0;
	bool full_flush = wbc->nr_to_write == LONG_MAX;

	INIT_LIST_HEAD(&works);
	INIT_LIST_HEAD(&splice);

	mutex_lock(&root->delalloc_mutex);
	spin_lock(&root->delalloc_lock);
	/* Work on a private copy; entries go back to the root as visited. */
	list_splice_init(&root->delalloc_inodes, &splice);
	while (!list_empty(&splice)) {
		binode = list_entry(splice.next, struct btrfs_inode,
				    delalloc_inodes);

		list_move_tail(&binode->delalloc_inodes,
			       &root->delalloc_inodes);

		if (in_reclaim_context &&
		    test_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &binode->runtime_flags))
			continue;

		/* igrab() fails when no reference can be taken; skip the inode. */
		inode = igrab(&binode->vfs_inode);
		if (!inode) {
			cond_resched_lock(&root->delalloc_lock);
			continue;
		}
		/* The spinlock is dropped for the rest of the iteration. */
		spin_unlock(&root->delalloc_lock);

		if (snapshot)
			set_bit(BTRFS_INODE_SNAPSHOT_FLUSH,
				&binode->runtime_flags);
		if (full_flush) {
			work = btrfs_alloc_delalloc_work(inode);
			if (!work) {
				iput(inode);
				ret = -ENOMEM;
				goto out;
			}
			/* The worker owns the inode reference from here on. */
			list_add_tail(&work->list, &works);
			btrfs_queue_work(root->fs_info->flush_workers,
					 &work->work);
		} else {
			ret = filemap_fdatawrite_wbc(inode->i_mapping, wbc);
			btrfs_add_delayed_iput(inode);
			/* Stop on error or once the write budget is exhausted. */
			if (ret || wbc->nr_to_write <= 0)
				goto out;
		}
		cond_resched();
		spin_lock(&root->delalloc_lock);
	}
	spin_unlock(&root->delalloc_lock);

out:
	/* Wait for every queued work item, then free it. */
	list_for_each_entry_safe(work, next, &works, list) {
		list_del_init(&work->list);
		wait_for_completion(&work->completion);
		kfree(work);
	}

	/* Put back whatever was not visited (early exit). */
	if (!list_empty(&splice)) {
		spin_lock(&root->delalloc_lock);
		list_splice_tail(&splice, &root->delalloc_inodes);
		spin_unlock(&root->delalloc_lock);
	}
	mutex_unlock(&root->delalloc_mutex);
	return ret;
}
9972
9973int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context)
9974{
9975 struct writeback_control wbc = {
9976 .nr_to_write = LONG_MAX,
9977 .sync_mode = WB_SYNC_NONE,
9978 .range_start = 0,
9979 .range_end = LLONG_MAX,
9980 };
9981 struct btrfs_fs_info *fs_info = root->fs_info;
9982
9983 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
9984 return -EROFS;
9985
9986 return start_delalloc_inodes(root, &wbc, true, in_reclaim_context);
9987}
9988
9989int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
9990 bool in_reclaim_context)
9991{
9992 struct writeback_control wbc = {
9993 .nr_to_write = nr,
9994 .sync_mode = WB_SYNC_NONE,
9995 .range_start = 0,
9996 .range_end = LLONG_MAX,
9997 };
9998 struct btrfs_root *root;
9999 struct list_head splice;
10000 int ret;
10001
10002 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
10003 return -EROFS;
10004
10005 INIT_LIST_HEAD(&splice);
10006
10007 mutex_lock(&fs_info->delalloc_root_mutex);
10008 spin_lock(&fs_info->delalloc_root_lock);
10009 list_splice_init(&fs_info->delalloc_roots, &splice);
10010 while (!list_empty(&splice)) {
10011
10012
10013
10014
10015 if (nr == LONG_MAX)
10016 wbc.nr_to_write = LONG_MAX;
10017
10018 root = list_first_entry(&splice, struct btrfs_root,
10019 delalloc_root);
10020 root = btrfs_grab_root(root);
10021 BUG_ON(!root);
10022 list_move_tail(&root->delalloc_root,
10023 &fs_info->delalloc_roots);
10024 spin_unlock(&fs_info->delalloc_root_lock);
10025
10026 ret = start_delalloc_inodes(root, &wbc, false, in_reclaim_context);
10027 btrfs_put_root(root);
10028 if (ret < 0 || wbc.nr_to_write <= 0)
10029 goto out;
10030 spin_lock(&fs_info->delalloc_root_lock);
10031 }
10032 spin_unlock(&fs_info->delalloc_root_lock);
10033
10034 ret = 0;
10035out:
10036 if (!list_empty(&splice)) {
10037 spin_lock(&fs_info->delalloc_root_lock);
10038 list_splice_tail(&splice, &fs_info->delalloc_roots);
10039 spin_unlock(&fs_info->delalloc_root_lock);
10040 }
10041 mutex_unlock(&fs_info->delalloc_root_mutex);
10042 return ret;
10043}
10044
/*
 * Create the symbolic link @dentry in @dir pointing at @symname.  The target
 * string is stored as an inline file extent of the new inode.
 *
 * Returns 0 on success, -ENAMETOOLONG if @symname does not fit in an inline
 * extent, or another negative errno.
 */
static int btrfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
			 struct dentry *dentry, const char *symname)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct inode *inode = NULL;
	int err;
	u64 objectid;
	u64 index = 0;
	int name_len;
	int datasize;
	unsigned long ptr;
	struct btrfs_file_extent_item *ei;
	struct extent_buffer *leaf;

	/* The target must fit into a single inline extent. */
	name_len = strlen(symname);
	if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(fs_info))
		return -ENAMETOOLONG;

	/*
	 * Reserve room for 7 items.
	 * NOTE(review): the breakdown is not visible here; presumably inode
	 * item and ref, the directory items, the parent inode update, the
	 * inline extent item and a security xattr - confirm.
	 */
	trans = btrfs_start_transaction(root, 7);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	err = btrfs_get_free_objectid(root, &objectid);
	if (err)
		goto out_unlock;

	inode = btrfs_new_inode(trans, root, mnt_userns, dir,
				dentry->d_name.name, dentry->d_name.len,
				btrfs_ino(BTRFS_I(dir)), objectid,
				S_IFLNK | S_IRWXUGO, &index);
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
		/* Keep the cleanup at out_unlock from touching an ERR_PTR. */
		inode = NULL;
		goto out_unlock;
	}

	/*
	 * Install the regular-file operations for now; i_op is switched to
	 * the symlink operations once the target has been written.
	 * NOTE(review): presumably needed so btrfs_init_inode_security() /
	 * the active LSM see an inode with usable ops - confirm.
	 */
	inode->i_fop = &btrfs_file_operations;
	inode->i_op = &btrfs_file_inode_operations;
	inode->i_mapping->a_ops = &btrfs_aops;

	err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
	if (err)
		goto out_unlock;

	path = btrfs_alloc_path();
	if (!path) {
		err = -ENOMEM;
		goto out_unlock;
	}
	/* Insert an empty extent data item at file offset 0 ... */
	key.objectid = btrfs_ino(BTRFS_I(inode));
	key.offset = 0;
	key.type = BTRFS_EXTENT_DATA_KEY;
	datasize = btrfs_file_extent_calc_inline_size(name_len);
	err = btrfs_insert_empty_item(trans, root, path, &key,
				      datasize);
	if (err) {
		btrfs_free_path(path);
		goto out_unlock;
	}
	/* ... and fill it in as an uncompressed, unencrypted inline extent. */
	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);
	btrfs_set_file_extent_generation(leaf, ei, trans->transid);
	btrfs_set_file_extent_type(leaf, ei,
				   BTRFS_FILE_EXTENT_INLINE);
	btrfs_set_file_extent_encryption(leaf, ei, 0);
	btrfs_set_file_extent_compression(leaf, ei, 0);
	btrfs_set_file_extent_other_encoding(leaf, ei, 0);
	btrfs_set_file_extent_ram_bytes(leaf, ei, name_len);

	/* Copy the target string (without NUL terminator) into the item. */
	ptr = btrfs_file_extent_inline_start(ei);
	write_extent_buffer(leaf, symname, ptr, name_len);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_free_path(path);

	inode->i_op = &btrfs_symlink_inode_operations;
	inode_nohighmem(inode);
	inode_set_bytes(inode, name_len);
	btrfs_i_size_write(BTRFS_I(inode), name_len);
	err = btrfs_update_inode(trans, root, BTRFS_I(inode));
	/*
	 * Adding the directory entry is the last modification.
	 * NOTE(review): presumably so that no directory items have to be
	 * undone if one of the steps above fails - confirm.
	 */
	if (!err)
		err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry,
				BTRFS_I(inode), 0, index);
	if (err)
		goto out_unlock;

	d_instantiate_new(dentry, inode);

out_unlock:
	btrfs_end_transaction(trans);
	/* On failure drop the link count and discard the half-built inode. */
	if (err && inode) {
		inode_dec_link_count(inode);
		discard_new_inode(inode);
	}
	btrfs_btree_balance_dirty(fs_info);
	return err;
}
10164
10165static struct btrfs_trans_handle *insert_prealloc_file_extent(
10166 struct btrfs_trans_handle *trans_in,
10167 struct btrfs_inode *inode,
10168 struct btrfs_key *ins,
10169 u64 file_offset)
10170{
10171 struct btrfs_file_extent_item stack_fi;
10172 struct btrfs_replace_extent_info extent_info;
10173 struct btrfs_trans_handle *trans = trans_in;
10174 struct btrfs_path *path;
10175 u64 start = ins->objectid;
10176 u64 len = ins->offset;
10177 int qgroup_released;
10178 int ret;
10179
10180 memset(&stack_fi, 0, sizeof(stack_fi));
10181
10182 btrfs_set_stack_file_extent_type(&stack_fi, BTRFS_FILE_EXTENT_PREALLOC);
10183 btrfs_set_stack_file_extent_disk_bytenr(&stack_fi, start);
10184 btrfs_set_stack_file_extent_disk_num_bytes(&stack_fi, len);
10185 btrfs_set_stack_file_extent_num_bytes(&stack_fi, len);
10186 btrfs_set_stack_file_extent_ram_bytes(&stack_fi, len);
10187 btrfs_set_stack_file_extent_compression(&stack_fi, BTRFS_COMPRESS_NONE);
10188
10189
10190 qgroup_released = btrfs_qgroup_release_data(inode, file_offset, len);
10191 if (qgroup_released < 0)
10192 return ERR_PTR(qgroup_released);
10193
10194 if (trans) {
10195 ret = insert_reserved_file_extent(trans, inode,
10196 file_offset, &stack_fi,
10197 true, qgroup_released);
10198 if (ret)
10199 goto free_qgroup;
10200 return trans;
10201 }
10202
10203 extent_info.disk_offset = start;
10204 extent_info.disk_len = len;
10205 extent_info.data_offset = 0;
10206 extent_info.data_len = len;
10207 extent_info.file_offset = file_offset;
10208 extent_info.extent_buf = (char *)&stack_fi;
10209 extent_info.is_new_extent = true;
10210 extent_info.qgroup_reserved = qgroup_released;
10211 extent_info.insertions = 0;
10212
10213 path = btrfs_alloc_path();
10214 if (!path) {
10215 ret = -ENOMEM;
10216 goto free_qgroup;
10217 }
10218
10219 ret = btrfs_replace_file_extents(inode, path, file_offset,
10220 file_offset + len - 1, &extent_info,
10221 &trans);
10222 btrfs_free_path(path);
10223 if (ret)
10224 goto free_qgroup;
10225 return trans;
10226
10227free_qgroup:
10228
10229
10230
10231
10232
10233
10234
10235 btrfs_qgroup_free_refroot(inode->root->fs_info,
10236 inode->root->root_key.objectid, qgroup_released,
10237 BTRFS_QGROUP_RSV_DATA);
10238 return ERR_PTR(ret);
10239}
10240
/*
 * Preallocate extents for the file range [@start, @start + @num_bytes) of
 * @inode, in chunks of at most SZ_256M (but not below @min_size).
 *
 * @mode:       fallocate mode; with FALLOC_FL_KEEP_SIZE i_size is untouched.
 * @min_size:   minimum size passed to the extent allocator.
 * @actual_len: upper bound for i_size when the file is extended.
 * @alloc_hint: in/out allocation hint, advanced past each new extent.
 * @trans:      caller's transaction, or NULL to use one transaction per
 *              extent that is ended after each iteration.
 *
 * Returns 0 on success or a negative errno.  The data space reservation of
 * the part of the range that was not allocated is freed before returning.
 */
static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
				       u64 start, u64 num_bytes, u64 min_size,
				       loff_t actual_len, u64 *alloc_hint,
				       struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	struct extent_map *em;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_key ins;
	u64 cur_offset = start;
	u64 clear_offset = start;
	u64 i_size;
	u64 cur_bytes;
	u64 last_alloc = (u64)-1;
	int ret = 0;
	bool own_trans = true;
	u64 end = start + num_bytes - 1;

	if (trans)
		own_trans = false;
	while (num_bytes > 0) {
		cur_bytes = min_t(u64, num_bytes, SZ_256M);
		cur_bytes = max(cur_bytes, min_size);
		/*
		 * Never ask for more than the previous allocation returned
		 * (last_alloc starts at (u64)-1, i.e. no cap on the first
		 * pass).
		 * NOTE(review): presumably this helps the allocator on a
		 * fragmented filesystem - confirm.
		 */
		cur_bytes = min(cur_bytes, last_alloc);
		ret = btrfs_reserve_extent(root, cur_bytes, cur_bytes,
				min_size, 0, *alloc_hint, &ins, 1, 0);
		if (ret)
			break;

		/*
		 * The extent is reserved now.  Advancing clear_offset keeps
		 * this part of the range out of the final
		 * btrfs_free_reserved_data_space() call; on a later failure
		 * only the reserved extent itself is freed.
		 */
		clear_offset += ins.offset;

		last_alloc = ins.offset;
		trans = insert_prealloc_file_extent(trans, BTRFS_I(inode),
						    &ins, cur_offset);
		/*
		 * Drop the block group reservation count only after the
		 * insertion attempt, whether it succeeded or not.
		 * NOTE(review): the ordering presumably matters for races
		 * with other users of the block group - confirm.
		 */
		btrfs_dec_block_group_reservations(fs_info, ins.objectid);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			btrfs_free_reserved_extent(fs_info, ins.objectid,
						   ins.offset, 0);
			break;
		}

		btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset,
					cur_offset + ins.offset -1, 0);

		/*
		 * Caching the new extent is best effort: without an extent
		 * map the inode is flagged as needing a full sync instead.
		 */
		em = alloc_extent_map();
		if (!em) {
			set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
				&BTRFS_I(inode)->runtime_flags);
			goto next;
		}

		em->start = cur_offset;
		em->orig_start = cur_offset;
		em->len = ins.offset;
		em->block_start = ins.objectid;
		em->block_len = ins.offset;
		em->orig_block_len = ins.offset;
		em->ram_bytes = ins.offset;
		set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
		em->generation = trans->transid;

		/* Retry until no overlapping mapping is in the way. */
		while (1) {
			write_lock(&em_tree->lock);
			ret = add_extent_mapping(em_tree, em, 1);
			write_unlock(&em_tree->lock);
			if (ret != -EEXIST)
				break;
			btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset,
						cur_offset + ins.offset - 1,
						0);
		}
		free_extent_map(em);
next:
		num_bytes -= ins.offset;
		cur_offset += ins.offset;
		*alloc_hint = ins.objectid + ins.offset;

		inode_inc_iversion(inode);
		inode->i_ctime = current_time(inode);
		BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
		/* Grow i_size up to, but never beyond, actual_len. */
		if (!(mode & FALLOC_FL_KEEP_SIZE) &&
		    (actual_len > inode->i_size) &&
		    (cur_offset > inode->i_size)) {
			if (cur_offset > actual_len)
				i_size = actual_len;
			else
				i_size = cur_offset;
			i_size_write(inode, i_size);
			btrfs_inode_safe_disk_i_size_write(BTRFS_I(inode), 0);
		}

		ret = btrfs_update_inode(trans, root, BTRFS_I(inode));

		if (ret) {
			btrfs_abort_transaction(trans, ret);
			if (own_trans)
				btrfs_end_transaction(trans);
			break;
		}

		/*
		 * Without a caller-supplied transaction each extent gets its
		 * own: end it and let the next iteration obtain a new one.
		 */
		if (own_trans) {
			btrfs_end_transaction(trans);
			trans = NULL;
		}
	}
	/* Free the data space reservation of the part never allocated. */
	if (clear_offset < end)
		btrfs_free_reserved_data_space(BTRFS_I(inode), NULL, clear_offset,
			end - clear_offset + 1);
	return ret;
}
10372
10373int btrfs_prealloc_file_range(struct inode *inode, int mode,
10374 u64 start, u64 num_bytes, u64 min_size,
10375 loff_t actual_len, u64 *alloc_hint)
10376{
10377 return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
10378 min_size, actual_len, alloc_hint,
10379 NULL);
10380}
10381
10382int btrfs_prealloc_file_range_trans(struct inode *inode,
10383 struct btrfs_trans_handle *trans, int mode,
10384 u64 start, u64 num_bytes, u64 min_size,
10385 loff_t actual_len, u64 *alloc_hint)
10386{
10387 return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
10388 min_size, actual_len, alloc_hint, trans);
10389}
10390
/*
 * Mark @page dirty; simply forwards to __set_page_dirty_nobuffers() and
 * returns its result.
 */
static int btrfs_set_page_dirty(struct page *page)
{
	int dirtied = __set_page_dirty_nobuffers(page);

	return dirtied;
}
10395
10396static int btrfs_permission(struct user_namespace *mnt_userns,
10397 struct inode *inode, int mask)
10398{
10399 struct btrfs_root *root = BTRFS_I(inode)->root;
10400 umode_t mode = inode->i_mode;
10401
10402 if (mask & MAY_WRITE &&
10403 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
10404 if (btrfs_root_readonly(root))
10405 return -EROFS;
10406 if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY)
10407 return -EACCES;
10408 }
10409 return generic_permission(mnt_userns, inode, mask);
10410}
10411
10412static int btrfs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
10413 struct dentry *dentry, umode_t mode)
10414{
10415 struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
10416 struct btrfs_trans_handle *trans;
10417 struct btrfs_root *root = BTRFS_I(dir)->root;
10418 struct inode *inode = NULL;
10419 u64 objectid;
10420 u64 index;
10421 int ret = 0;
10422
10423
10424
10425
10426 trans = btrfs_start_transaction(root, 5);
10427 if (IS_ERR(trans))
10428 return PTR_ERR(trans);
10429
10430 ret = btrfs_get_free_objectid(root, &objectid);
10431 if (ret)
10432 goto out;
10433
10434 inode = btrfs_new_inode(trans, root, mnt_userns, dir, NULL, 0,
10435 btrfs_ino(BTRFS_I(dir)), objectid, mode, &index);
10436 if (IS_ERR(inode)) {
10437 ret = PTR_ERR(inode);
10438 inode = NULL;
10439 goto out;
10440 }
10441
10442 inode->i_fop = &btrfs_file_operations;
10443 inode->i_op = &btrfs_file_inode_operations;
10444
10445 inode->i_mapping->a_ops = &btrfs_aops;
10446
10447 ret = btrfs_init_inode_security(trans, inode, dir, NULL);
10448 if (ret)
10449 goto out;
10450
10451 ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
10452 if (ret)
10453 goto out;
10454 ret = btrfs_orphan_add(trans, BTRFS_I(inode));
10455 if (ret)
10456 goto out;
10457
10458
10459
10460
10461
10462
10463
10464
10465 set_nlink(inode, 1);
10466 d_tmpfile(dentry, inode);
10467 unlock_new_inode(inode);
10468 mark_inode_dirty(inode);
10469out:
10470 btrfs_end_transaction(trans);
10471 if (ret && inode)
10472 discard_new_inode(inode);
10473 btrfs_btree_balance_dirty(fs_info);
10474 return ret;
10475}
10476
10477void btrfs_set_range_writeback(struct btrfs_inode *inode, u64 start, u64 end)
10478{
10479 struct btrfs_fs_info *fs_info = inode->root->fs_info;
10480 unsigned long index = start >> PAGE_SHIFT;
10481 unsigned long end_index = end >> PAGE_SHIFT;
10482 struct page *page;
10483 u32 len;
10484
10485 ASSERT(end + 1 - start <= U32_MAX);
10486 len = end + 1 - start;
10487 while (index <= end_index) {
10488 page = find_get_page(inode->vfs_inode.i_mapping, index);
10489 ASSERT(page);
10490
10491 btrfs_page_set_writeback(fs_info, page, start, len);
10492 put_page(page);
10493 index++;
10494 }
10495}
10496
10497#ifdef CONFIG_SWAP
10498
10499
10500
10501
10502
10503static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr,
10504 bool is_block_group)
10505{
10506 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
10507 struct btrfs_swapfile_pin *sp, *entry;
10508 struct rb_node **p;
10509 struct rb_node *parent = NULL;
10510
10511 sp = kmalloc(sizeof(*sp), GFP_NOFS);
10512 if (!sp)
10513 return -ENOMEM;
10514 sp->ptr = ptr;
10515 sp->inode = inode;
10516 sp->is_block_group = is_block_group;
10517 sp->bg_extent_count = 1;
10518
10519 spin_lock(&fs_info->swapfile_pins_lock);
10520 p = &fs_info->swapfile_pins.rb_node;
10521 while (*p) {
10522 parent = *p;
10523 entry = rb_entry(parent, struct btrfs_swapfile_pin, node);
10524 if (sp->ptr < entry->ptr ||
10525 (sp->ptr == entry->ptr && sp->inode < entry->inode)) {
10526 p = &(*p)->rb_left;
10527 } else if (sp->ptr > entry->ptr ||
10528 (sp->ptr == entry->ptr && sp->inode > entry->inode)) {
10529 p = &(*p)->rb_right;
10530 } else {
10531 if (is_block_group)
10532 entry->bg_extent_count++;
10533 spin_unlock(&fs_info->swapfile_pins_lock);
10534 kfree(sp);
10535 return 1;
10536 }
10537 }
10538 rb_link_node(&sp->node, parent, p);
10539 rb_insert_color(&sp->node, &fs_info->swapfile_pins);
10540 spin_unlock(&fs_info->swapfile_pins_lock);
10541 return 0;
10542}
10543
10544
10545static void btrfs_free_swapfile_pins(struct inode *inode)
10546{
10547 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
10548 struct btrfs_swapfile_pin *sp;
10549 struct rb_node *node, *next;
10550
10551 spin_lock(&fs_info->swapfile_pins_lock);
10552 node = rb_first(&fs_info->swapfile_pins);
10553 while (node) {
10554 next = rb_next(node);
10555 sp = rb_entry(node, struct btrfs_swapfile_pin, node);
10556 if (sp->inode == inode) {
10557 rb_erase(&sp->node, &fs_info->swapfile_pins);
10558 if (sp->is_block_group) {
10559 btrfs_dec_block_group_swap_extents(sp->ptr,
10560 sp->bg_extent_count);
10561 btrfs_put_block_group(sp->ptr);
10562 }
10563 kfree(sp);
10564 }
10565 node = next;
10566 }
10567 spin_unlock(&fs_info->swapfile_pins_lock);
10568}
10569
/*
 * Accumulator used while walking a swapfile's extents during activation.
 * Physically contiguous pieces are merged into one pending run, which is
 * handed to btrfs_add_swap_extent() once it cannot be extended further.
 */
struct btrfs_swap_info {
	/* File offset at which the pending run begins. */
	u64 start;
	/* Physical byte address at which the pending run begins. */
	u64 block_start;
	/* Length of the pending run in bytes; 0 means no run is pending. */
	u64 block_len;
	/* Lowest physical page number reported so far. */
	u64 lowest_ppage;
	/* Highest physical page number reported so far. */
	u64 highest_ppage;
	/* Total number of pages handed to add_swap_extent() so far. */
	unsigned long nr_pages;
	/* Sum of the non-negative values returned by add_swap_extent(). */
	int nr_extents;
};
10579
10580static int btrfs_add_swap_extent(struct swap_info_struct *sis,
10581 struct btrfs_swap_info *bsi)
10582{
10583 unsigned long nr_pages;
10584 u64 first_ppage, first_ppage_reported, next_ppage;
10585 int ret;
10586
10587 first_ppage = ALIGN(bsi->block_start, PAGE_SIZE) >> PAGE_SHIFT;
10588 next_ppage = ALIGN_DOWN(bsi->block_start + bsi->block_len,
10589 PAGE_SIZE) >> PAGE_SHIFT;
10590
10591 if (first_ppage >= next_ppage)
10592 return 0;
10593 nr_pages = next_ppage - first_ppage;
10594
10595 first_ppage_reported = first_ppage;
10596 if (bsi->start == 0)
10597 first_ppage_reported++;
10598 if (bsi->lowest_ppage > first_ppage_reported)
10599 bsi->lowest_ppage = first_ppage_reported;
10600 if (bsi->highest_ppage < (next_ppage - 1))
10601 bsi->highest_ppage = next_ppage - 1;
10602
10603 ret = add_swap_extent(sis, bsi->nr_pages, nr_pages, first_ppage);
10604 if (ret < 0)
10605 return ret;
10606 bsi->nr_extents += ret;
10607 bsi->nr_pages += nr_pages;
10608 return 0;
10609}
10610
10611static void btrfs_swap_deactivate(struct file *file)
10612{
10613 struct inode *inode = file_inode(file);
10614
10615 btrfs_free_swapfile_pins(inode);
10616 atomic_dec(&BTRFS_I(inode)->root->nr_swapfiles);
10617}
10618
10619static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
10620 sector_t *span)
10621{
10622 struct inode *inode = file_inode(file);
10623 struct btrfs_root *root = BTRFS_I(inode)->root;
10624 struct btrfs_fs_info *fs_info = root->fs_info;
10625 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
10626 struct extent_state *cached_state = NULL;
10627 struct extent_map *em = NULL;
10628 struct btrfs_device *device = NULL;
10629 struct btrfs_swap_info bsi = {
10630 .lowest_ppage = (sector_t)-1ULL,
10631 };
10632 int ret = 0;
10633 u64 isize;
10634 u64 start;
10635
10636
10637
10638
10639
10640
10641 ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
10642 if (ret)
10643 return ret;
10644
10645
10646
10647
10648 if (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS) {
10649 btrfs_warn(fs_info, "swapfile must not be compressed");
10650 return -EINVAL;
10651 }
10652 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)) {
10653 btrfs_warn(fs_info, "swapfile must not be copy-on-write");
10654 return -EINVAL;
10655 }
10656 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
10657 btrfs_warn(fs_info, "swapfile must not be checksummed");
10658 return -EINVAL;
10659 }
10660
10661
10662
10663
10664
10665
10666
10667
10668
10669
10670 if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_SWAP_ACTIVATE)) {
10671 btrfs_warn(fs_info,
10672 "cannot activate swapfile while exclusive operation is running");
10673 return -EBUSY;
10674 }
10675
10676
10677
10678
10679
10680
10681
10682
10683 if (!btrfs_drew_try_write_lock(&root->snapshot_lock)) {
10684 btrfs_exclop_finish(fs_info);
10685 btrfs_warn(fs_info,
10686 "cannot activate swapfile because snapshot creation is in progress");
10687 return -EINVAL;
10688 }
10689
10690
10691
10692
10693
10694
10695 atomic_inc(&root->nr_swapfiles);
10696
10697 isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
10698
10699 lock_extent_bits(io_tree, 0, isize - 1, &cached_state);
10700 start = 0;
10701 while (start < isize) {
10702 u64 logical_block_start, physical_block_start;
10703 struct btrfs_block_group *bg;
10704 u64 len = isize - start;
10705
10706 em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len);
10707 if (IS_ERR(em)) {
10708 ret = PTR_ERR(em);
10709 goto out;
10710 }
10711
10712 if (em->block_start == EXTENT_MAP_HOLE) {
10713 btrfs_warn(fs_info, "swapfile must not have holes");
10714 ret = -EINVAL;
10715 goto out;
10716 }
10717 if (em->block_start == EXTENT_MAP_INLINE) {
10718
10719
10720
10721
10722
10723
10724
10725 btrfs_warn(fs_info, "swapfile must not be inline");
10726 ret = -EINVAL;
10727 goto out;
10728 }
10729 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
10730 btrfs_warn(fs_info, "swapfile must not be compressed");
10731 ret = -EINVAL;
10732 goto out;
10733 }
10734
10735 logical_block_start = em->block_start + (start - em->start);
10736 len = min(len, em->len - (start - em->start));
10737 free_extent_map(em);
10738 em = NULL;
10739
10740 ret = can_nocow_extent(inode, start, &len, NULL, NULL, NULL, true);
10741 if (ret < 0) {
10742 goto out;
10743 } else if (ret) {
10744 ret = 0;
10745 } else {
10746 btrfs_warn(fs_info,
10747 "swapfile must not be copy-on-write");
10748 ret = -EINVAL;
10749 goto out;
10750 }
10751
10752 em = btrfs_get_chunk_map(fs_info, logical_block_start, len);
10753 if (IS_ERR(em)) {
10754 ret = PTR_ERR(em);
10755 goto out;
10756 }
10757
10758 if (em->map_lookup->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
10759 btrfs_warn(fs_info,
10760 "swapfile must have single data profile");
10761 ret = -EINVAL;
10762 goto out;
10763 }
10764
10765 if (device == NULL) {
10766 device = em->map_lookup->stripes[0].dev;
10767 ret = btrfs_add_swapfile_pin(inode, device, false);
10768 if (ret == 1)
10769 ret = 0;
10770 else if (ret)
10771 goto out;
10772 } else if (device != em->map_lookup->stripes[0].dev) {
10773 btrfs_warn(fs_info, "swapfile must be on one device");
10774 ret = -EINVAL;
10775 goto out;
10776 }
10777
10778 physical_block_start = (em->map_lookup->stripes[0].physical +
10779 (logical_block_start - em->start));
10780 len = min(len, em->len - (logical_block_start - em->start));
10781 free_extent_map(em);
10782 em = NULL;
10783
10784 bg = btrfs_lookup_block_group(fs_info, logical_block_start);
10785 if (!bg) {
10786 btrfs_warn(fs_info,
10787 "could not find block group containing swapfile");
10788 ret = -EINVAL;
10789 goto out;
10790 }
10791
10792 if (!btrfs_inc_block_group_swap_extents(bg)) {
10793 btrfs_warn(fs_info,
10794 "block group for swapfile at %llu is read-only%s",
10795 bg->start,
10796 atomic_read(&fs_info->scrubs_running) ?
10797 " (scrub running)" : "");
10798 btrfs_put_block_group(bg);
10799 ret = -EINVAL;
10800 goto out;
10801 }
10802
10803 ret = btrfs_add_swapfile_pin(inode, bg, true);
10804 if (ret) {
10805 btrfs_put_block_group(bg);
10806 if (ret == 1)
10807 ret = 0;
10808 else
10809 goto out;
10810 }
10811
10812 if (bsi.block_len &&
10813 bsi.block_start + bsi.block_len == physical_block_start) {
10814 bsi.block_len += len;
10815 } else {
10816 if (bsi.block_len) {
10817 ret = btrfs_add_swap_extent(sis, &bsi);
10818 if (ret)
10819 goto out;
10820 }
10821 bsi.start = start;
10822 bsi.block_start = physical_block_start;
10823 bsi.block_len = len;
10824 }
10825
10826 start += len;
10827 }
10828
10829 if (bsi.block_len)
10830 ret = btrfs_add_swap_extent(sis, &bsi);
10831
10832out:
10833 if (!IS_ERR_OR_NULL(em))
10834 free_extent_map(em);
10835
10836 unlock_extent_cached(io_tree, 0, isize - 1, &cached_state);
10837
10838 if (ret)
10839 btrfs_swap_deactivate(file);
10840
10841 btrfs_drew_write_unlock(&root->snapshot_lock);
10842
10843 btrfs_exclop_finish(fs_info);
10844
10845 if (ret)
10846 return ret;
10847
10848 if (device)
10849 sis->bdev = device->bdev;
10850 *span = bsi.highest_ppage - bsi.lowest_ppage + 1;
10851 sis->max = bsi.nr_pages;
10852 sis->pages = bsi.nr_pages - 1;
10853 sis->highest_bit = bsi.nr_pages - 1;
10854 return bsi.nr_extents;
10855}
10856#else
/* Stub used when CONFIG_SWAP is not set: there is nothing to undo. */
static void btrfs_swap_deactivate(struct file *file)
{
}
10860
/*
 * Stub used when CONFIG_SWAP is not set: swapfile activation always fails
 * with -EOPNOTSUPP.
 */
static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
			       sector_t *span)
{
	return -EOPNOTSUPP;
}
10866#endif
10867
10868
10869
10870
10871
10872
10873
10874void btrfs_update_inode_bytes(struct btrfs_inode *inode,
10875 const u64 add_bytes,
10876 const u64 del_bytes)
10877{
10878 if (add_bytes == del_bytes)
10879 return;
10880
10881 spin_lock(&inode->lock);
10882 if (del_bytes > 0)
10883 inode_sub_bytes(&inode->vfs_inode, del_bytes);
10884 if (add_bytes > 0)
10885 inode_add_bytes(&inode->vfs_inode, add_bytes);
10886 spin_unlock(&inode->lock);
10887}
10888
10889static const struct inode_operations btrfs_dir_inode_operations = {
10890 .getattr = btrfs_getattr,
10891 .lookup = btrfs_lookup,
10892 .create = btrfs_create,
10893 .unlink = btrfs_unlink,
10894 .link = btrfs_link,
10895 .mkdir = btrfs_mkdir,
10896 .rmdir = btrfs_rmdir,
10897 .rename = btrfs_rename2,
10898 .symlink = btrfs_symlink,
10899 .setattr = btrfs_setattr,
10900 .mknod = btrfs_mknod,
10901 .listxattr = btrfs_listxattr,
10902 .permission = btrfs_permission,
10903 .get_acl = btrfs_get_acl,
10904 .set_acl = btrfs_set_acl,
10905 .update_time = btrfs_update_time,
10906 .tmpfile = btrfs_tmpfile,
10907 .fileattr_get = btrfs_fileattr_get,
10908 .fileattr_set = btrfs_fileattr_set,
10909};
10910
10911static const struct file_operations btrfs_dir_file_operations = {
10912 .llseek = generic_file_llseek,
10913 .read = generic_read_dir,
10914 .iterate_shared = btrfs_real_readdir,
10915 .open = btrfs_opendir,
10916 .unlocked_ioctl = btrfs_ioctl,
10917#ifdef CONFIG_COMPAT
10918 .compat_ioctl = btrfs_compat_ioctl,
10919#endif
10920 .release = btrfs_release_file,
10921 .fsync = btrfs_sync_file,
10922};
10923
10924
10925
10926
10927
10928
10929
10930
10931
10932
10933
10934
10935
10936static const struct address_space_operations btrfs_aops = {
10937 .readpage = btrfs_readpage,
10938 .writepage = btrfs_writepage,
10939 .writepages = btrfs_writepages,
10940 .readahead = btrfs_readahead,
10941 .direct_IO = noop_direct_IO,
10942 .invalidatepage = btrfs_invalidatepage,
10943 .releasepage = btrfs_releasepage,
10944#ifdef CONFIG_MIGRATION
10945 .migratepage = btrfs_migratepage,
10946#endif
10947 .set_page_dirty = btrfs_set_page_dirty,
10948 .error_remove_page = generic_error_remove_page,
10949 .swap_activate = btrfs_swap_activate,
10950 .swap_deactivate = btrfs_swap_deactivate,
10951};
10952
10953static const struct inode_operations btrfs_file_inode_operations = {
10954 .getattr = btrfs_getattr,
10955 .setattr = btrfs_setattr,
10956 .listxattr = btrfs_listxattr,
10957 .permission = btrfs_permission,
10958 .fiemap = btrfs_fiemap,
10959 .get_acl = btrfs_get_acl,
10960 .set_acl = btrfs_set_acl,
10961 .update_time = btrfs_update_time,
10962 .fileattr_get = btrfs_fileattr_get,
10963 .fileattr_set = btrfs_fileattr_set,
10964};
10965static const struct inode_operations btrfs_special_inode_operations = {
10966 .getattr = btrfs_getattr,
10967 .setattr = btrfs_setattr,
10968 .permission = btrfs_permission,
10969 .listxattr = btrfs_listxattr,
10970 .get_acl = btrfs_get_acl,
10971 .set_acl = btrfs_set_acl,
10972 .update_time = btrfs_update_time,
10973};
10974static const struct inode_operations btrfs_symlink_inode_operations = {
10975 .get_link = page_get_link,
10976 .getattr = btrfs_getattr,
10977 .setattr = btrfs_setattr,
10978 .permission = btrfs_permission,
10979 .listxattr = btrfs_listxattr,
10980 .update_time = btrfs_update_time,
10981};
10982
/*
 * Dentry operations exported for btrfs dentries; only the d_delete callback
 * is provided.
 */
const struct dentry_operations btrfs_dentry_operations = {
	.d_delete = btrfs_dentry_delete,
};
10986