1
2
3
4
5
6#include <crypto/hash.h>
7#include <linux/kernel.h>
8#include <linux/bio.h>
9#include <linux/buffer_head.h>
10#include <linux/file.h>
11#include <linux/fs.h>
12#include <linux/pagemap.h>
13#include <linux/highmem.h>
14#include <linux/time.h>
15#include <linux/init.h>
16#include <linux/string.h>
17#include <linux/backing-dev.h>
18#include <linux/writeback.h>
19#include <linux/compat.h>
20#include <linux/xattr.h>
21#include <linux/posix_acl.h>
22#include <linux/falloc.h>
23#include <linux/slab.h>
24#include <linux/ratelimit.h>
25#include <linux/btrfs.h>
26#include <linux/blkdev.h>
27#include <linux/posix_acl_xattr.h>
28#include <linux/uio.h>
29#include <linux/magic.h>
30#include <linux/iversion.h>
31#include <linux/swap.h>
32#include <linux/migrate.h>
33#include <linux/sched/mm.h>
34#include <asm/unaligned.h>
35#include "misc.h"
36#include "ctree.h"
37#include "disk-io.h"
38#include "transaction.h"
39#include "btrfs_inode.h"
40#include "print-tree.h"
41#include "ordered-data.h"
42#include "xattr.h"
43#include "tree-log.h"
44#include "volumes.h"
45#include "compression.h"
46#include "locking.h"
47#include "free-space-cache.h"
48#include "inode-map.h"
49#include "props.h"
50#include "qgroup.h"
51#include "delalloc-space.h"
52#include "block-group.h"
53#include "space-info.h"
54
/*
 * Pairs an inode number with the root it lives in.
 * NOTE(review): presumably the lookup key handed to the iget helpers; no
 * user is visible in this part of the file - confirm.
 */
struct btrfs_iget_args {
	u64 ino;			/* inode number */
	struct btrfs_root *root;	/* root the inode belongs to */
};
59
/*
 * Per-operation bookkeeping for direct IO.
 * NOTE(review): the users of this structure are outside this part of the
 * file; the field descriptions are inferred from the names - confirm.
 */
struct btrfs_dio_data {
	u64 reserve;				/* presumably bytes still reserved */
	u64 unsubmitted_oe_range_start;		/* presumably start of the not yet submitted ordered extent range */
	u64 unsubmitted_oe_range_end;		/* presumably end of that range */
	int overwrite;				/* presumably non-zero for an overwrite of existing data */
};
66
/* Forward declarations of the operation tables used by this file. */
static const struct inode_operations btrfs_dir_inode_operations;
static const struct inode_operations btrfs_symlink_inode_operations;
static const struct inode_operations btrfs_special_inode_operations;
static const struct inode_operations btrfs_file_inode_operations;
static const struct address_space_operations btrfs_aops;
static const struct file_operations btrfs_dir_file_operations;
static const struct extent_io_ops btrfs_extent_io_ops;

/*
 * Slab caches.  Only btrfs_inode_cachep is private to this file; the others
 * have external linkage.
 */
static struct kmem_cache *btrfs_inode_cachep;
struct kmem_cache *btrfs_trans_handle_cachep;
struct kmem_cache *btrfs_path_cachep;
struct kmem_cache *btrfs_free_space_cachep;
struct kmem_cache *btrfs_free_space_bitmap_cachep;

/* Prototypes for static helpers that are used before they are defined. */
static int btrfs_setsize(struct inode *inode, struct iattr *attr);
static int btrfs_truncate(struct inode *inode, bool skip_writeback);
static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
static noinline int cow_file_range(struct btrfs_inode *inode,
				   struct page *locked_page,
				   u64 start, u64 end, int *page_started,
				   unsigned long *nr_written, int unlock);
static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
				       u64 len, u64 orig_start, u64 block_start,
				       u64 block_len, u64 orig_block_len,
				       u64 ram_bytes, int compress_type,
				       int type);

static void __endio_write_update_ordered(struct btrfs_inode *inode,
					 const u64 offset, const u64 bytes,
					 const bool uptodate);
97
98
99
100
101
102
103
104
105
106
107
108static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode,
109 struct page *locked_page,
110 u64 offset, u64 bytes)
111{
112 unsigned long index = offset >> PAGE_SHIFT;
113 unsigned long end_index = (offset + bytes - 1) >> PAGE_SHIFT;
114 u64 page_start = page_offset(locked_page);
115 u64 page_end = page_start + PAGE_SIZE - 1;
116
117 struct page *page;
118
119 while (index <= end_index) {
120 page = find_get_page(inode->vfs_inode.i_mapping, index);
121 index++;
122 if (!page)
123 continue;
124 ClearPagePrivate2(page);
125 put_page(page);
126 }
127
128
129
130
131
132
133 if (page_start >= offset && page_end <= (offset + bytes - 1)) {
134 offset += PAGE_SIZE;
135 bytes -= PAGE_SIZE;
136 }
137
138 return __endio_write_update_ordered(inode, offset, bytes, false);
139}
140
141static int btrfs_dirty_inode(struct inode *inode);
142
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/*
 * Only built for the sanity tests: point the io_tree of @inode at this
 * file's btrfs_extent_io_ops table.
 */
void btrfs_test_inode_set_ops(struct inode *inode)
{
	BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
}
#endif
149
/*
 * Initialise the ACLs of @inode and, when that succeeds, its security
 * xattrs.
 *
 * Returns the non-zero result of btrfs_init_acl() if it fails, otherwise
 * whatever btrfs_xattr_security_init() returns.
 */
static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
				     struct inode *inode,  struct inode *dir,
				     const struct qstr *qstr)
{
	int ret = btrfs_init_acl(trans, inode, dir);

	if (ret)
		return ret;

	return btrfs_xattr_security_init(trans, inode, dir, qstr);
}
161
162
163
164
165
166
167static int insert_inline_extent(struct btrfs_trans_handle *trans,
168 struct btrfs_path *path, int extent_inserted,
169 struct btrfs_root *root, struct inode *inode,
170 u64 start, size_t size, size_t compressed_size,
171 int compress_type,
172 struct page **compressed_pages)
173{
174 struct extent_buffer *leaf;
175 struct page *page = NULL;
176 char *kaddr;
177 unsigned long ptr;
178 struct btrfs_file_extent_item *ei;
179 int ret;
180 size_t cur_size = size;
181 unsigned long offset;
182
183 ASSERT((compressed_size > 0 && compressed_pages) ||
184 (compressed_size == 0 && !compressed_pages));
185
186 if (compressed_size && compressed_pages)
187 cur_size = compressed_size;
188
189 inode_add_bytes(inode, size);
190
191 if (!extent_inserted) {
192 struct btrfs_key key;
193 size_t datasize;
194
195 key.objectid = btrfs_ino(BTRFS_I(inode));
196 key.offset = start;
197 key.type = BTRFS_EXTENT_DATA_KEY;
198
199 datasize = btrfs_file_extent_calc_inline_size(cur_size);
200 path->leave_spinning = 1;
201 ret = btrfs_insert_empty_item(trans, root, path, &key,
202 datasize);
203 if (ret)
204 goto fail;
205 }
206 leaf = path->nodes[0];
207 ei = btrfs_item_ptr(leaf, path->slots[0],
208 struct btrfs_file_extent_item);
209 btrfs_set_file_extent_generation(leaf, ei, trans->transid);
210 btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE);
211 btrfs_set_file_extent_encryption(leaf, ei, 0);
212 btrfs_set_file_extent_other_encoding(leaf, ei, 0);
213 btrfs_set_file_extent_ram_bytes(leaf, ei, size);
214 ptr = btrfs_file_extent_inline_start(ei);
215
216 if (compress_type != BTRFS_COMPRESS_NONE) {
217 struct page *cpage;
218 int i = 0;
219 while (compressed_size > 0) {
220 cpage = compressed_pages[i];
221 cur_size = min_t(unsigned long, compressed_size,
222 PAGE_SIZE);
223
224 kaddr = kmap_atomic(cpage);
225 write_extent_buffer(leaf, kaddr, ptr, cur_size);
226 kunmap_atomic(kaddr);
227
228 i++;
229 ptr += cur_size;
230 compressed_size -= cur_size;
231 }
232 btrfs_set_file_extent_compression(leaf, ei,
233 compress_type);
234 } else {
235 page = find_get_page(inode->i_mapping,
236 start >> PAGE_SHIFT);
237 btrfs_set_file_extent_compression(leaf, ei, 0);
238 kaddr = kmap_atomic(page);
239 offset = offset_in_page(start);
240 write_extent_buffer(leaf, kaddr + offset, ptr, size);
241 kunmap_atomic(kaddr);
242 put_page(page);
243 }
244 btrfs_mark_buffer_dirty(leaf);
245 btrfs_release_path(path);
246
247
248
249
250
251 size = ALIGN(size, root->fs_info->sectorsize);
252 ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), start, size);
253 if (ret)
254 goto fail;
255
256
257
258
259
260
261
262
263
264
265 BTRFS_I(inode)->disk_i_size = inode->i_size;
266 ret = btrfs_update_inode(trans, root, inode);
267
268fail:
269 return ret;
270}
271
272
273
274
275
276
277
278static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start,
279 u64 end, size_t compressed_size,
280 int compress_type,
281 struct page **compressed_pages)
282{
283 struct btrfs_root *root = inode->root;
284 struct btrfs_fs_info *fs_info = root->fs_info;
285 struct btrfs_trans_handle *trans;
286 u64 isize = i_size_read(&inode->vfs_inode);
287 u64 actual_end = min(end + 1, isize);
288 u64 inline_len = actual_end - start;
289 u64 aligned_end = ALIGN(end, fs_info->sectorsize);
290 u64 data_len = inline_len;
291 int ret;
292 struct btrfs_path *path;
293 int extent_inserted = 0;
294 u32 extent_item_size;
295
296 if (compressed_size)
297 data_len = compressed_size;
298
299 if (start > 0 ||
300 actual_end > fs_info->sectorsize ||
301 data_len > BTRFS_MAX_INLINE_DATA_SIZE(fs_info) ||
302 (!compressed_size &&
303 (actual_end & (fs_info->sectorsize - 1)) == 0) ||
304 end + 1 < isize ||
305 data_len > fs_info->max_inline) {
306 return 1;
307 }
308
309 path = btrfs_alloc_path();
310 if (!path)
311 return -ENOMEM;
312
313 trans = btrfs_join_transaction(root);
314 if (IS_ERR(trans)) {
315 btrfs_free_path(path);
316 return PTR_ERR(trans);
317 }
318 trans->block_rsv = &inode->block_rsv;
319
320 if (compressed_size && compressed_pages)
321 extent_item_size = btrfs_file_extent_calc_inline_size(
322 compressed_size);
323 else
324 extent_item_size = btrfs_file_extent_calc_inline_size(
325 inline_len);
326
327 ret = __btrfs_drop_extents(trans, root, inode, path, start, aligned_end,
328 NULL, 1, 1, extent_item_size,
329 &extent_inserted);
330 if (ret) {
331 btrfs_abort_transaction(trans, ret);
332 goto out;
333 }
334
335 if (isize > actual_end)
336 inline_len = min_t(u64, isize, actual_end);
337 ret = insert_inline_extent(trans, path, extent_inserted,
338 root, &inode->vfs_inode, start,
339 inline_len, compressed_size,
340 compress_type, compressed_pages);
341 if (ret && ret != -ENOSPC) {
342 btrfs_abort_transaction(trans, ret);
343 goto out;
344 } else if (ret == -ENOSPC) {
345 ret = 1;
346 goto out;
347 }
348
349 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
350 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
351out:
352
353
354
355
356
357
358 btrfs_qgroup_free_data(inode, NULL, 0, PAGE_SIZE);
359 btrfs_free_path(path);
360 btrfs_end_transaction(trans);
361 return ret;
362}
363
/*
 * One piece of work queued by compress_file_range() and consumed by
 * submit_compressed_extents().
 */
struct async_extent {
	u64 start;		/* file offset of the first byte */
	u64 ram_size;		/* length of the file range in bytes */
	u64 compressed_size;	/* compressed length, 0 for an uncompressed extent */
	struct page **pages;	/* compressed pages, NULL means "write uncompressed" */
	unsigned long nr_pages;	/* number of entries in @pages */
	int compress_type;	/* BTRFS_COMPRESS_* value used for @pages */
	struct list_head list;	/* link in async_chunk::extents */
};
373
/*
 * One unit of asynchronous delalloc work, set up by cow_file_range_async()
 * and processed by the async_cow_start/submit/free callbacks.
 */
struct async_chunk {
	struct inode *inode;		/* referenced via ihold(); NULL once the reference was dropped */
	struct page *locked_page;	/* only set for the first chunk of a range, else NULL */
	u64 start;			/* first byte of the chunk */
	u64 end;			/* last byte of the chunk (inclusive) */
	unsigned int write_flags;	/* result of wbc_to_write_flags() */
	struct list_head extents;	/* list of struct async_extent */
	struct cgroup_subsys_state *blkcg_css;	/* referenced css, NULL for the root css */
	struct btrfs_work work;
	atomic_t *pending;		/* points at async_cow::num_chunks */
};
385
/*
 * Container for all chunks of one cow_file_range_async() call.
 */
struct async_cow {
	/*
	 * Number of chunks not yet freed.  async_cow_free() passes the
	 * address of this member to kvfree() to release the whole
	 * structure, so it has to remain the first member.
	 */
	atomic_t num_chunks;
	struct async_chunk chunks[];
};
391
392static noinline int add_async_extent(struct async_chunk *cow,
393 u64 start, u64 ram_size,
394 u64 compressed_size,
395 struct page **pages,
396 unsigned long nr_pages,
397 int compress_type)
398{
399 struct async_extent *async_extent;
400
401 async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS);
402 BUG_ON(!async_extent);
403 async_extent->start = start;
404 async_extent->ram_size = ram_size;
405 async_extent->compressed_size = compressed_size;
406 async_extent->pages = pages;
407 async_extent->nr_pages = nr_pages;
408 async_extent->compress_type = compress_type;
409 list_add_tail(&async_extent->list, &cow->extents);
410 return 0;
411}
412
413
414
415
416static inline bool inode_can_compress(struct btrfs_inode *inode)
417{
418 if (inode->flags & BTRFS_INODE_NODATACOW ||
419 inode->flags & BTRFS_INODE_NODATASUM)
420 return false;
421 return true;
422}
423
424
425
426
427
428static inline int inode_need_compress(struct btrfs_inode *inode, u64 start,
429 u64 end)
430{
431 struct btrfs_fs_info *fs_info = inode->root->fs_info;
432
433 if (!inode_can_compress(inode)) {
434 WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
435 KERN_ERR "BTRFS: unexpected compression for ino %llu\n",
436 btrfs_ino(inode));
437 return 0;
438 }
439
440 if (btrfs_test_opt(fs_info, FORCE_COMPRESS))
441 return 1;
442
443 if (inode->defrag_compress)
444 return 1;
445
446 if (inode->flags & BTRFS_INODE_NOCOMPRESS)
447 return 0;
448 if (btrfs_test_opt(fs_info, COMPRESS) ||
449 inode->flags & BTRFS_INODE_COMPRESS ||
450 inode->prop_compress)
451 return btrfs_compress_heuristic(&inode->vfs_inode, start, end);
452 return 0;
453}
454
455static inline void inode_should_defrag(struct btrfs_inode *inode,
456 u64 start, u64 end, u64 num_bytes, u64 small_write)
457{
458
459 if (num_bytes < small_write &&
460 (start > 0 || end + 1 < inode->disk_i_size))
461 btrfs_add_inode_defrag(NULL, inode);
462}
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481static noinline int compress_file_range(struct async_chunk *async_chunk)
482{
483 struct inode *inode = async_chunk->inode;
484 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
485 u64 blocksize = fs_info->sectorsize;
486 u64 start = async_chunk->start;
487 u64 end = async_chunk->end;
488 u64 actual_end;
489 u64 i_size;
490 int ret = 0;
491 struct page **pages = NULL;
492 unsigned long nr_pages;
493 unsigned long total_compressed = 0;
494 unsigned long total_in = 0;
495 int i;
496 int will_compress;
497 int compress_type = fs_info->compress_type;
498 int compressed_extents = 0;
499 int redirty = 0;
500
501 inode_should_defrag(BTRFS_I(inode), start, end, end - start + 1,
502 SZ_16K);
503
504
505
506
507
508
509
510
511
512
513 barrier();
514 i_size = i_size_read(inode);
515 barrier();
516 actual_end = min_t(u64, i_size, end + 1);
517again:
518 will_compress = 0;
519 nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1;
520 BUILD_BUG_ON((BTRFS_MAX_COMPRESSED % PAGE_SIZE) != 0);
521 nr_pages = min_t(unsigned long, nr_pages,
522 BTRFS_MAX_COMPRESSED / PAGE_SIZE);
523
524
525
526
527
528
529
530
531
532
533
534 if (actual_end <= start)
535 goto cleanup_and_bail_uncompressed;
536
537 total_compressed = actual_end - start;
538
539
540
541
542
543 if (total_compressed <= blocksize &&
544 (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
545 goto cleanup_and_bail_uncompressed;
546
547 total_compressed = min_t(unsigned long, total_compressed,
548 BTRFS_MAX_UNCOMPRESSED);
549 total_in = 0;
550 ret = 0;
551
552
553
554
555
556
557 if (inode_need_compress(BTRFS_I(inode), start, end)) {
558 WARN_ON(pages);
559 pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
560 if (!pages) {
561
562 nr_pages = 0;
563 goto cont;
564 }
565
566 if (BTRFS_I(inode)->defrag_compress)
567 compress_type = BTRFS_I(inode)->defrag_compress;
568 else if (BTRFS_I(inode)->prop_compress)
569 compress_type = BTRFS_I(inode)->prop_compress;
570
571
572
573
574
575
576
577
578
579
580
581
582
583 if (!redirty) {
584 extent_range_clear_dirty_for_io(inode, start, end);
585 redirty = 1;
586 }
587
588
589 ret = btrfs_compress_pages(
590 compress_type | (fs_info->compress_level << 4),
591 inode->i_mapping, start,
592 pages,
593 &nr_pages,
594 &total_in,
595 &total_compressed);
596
597 if (!ret) {
598 unsigned long offset = offset_in_page(total_compressed);
599 struct page *page = pages[nr_pages - 1];
600 char *kaddr;
601
602
603
604
605 if (offset) {
606 kaddr = kmap_atomic(page);
607 memset(kaddr + offset, 0,
608 PAGE_SIZE - offset);
609 kunmap_atomic(kaddr);
610 }
611 will_compress = 1;
612 }
613 }
614cont:
615 if (start == 0) {
616
617 if (ret || total_in < actual_end) {
618
619
620
621 ret = cow_file_range_inline(BTRFS_I(inode), start, end,
622 0, BTRFS_COMPRESS_NONE,
623 NULL);
624 } else {
625
626 ret = cow_file_range_inline(BTRFS_I(inode), start, end,
627 total_compressed,
628 compress_type, pages);
629 }
630 if (ret <= 0) {
631 unsigned long clear_flags = EXTENT_DELALLOC |
632 EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
633 EXTENT_DO_ACCOUNTING;
634 unsigned long page_error_op;
635
636 page_error_op = ret < 0 ? PAGE_SET_ERROR : 0;
637
638
639
640
641
642
643
644
645
646
647
648 extent_clear_unlock_delalloc(BTRFS_I(inode), start, end,
649 NULL,
650 clear_flags,
651 PAGE_UNLOCK |
652 PAGE_CLEAR_DIRTY |
653 PAGE_SET_WRITEBACK |
654 page_error_op |
655 PAGE_END_WRITEBACK);
656
657
658
659
660
661
662 if (pages) {
663 for (i = 0; i < nr_pages; i++) {
664 WARN_ON(pages[i]->mapping);
665 put_page(pages[i]);
666 }
667 kfree(pages);
668 }
669 return 0;
670 }
671 }
672
673 if (will_compress) {
674
675
676
677
678
679 total_compressed = ALIGN(total_compressed, blocksize);
680
681
682
683
684
685
686 total_in = ALIGN(total_in, PAGE_SIZE);
687 if (total_compressed + blocksize <= total_in) {
688 compressed_extents++;
689
690
691
692
693
694
695 add_async_extent(async_chunk, start, total_in,
696 total_compressed, pages, nr_pages,
697 compress_type);
698
699 if (start + total_in < end) {
700 start += total_in;
701 pages = NULL;
702 cond_resched();
703 goto again;
704 }
705 return compressed_extents;
706 }
707 }
708 if (pages) {
709
710
711
712
713 for (i = 0; i < nr_pages; i++) {
714 WARN_ON(pages[i]->mapping);
715 put_page(pages[i]);
716 }
717 kfree(pages);
718 pages = NULL;
719 total_compressed = 0;
720 nr_pages = 0;
721
722
723 if (!btrfs_test_opt(fs_info, FORCE_COMPRESS) &&
724 !(BTRFS_I(inode)->prop_compress)) {
725 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
726 }
727 }
728cleanup_and_bail_uncompressed:
729
730
731
732
733
734
735 if (async_chunk->locked_page &&
736 (page_offset(async_chunk->locked_page) >= start &&
737 page_offset(async_chunk->locked_page)) <= end) {
738 __set_page_dirty_nobuffers(async_chunk->locked_page);
739
740 }
741
742 if (redirty)
743 extent_range_redirty_for_io(inode, start, end);
744 add_async_extent(async_chunk, start, end - start + 1, 0, NULL, 0,
745 BTRFS_COMPRESS_NONE);
746 compressed_extents++;
747
748 return compressed_extents;
749}
750
751static void free_async_extent_pages(struct async_extent *async_extent)
752{
753 int i;
754
755 if (!async_extent->pages)
756 return;
757
758 for (i = 0; i < async_extent->nr_pages; i++) {
759 WARN_ON(async_extent->pages[i]->mapping);
760 put_page(async_extent->pages[i]);
761 }
762 kfree(async_extent->pages);
763 async_extent->nr_pages = 0;
764 async_extent->pages = NULL;
765}
766
767
768
769
770
771
772
/*
 * Second phase of the async delalloc work: walk the async extents queued on
 * @async_chunk by compress_file_range() and turn each of them into IO.
 *
 * Extents without pages are written uncompressed through cow_file_range().
 * Extents with pages get a disk extent reserved, an extent map and an
 * ordered extent created, and are then submitted as a compressed write.
 * Every async_extent is freed here once it has been handled.
 */
static noinline void submit_compressed_extents(struct async_chunk *async_chunk)
{
	struct btrfs_inode *inode = BTRFS_I(async_chunk->inode);
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	struct async_extent *async_extent;
	u64 alloc_hint = 0;
	struct btrfs_key ins;
	struct extent_map *em;
	struct btrfs_root *root = inode->root;
	struct extent_io_tree *io_tree = &inode->io_tree;
	int ret = 0;

again:
	while (!list_empty(&async_chunk->extents)) {
		async_extent = list_entry(async_chunk->extents.next,
					  struct async_extent, list);
		list_del(&async_extent->list);

retry:
		lock_extent(io_tree, async_extent->start,
			    async_extent->start + async_extent->ram_size - 1);
		/* No pages: this extent has to be written uncompressed. */
		if (!async_extent->pages) {
			int page_started = 0;
			unsigned long nr_written = 0;

			/* Allocate the extents; unlock == 0 keeps the pages locked. */
			ret = cow_file_range(inode, async_chunk->locked_page,
					     async_extent->start,
					     async_extent->start +
					     async_extent->ram_size - 1,
					     &page_started, &nr_written, 0);

			/*
			 * page_started is set when cow_file_range() created
			 * an inline extent and already dealt with the pages.
			 * Otherwise, on success, the still locked range is
			 * written out here.  On failure only the locked page
			 * is unlocked, if this chunk has one.
			 */
			if (!page_started && !ret)
				extent_write_locked_range(&inode->vfs_inode,
						  async_extent->start,
						  async_extent->start +
						  async_extent->ram_size - 1,
						  WB_SYNC_ALL);
			else if (ret && async_chunk->locked_page)
				unlock_page(async_chunk->locked_page);
			kfree(async_extent);
			cond_resched();
			continue;
		}

		ret = btrfs_reserve_extent(root, async_extent->ram_size,
					   async_extent->compressed_size,
					   async_extent->compressed_size,
					   0, alloc_hint, &ins, 1, 1);
		if (ret) {
			/* This also sets ->pages to NULL. */
			free_async_extent_pages(async_extent);

			if (ret == -ENOSPC) {
				unlock_extent(io_tree, async_extent->start,
					      async_extent->start +
					      async_extent->ram_size - 1);

				/*
				 * Fall back to uncompressed IO: redirty the
				 * range and retry.  With ->pages now NULL the
				 * retry takes the uncompressed branch above.
				 */
				extent_range_redirty_for_io(&inode->vfs_inode,
						async_extent->start,
						async_extent->start +
						async_extent->ram_size - 1);

				goto retry;
			}
			goto out_free;
		}
		/*
		 * The disk extent is reserved.  Set up the extent map and the
		 * ordered extent for the compressed write.
		 */
		em = create_io_em(inode, async_extent->start,
				  async_extent->ram_size, /* len */
				  async_extent->start, /* orig_start */
				  ins.objectid, /* block_start */
				  ins.offset, /* block_len */
				  ins.offset, /* orig_block_len */
				  async_extent->ram_size, /* ram_bytes */
				  async_extent->compress_type,
				  BTRFS_ORDERED_COMPRESSED);
		if (IS_ERR(em))
			/* The error value is not kept: this function returns void. */
			goto out_free_reserve;
		free_extent_map(em);

		ret = btrfs_add_ordered_extent_compress(inode,
						async_extent->start,
						ins.objectid,
						async_extent->ram_size,
						ins.offset,
						BTRFS_ORDERED_COMPRESSED,
						async_extent->compress_type);
		if (ret) {
			btrfs_drop_extent_cache(inode, async_extent->start,
						async_extent->start +
						async_extent->ram_size - 1, 0);
			goto out_free_reserve;
		}
		btrfs_dec_block_group_reservations(fs_info, ins.objectid);

		/* Clear dirty, set writeback and unlock the page cache pages. */
		extent_clear_unlock_delalloc(inode, async_extent->start,
				async_extent->start +
				async_extent->ram_size - 1,
				NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
				PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
				PAGE_SET_WRITEBACK);
		if (btrfs_submit_compressed_write(inode, async_extent->start,
				    async_extent->ram_size,
				    ins.objectid,
				    ins.offset, async_extent->pages,
				    async_extent->nr_pages,
				    async_chunk->write_flags,
				    async_chunk->blkcg_css)) {
			struct page *p = async_extent->pages[0];
			const u64 start = async_extent->start;
			const u64 end = start + async_extent->ram_size - 1;

			/*
			 * Submission failed.  The first compressed page gets
			 * the inode's mapping only for the duration of the
			 * call that finishes the ordered extent with
			 * uptodate == 0; the mapping is reset right after.
			 */
			p->mapping = inode->vfs_inode.i_mapping;
			btrfs_writepage_endio_finish_ordered(p, start, end, 0);

			p->mapping = NULL;
			extent_clear_unlock_delalloc(inode, start, end, NULL, 0,
						     PAGE_END_WRITEBACK |
						     PAGE_SET_ERROR);
			free_async_extent_pages(async_extent);
		}
		alloc_hint = ins.objectid + ins.offset;
		kfree(async_extent);
		cond_resched();
	}
	return;
out_free_reserve:
	/* Give back the reserved disk extent. */
	btrfs_dec_block_group_reservations(fs_info, ins.objectid);
	btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
out_free:
	/*
	 * Fail the whole extent: clear its delalloc state, flag the pages
	 * with an error and unlock them, free the extent, then carry on with
	 * whatever is left on the list.
	 */
	extent_clear_unlock_delalloc(inode, async_extent->start,
				     async_extent->start +
				     async_extent->ram_size - 1,
				     NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
				     EXTENT_DELALLOC_NEW |
				     EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
				     PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
				     PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK |
				     PAGE_SET_ERROR);
	free_async_extent_pages(async_extent);
	kfree(async_extent);
	goto again;
}
938
939static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
940 u64 num_bytes)
941{
942 struct extent_map_tree *em_tree = &inode->extent_tree;
943 struct extent_map *em;
944 u64 alloc_hint = 0;
945
946 read_lock(&em_tree->lock);
947 em = search_extent_mapping(em_tree, start, num_bytes);
948 if (em) {
949
950
951
952
953
954 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
955 free_extent_map(em);
956 em = search_extent_mapping(em_tree, 0, 0);
957 if (em && em->block_start < EXTENT_MAP_LAST_BYTE)
958 alloc_hint = em->block_start;
959 if (em)
960 free_extent_map(em);
961 } else {
962 alloc_hint = em->block_start;
963 free_extent_map(em);
964 }
965 }
966 read_unlock(&em_tree->lock);
967
968 return alloc_hint;
969}
970
971
972
973
974
975
976
977
978
979
980
981
982
983
/*
 * Allocate disk extents for the delalloc range [start, end] of @inode and
 * create the matching extent maps and ordered extents.
 *
 * @locked_page:  passed through to extent_clear_unlock_delalloc()
 * @page_started: set to 1 when the range was stored as an inline extent
 * @nr_written:   increased by the number of pages of the range in that case
 * @unlock:       when non-zero the pages are unlocked as extents get set up
 *
 * Returns 0 on success and a negative errno on failure.  On failure the
 * delalloc state of the part of the range that was not set up is cleared
 * and its pages are unlocked.
 */
static noinline int cow_file_range(struct btrfs_inode *inode,
				   struct page *locked_page,
				   u64 start, u64 end, int *page_started,
				   unsigned long *nr_written, int unlock)
{
	struct btrfs_root *root = inode->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 alloc_hint = 0;
	u64 num_bytes;
	unsigned long ram_size;
	u64 cur_alloc_size = 0;
	u64 min_alloc_size;
	u64 blocksize = fs_info->sectorsize;
	struct btrfs_key ins;
	struct extent_map *em;
	unsigned clear_bits;
	unsigned long page_ops;
	bool extent_reserved = false;
	int ret = 0;

	/* Free space inodes are not expected to reach this function. */
	if (btrfs_is_free_space_inode(inode)) {
		WARN_ON_ONCE(1);
		ret = -EINVAL;
		goto out_unlock;
	}

	num_bytes = ALIGN(end - start + 1, blocksize);
	num_bytes = max(blocksize, num_bytes);
	ASSERT(num_bytes <= btrfs_super_total_bytes(fs_info->super_copy));

	inode_should_defrag(inode, start, end, num_bytes, SZ_64K);

	if (start == 0) {
		/* A range starting at offset 0 may fit into an inline extent. */
		ret = cow_file_range_inline(inode, start, end, 0,
					    BTRFS_COMPRESS_NONE, NULL);
		if (ret == 0) {
			/*
			 * The data now lives in the inline extent.  Clear the
			 * delalloc state, run the pages through a
			 * set/end writeback cycle and unlock them; nothing is
			 * left for the caller to write.
			 */
			extent_clear_unlock_delalloc(inode, start, end, NULL,
				     EXTENT_LOCKED | EXTENT_DELALLOC |
				     EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
				     EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
				     PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
				     PAGE_END_WRITEBACK);
			*nr_written = *nr_written +
			     (end - start + PAGE_SIZE) / PAGE_SIZE;
			*page_started = 1;
			goto out;
		} else if (ret < 0) {
			goto out_unlock;
		}
		/* ret > 0: not inlined, continue with regular extents. */
	}

	alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
	btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);

	/*
	 * For the data relocation tree the minimum allocation size equals
	 * the full length, so the range is either reserved as one extent or
	 * the reservation fails.  Everything else may be split down to a
	 * single sector.
	 */
	if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
		min_alloc_size = num_bytes;
	else
		min_alloc_size = fs_info->sectorsize;

	while (num_bytes > 0) {
		cur_alloc_size = num_bytes;
		ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
					   min_alloc_size, 0, alloc_hint,
					   &ins, 1, 1);
		if (ret < 0)
			goto out_unlock;
		/* ins.offset is the size that was actually reserved. */
		cur_alloc_size = ins.offset;
		extent_reserved = true;

		ram_size = ins.offset;
		em = create_io_em(inode, start, ins.offset, /* len */
				  start, /* orig_start */
				  ins.objectid, /* block_start */
				  ins.offset, /* block_len */
				  ins.offset, /* orig_block_len */
				  ram_size, /* ram_bytes */
				  BTRFS_COMPRESS_NONE, /* compress_type */
				  BTRFS_ORDERED_REGULAR /* type */);
		if (IS_ERR(em)) {
			ret = PTR_ERR(em);
			goto out_reserve;
		}
		free_extent_map(em);

		ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
					       ram_size, cur_alloc_size, 0);
		if (ret)
			goto out_drop_extent_cache;

		if (root->root_key.objectid ==
		    BTRFS_DATA_RELOC_TREE_OBJECTID) {
			ret = btrfs_reloc_clone_csums(inode, start,
						      cur_alloc_size);
			/*
			 * The ordered extent already exists at this point, so
			 * a failure here does not jump to the reserve cleanup
			 * labels.  Only the extent cache is dropped now; the
			 * error is acted upon at the end of this iteration,
			 * after start was advanced past this extent.
			 */
			if (ret)
				btrfs_drop_extent_cache(inode, start,
						start + ram_size - 1, 0);
		}

		btrfs_dec_block_group_reservations(fs_info, ins.objectid);

		/*
		 * Private2 is set on the pages of this extent; the pages are
		 * unlocked only if the caller asked for it.
		 */
		page_ops = unlock ? PAGE_UNLOCK : 0;
		page_ops |= PAGE_SET_PRIVATE2;

		extent_clear_unlock_delalloc(inode, start, start + ram_size - 1,
					     locked_page,
					     EXTENT_LOCKED | EXTENT_DELALLOC,
					     page_ops);
		if (num_bytes < cur_alloc_size)
			num_bytes = 0;
		else
			num_bytes -= cur_alloc_size;
		alloc_hint = ins.objectid + ins.offset;
		start += cur_alloc_size;
		extent_reserved = false;

		/*
		 * Deferred failure of btrfs_reloc_clone_csums(): start now
		 * points past the extent handled above, so out_unlock only
		 * cleans up the remainder of the range.
		 */
		if (ret)
			goto out_unlock;
	}
out:
	return ret;

out_drop_extent_cache:
	btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0);
out_reserve:
	btrfs_dec_block_group_reservations(fs_info, ins.objectid);
	btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
out_unlock:
	clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
		EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV;
	page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
		PAGE_END_WRITEBACK;
	/*
	 * If an extent was reserved for the current position, that part is
	 * cleared without EXTENT_CLEAR_DATA_RESV, while the rest of the range
	 * below is cleared with it.
	 * NOTE(review): presumably because the data reservation of the
	 * reserved part was already consumed by btrfs_reserve_extent() -
	 * confirm.
	 */
	if (extent_reserved) {
		extent_clear_unlock_delalloc(inode, start,
					     start + cur_alloc_size - 1,
					     locked_page,
					     clear_bits,
					     page_ops);
		start += cur_alloc_size;
		if (start >= end)
			goto out;
	}
	extent_clear_unlock_delalloc(inode, start, end, locked_page,
				     clear_bits | EXTENT_CLEAR_DATA_RESV,
				     page_ops);
	goto out;
}
1181
1182
1183
1184
1185static noinline void async_cow_start(struct btrfs_work *work)
1186{
1187 struct async_chunk *async_chunk;
1188 int compressed_extents;
1189
1190 async_chunk = container_of(work, struct async_chunk, work);
1191
1192 compressed_extents = compress_file_range(async_chunk);
1193 if (compressed_extents == 0) {
1194 btrfs_add_delayed_iput(async_chunk->inode);
1195 async_chunk->inode = NULL;
1196 }
1197}
1198
1199
1200
1201
1202static noinline void async_cow_submit(struct btrfs_work *work)
1203{
1204 struct async_chunk *async_chunk = container_of(work, struct async_chunk,
1205 work);
1206 struct btrfs_fs_info *fs_info = btrfs_work_owner(work);
1207 unsigned long nr_pages;
1208
1209 nr_pages = (async_chunk->end - async_chunk->start + PAGE_SIZE) >>
1210 PAGE_SHIFT;
1211
1212
1213 if (atomic_sub_return(nr_pages, &fs_info->async_delalloc_pages) <
1214 5 * SZ_1M)
1215 cond_wake_up_nomb(&fs_info->async_submit_wait);
1216
1217
1218
1219
1220
1221
1222
1223 if (async_chunk->inode)
1224 submit_compressed_extents(async_chunk);
1225}
1226
1227static noinline void async_cow_free(struct btrfs_work *work)
1228{
1229 struct async_chunk *async_chunk;
1230
1231 async_chunk = container_of(work, struct async_chunk, work);
1232 if (async_chunk->inode)
1233 btrfs_add_delayed_iput(async_chunk->inode);
1234 if (async_chunk->blkcg_css)
1235 css_put(async_chunk->blkcg_css);
1236
1237
1238
1239
1240 if (atomic_dec_and_test(async_chunk->pending))
1241 kvfree(async_chunk->pending);
1242}
1243
/*
 * Split the delalloc range [start, end] into chunks and queue each of them
 * on fs_info->delalloc_workers, where async_cow_start(), async_cow_submit()
 * and async_cow_free() process it.
 *
 * On success 0 is returned, *page_started is set to 1 and *nr_written is
 * increased by the number of pages that were queued.  If the chunk array
 * cannot be allocated the whole range is failed (pages flagged with an
 * error and unlocked) and -ENOMEM is returned.
 */
static int cow_file_range_async(struct btrfs_inode *inode,
				struct writeback_control *wbc,
				struct page *locked_page,
				u64 start, u64 end, int *page_started,
				unsigned long *nr_written)
{
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	struct cgroup_subsys_state *blkcg_css = wbc_blkcg_css(wbc);
	struct async_cow *ctx;
	struct async_chunk *async_chunk;
	unsigned long nr_pages;
	u64 cur_end;
	/* One chunk per started 512K of the range. */
	u64 num_chunks = DIV_ROUND_UP(end - start, SZ_512K);
	int i;
	bool should_compress;
	unsigned nofs_flag;
	const unsigned int write_flags = wbc_to_write_flags(wbc);

	/* The range is locked again, per extent, in submit_compressed_extents(). */
	unlock_extent(&inode->io_tree, start, end);

	/*
	 * An inode flagged NOCOMPRESS (without FORCE_COMPRESS) is handled
	 * as one single chunk covering the whole range.
	 */
	if (inode->flags & BTRFS_INODE_NOCOMPRESS &&
	    !btrfs_test_opt(fs_info, FORCE_COMPRESS)) {
		num_chunks = 1;
		should_compress = false;
	} else {
		should_compress = true;
	}

	/* GFP_KERNEL inside a NOFS scope. */
	nofs_flag = memalloc_nofs_save();
	ctx = kvmalloc(struct_size(ctx, chunks, num_chunks), GFP_KERNEL);
	memalloc_nofs_restore(nofs_flag);

	if (!ctx) {
		unsigned clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC |
			EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
			EXTENT_DO_ACCOUNTING;
		unsigned long page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
			PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK |
			PAGE_SET_ERROR;

		extent_clear_unlock_delalloc(inode, start, end, locked_page,
					     clear_bits, page_ops);
		return -ENOMEM;
	}

	async_chunk = ctx->chunks;
	atomic_set(&ctx->num_chunks, num_chunks);

	for (i = 0; i < num_chunks; i++) {
		if (should_compress)
			cur_end = min(end, start + SZ_512K - 1);
		else
			cur_end = end;

		/*
		 * Every chunk holds its own inode reference; it is dropped
		 * in async_cow_start() or async_cow_free().
		 */
		ihold(&inode->vfs_inode);
		async_chunk[i].pending = &ctx->num_chunks;
		async_chunk[i].inode = &inode->vfs_inode;
		async_chunk[i].start = start;
		async_chunk[i].end = cur_end;
		async_chunk[i].write_flags = write_flags;
		INIT_LIST_HEAD(&async_chunk[i].extents);

		/*
		 * Only the first chunk receives the locked page; the local
		 * pointer is cleared afterwards so that all later chunks get
		 * NULL.
		 */
		if (locked_page) {
			/* The cgroup owner is accounted once, together with the locked page. */
			wbc_account_cgroup_owner(wbc, locked_page,
						 cur_end - start);
			async_chunk[i].locked_page = locked_page;
			locked_page = NULL;
		} else {
			async_chunk[i].locked_page = NULL;
		}

		/* A non-root css is referenced here and put in async_cow_free(). */
		if (blkcg_css != blkcg_root_css) {
			css_get(blkcg_css);
			async_chunk[i].blkcg_css = blkcg_css;
		} else {
			async_chunk[i].blkcg_css = NULL;
		}

		btrfs_init_work(&async_chunk[i].work, async_cow_start,
				async_cow_submit, async_cow_free);

		/* Subtracted again in async_cow_submit(). */
		nr_pages = DIV_ROUND_UP(cur_end - start, PAGE_SIZE);
		atomic_add(nr_pages, &fs_info->async_delalloc_pages);

		btrfs_queue_work(fs_info->delalloc_workers, &async_chunk[i].work);

		*nr_written += nr_pages;
		start = cur_end + 1;
	}
	*page_started = 1;
	return 0;
}
1358
1359static noinline int csum_exist_in_range(struct btrfs_fs_info *fs_info,
1360 u64 bytenr, u64 num_bytes)
1361{
1362 int ret;
1363 struct btrfs_ordered_sum *sums;
1364 LIST_HEAD(list);
1365
1366 ret = btrfs_lookup_csums_range(fs_info->csum_root, bytenr,
1367 bytenr + num_bytes - 1, &list, 0);
1368 if (ret == 0 && list_empty(&list))
1369 return 0;
1370
1371 while (!list_empty(&list)) {
1372 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
1373 list_del(&sums->list);
1374 kfree(sums);
1375 }
1376 if (ret < 0)
1377 return ret;
1378 return 1;
1379}
1380
1381static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page,
1382 const u64 start, const u64 end,
1383 int *page_started, unsigned long *nr_written)
1384{
1385 const bool is_space_ino = btrfs_is_free_space_inode(inode);
1386 const bool is_reloc_ino = (inode->root->root_key.objectid ==
1387 BTRFS_DATA_RELOC_TREE_OBJECTID);
1388 const u64 range_bytes = end + 1 - start;
1389 struct extent_io_tree *io_tree = &inode->io_tree;
1390 u64 range_start = start;
1391 u64 count;
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425 count = count_range_bits(io_tree, &range_start, end, range_bytes,
1426 EXTENT_NORESERVE, 0);
1427 if (count > 0 || is_space_ino || is_reloc_ino) {
1428 u64 bytes = count;
1429 struct btrfs_fs_info *fs_info = inode->root->fs_info;
1430 struct btrfs_space_info *sinfo = fs_info->data_sinfo;
1431
1432 if (is_space_ino || is_reloc_ino)
1433 bytes = range_bytes;
1434
1435 spin_lock(&sinfo->lock);
1436 btrfs_space_info_update_bytes_may_use(fs_info, sinfo, bytes);
1437 spin_unlock(&sinfo->lock);
1438
1439 if (count > 0)
1440 clear_extent_bit(io_tree, start, end, EXTENT_NORESERVE,
1441 0, 0, NULL);
1442 }
1443
1444 return cow_file_range(inode, locked_page, start, end, page_started,
1445 nr_written, 1);
1446}
1447
1448
1449
1450
1451
1452
1453
1454
/*
 * Write back the delalloc range [start, end] of @inode, overwriting existing
 * on-disk extents in place wherever every check below passes, and sending the
 * remaining sub-ranges through fallback_to_cow().
 *
 * @force: non-zero allows regular (BTRFS_FILE_EXTENT_REG) extents to be
 *         written in place; when zero, regular extents are always COWed and
 *         only preallocated extents can be written in place.
 *
 * Returns 0 on success or a negative errno. On error, if the current offset
 * is still below @end, the range [cur_offset, end] is released through
 * extent_clear_unlock_delalloc() before returning.
 */
static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
				       struct page *locked_page,
				       const u64 start, const u64 end,
				       int *page_started, int force,
				       unsigned long *nr_written)
{
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	struct btrfs_root *root = inode->root;
	struct btrfs_path *path;
	/* Start of the pending range that must be COWed; (u64)-1 means none. */
	u64 cow_start = (u64)-1;
	u64 cur_offset = start;
	int ret;
	bool check_prev = true;
	const bool freespace_inode = btrfs_is_free_space_inode(inode);
	u64 ino = btrfs_ino(inode);
	/* True while a nocow writer count taken below still has to be dropped. */
	bool nocow = false;
	u64 disk_bytenr = 0;

	path = btrfs_alloc_path();
	if (!path) {
		/* Nothing was processed yet: release the whole range. */
		extent_clear_unlock_delalloc(inode, start, end, locked_page,
					     EXTENT_LOCKED | EXTENT_DELALLOC |
					     EXTENT_DO_ACCOUNTING |
					     EXTENT_DEFRAG, PAGE_UNLOCK |
					     PAGE_CLEAR_DIRTY |
					     PAGE_SET_WRITEBACK |
					     PAGE_END_WRITEBACK);
		return -ENOMEM;
	}

	while (1) {
		struct btrfs_key found_key;
		struct btrfs_file_extent_item *fi;
		struct extent_buffer *leaf;
		u64 extent_end;
		u64 extent_offset;
		u64 num_bytes = 0;
		u64 disk_num_bytes;
		u64 ram_bytes;
		int extent_type;

		nocow = false;

		/* Position the path at the file extent item for cur_offset. */
		ret = btrfs_lookup_file_extent(NULL, root, path, ino,
					       cur_offset, 0);
		if (ret < 0)
			goto error;

		/*
		 * No exact match. On the first pass only, step back one slot
		 * when the previous item is an EXTENT_DATA item of this inode
		 * (presumably because it may still cover cur_offset).
		 */
		if (ret > 0 && path->slots[0] > 0 && check_prev) {
			leaf = path->nodes[0];
			btrfs_item_key_to_cpu(leaf, &found_key,
					      path->slots[0] - 1);
			if (found_key.objectid == ino &&
			    found_key.type == BTRFS_EXTENT_DATA_KEY)
				path->slots[0]--;
		}
		check_prev = false;
next_slot:
		/* Slot is past the end of this leaf: move on to the next leaf. */
		leaf = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0) {
				/*
				 * Rewind to the start of the pending COW range
				 * so the error path releases it as well.
				 */
				if (cow_start != (u64)-1)
					cur_offset = cow_start;
				goto error;
			}
			if (ret > 0)
				break;
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		/* Item belongs to a later inode: done scanning. */
		if (found_key.objectid > ino)
			break;

		/*
		 * Skip keys sorting before this inode's EXTENT_DATA items. An
		 * item of a smaller objectid is unexpected here, hence the
		 * warning.
		 */
		if (WARN_ON_ONCE(found_key.objectid < ino) ||
		    found_key.type < BTRFS_EXTENT_DATA_KEY) {
			path->slots[0]++;
			goto next_slot;
		}

		/* Past the EXTENT_DATA items, or past the requested range. */
		if (found_key.type > BTRFS_EXTENT_DATA_KEY ||
		    found_key.offset > end)
			break;

		/*
		 * The item starts after cur_offset: treat
		 * [cur_offset, found_key.offset) as a range to COW.
		 * NOTE(review): out_check then increments the slot, so the item
		 * found here is itself skipped and lands in the COW range too;
		 * confirm this is intended.
		 */
		if (found_key.offset > cur_offset) {
			extent_end = found_key.offset;
			extent_type = 0;
			goto out_check;
		}

		/* The item starts at or before cur_offset: inspect the extent. */
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		extent_type = btrfs_file_extent_type(leaf, fi);

		ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
		if (extent_type == BTRFS_FILE_EXTENT_REG ||
		    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
			disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
			extent_offset = btrfs_file_extent_offset(leaf, fi);
			extent_end = found_key.offset +
				btrfs_file_extent_num_bytes(leaf, fi);
			disk_num_bytes =
				btrfs_file_extent_disk_num_bytes(leaf, fi);
			/* Extent ends at or before cur_offset: try the next item. */
			if (extent_end <= cur_offset) {
				path->slots[0]++;
				goto next_slot;
			}
			/* No disk location recorded (presumably a hole): COW. */
			if (disk_bytenr == 0)
				goto out_check;
			/* Compressed, encrypted or otherwise encoded extents are COWed. */
			if (btrfs_file_extent_compression(leaf, fi) ||
			    btrfs_file_extent_encryption(leaf, fi) ||
			    btrfs_file_extent_other_encoding(leaf, fi))
				goto out_check;
			/*
			 * COW when the extent's generation is not newer than
			 * the root's last snapshot. Free space inodes skip
			 * this check.
			 */
			if (!freespace_inode &&
			    btrfs_file_extent_generation(leaf, fi) <=
			    btrfs_root_last_snapshot(&root->root_item))
				goto out_check;
			/* Regular extents are only written in place when forced. */
			if (extent_type == BTRFS_FILE_EXTENT_REG && !force)
				goto out_check;
			/* Extent lies in a read-only area: COW. */
			if (btrfs_extent_readonly(fs_info, disk_bytenr))
				goto out_check;
			ret = btrfs_cross_ref_exist(root, ino,
						    found_key.offset -
						    extent_offset, disk_bytenr, false);
			if (ret) {
				/*
				 * Negative: lookup failed, bail out after
				 * rewinding to the pending COW range.
				 * Positive: another reference exists, so COW.
				 */
				if (ret < 0) {
					if (cow_start != (u64)-1)
						cur_offset = cow_start;
					goto error;
				}

				WARN_ON_ONCE(freespace_inode);
				goto out_check;
			}
			/* Translate to the disk byte that matches cur_offset. */
			disk_bytenr += extent_offset;
			disk_bytenr += cur_offset - found_key.offset;
			num_bytes = min(end + 1, extent_end) - cur_offset;
			/* COW is being forced on this root (snapshot_force_cow set). */
			if (!freespace_inode && atomic_read(&root->snapshot_force_cow))
				goto out_check;
			/* Checksums already cover the target disk range: COW. */
			ret = csum_exist_in_range(fs_info, disk_bytenr,
						  num_bytes);
			if (ret) {
				/* Same error handling as for the cross-ref check. */
				if (ret < 0) {
					if (cow_start != (u64)-1)
						cur_offset = cow_start;
					goto error;
				}
				WARN_ON_ONCE(freespace_inode);
				goto out_check;
			}
			/* Could not register as a nocow writer: COW instead. */
			if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr))
				goto out_check;
			nocow = true;
		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
			/* Inline extents end at the sector-aligned end of their data. */
			extent_end = found_key.offset + ram_bytes;
			extent_end = ALIGN(extent_end, fs_info->sectorsize);
			/* Skip inline extents that end at or before the range start. */
			if (extent_end <= start) {
				path->slots[0]++;
				goto next_slot;
			}
		} else {
			/* Any other extent type is not handled here. */
			BUG();
		}
out_check:
		/*
		 * This extent cannot be written in place: open the pending COW
		 * range if needed, advance past the extent and keep scanning.
		 */
		if (!nocow) {
			if (cow_start == (u64)-1)
				cow_start = cur_offset;
			cur_offset = extent_end;
			if (cur_offset > end)
				break;
			path->slots[0]++;
			goto next_slot;
		}

		btrfs_release_path(path);

		/*
		 * COW the pending range that precedes this nocow extent before
		 * handling the extent itself.
		 */
		if (cow_start != (u64)-1) {
			ret = fallback_to_cow(inode, locked_page,
					      cow_start, found_key.offset - 1,
					      page_started, nr_written);
			if (ret)
				goto error;
			cow_start = (u64)-1;
		}

		if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
			u64 orig_start = found_key.offset - extent_offset;
			struct extent_map *em;

			/* Preallocated extent: create an extent map, then the ordered extent. */
			em = create_io_em(inode, cur_offset, num_bytes,
					  orig_start,
					  disk_bytenr, /* block_start */
					  num_bytes, /* block_len */
					  disk_num_bytes, /* orig_block_len */
					  ram_bytes, BTRFS_COMPRESS_NONE,
					  BTRFS_ORDERED_PREALLOC);
			if (IS_ERR(em)) {
				ret = PTR_ERR(em);
				goto error;
			}
			free_extent_map(em);
			ret = btrfs_add_ordered_extent(inode, cur_offset,
						       disk_bytenr, num_bytes,
						       num_bytes,
						       BTRFS_ORDERED_PREALLOC);
			if (ret) {
				/* Undo the extent map created just above. */
				btrfs_drop_extent_cache(inode, cur_offset,
							cur_offset + num_bytes - 1,
							0);
				goto error;
			}
		} else {
			ret = btrfs_add_ordered_extent(inode, cur_offset,
						       disk_bytenr, num_bytes,
						       num_bytes,
						       BTRFS_ORDERED_NOCOW);
			if (ret)
				goto error;
		}

		/* The ordered extent exists now; drop the nocow writer count. */
		if (nocow)
			btrfs_dec_nocow_writers(fs_info, disk_bytenr);
		nocow = false;

		if (root->root_key.objectid ==
		    BTRFS_DATA_RELOC_TREE_OBJECTID)
			/*
			 * Data relocation inode: clone the checksums for this
			 * range. A failure is only acted on after the range
			 * has been unlocked below.
			 */
			ret = btrfs_reloc_clone_csums(inode, cur_offset,
						      num_bytes);

		extent_clear_unlock_delalloc(inode, cur_offset,
					     cur_offset + num_bytes - 1,
					     locked_page, EXTENT_LOCKED |
					     EXTENT_DELALLOC |
					     EXTENT_CLEAR_DATA_RESV,
					     PAGE_UNLOCK | PAGE_SET_PRIVATE2);

		cur_offset = extent_end;

		/*
		 * ret is only non-zero here when btrfs_reloc_clone_csums()
		 * failed. cur_offset was already advanced, so the error path
		 * leaves the range that was just unlocked alone.
		 */
		if (ret)
			goto error;
		if (cur_offset > end)
			break;
	}
	btrfs_release_path(path);

	/* Whatever is left past the last examined extent must be COWed. */
	if (cur_offset <= end && cow_start == (u64)-1)
		cow_start = cur_offset;

	if (cow_start != (u64)-1) {
		/*
		 * Set cur_offset to end first, so the cleanup below is skipped
		 * if fallback_to_cow() fails.
		 */
		cur_offset = end;
		ret = fallback_to_cow(inode, locked_page, cow_start, end,
				      page_started, nr_written);
		if (ret)
			goto error;
	}

error:
	/* Drop a nocow writer count still held from an aborted iteration. */
	if (nocow)
		btrfs_dec_nocow_writers(fs_info, disk_bytenr);

	/* On failure, release everything that was not processed. */
	if (ret && cur_offset < end)
		extent_clear_unlock_delalloc(inode, cur_offset, end,
					     locked_page, EXTENT_LOCKED |
					     EXTENT_DELALLOC | EXTENT_DEFRAG |
					     EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
					     PAGE_CLEAR_DIRTY |
					     PAGE_SET_WRITEBACK |
					     PAGE_END_WRITEBACK);
	btrfs_free_path(path);
	return ret;
}
1799
1800static inline int need_force_cow(struct btrfs_inode *inode, u64 start, u64 end)
1801{
1802
1803 if (!(inode->flags & BTRFS_INODE_NODATACOW) &&
1804 !(inode->flags & BTRFS_INODE_PREALLOC))
1805 return 0;
1806
1807
1808
1809
1810
1811
1812 if (inode->defrag_bytes &&
1813 test_range_bit(&inode->io_tree, start, end, EXTENT_DEFRAG, 0, NULL))
1814 return 1;
1815
1816 return 0;
1817}
1818
1819
1820
1821
1822
1823int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page,
1824 u64 start, u64 end, int *page_started, unsigned long *nr_written,
1825 struct writeback_control *wbc)
1826{
1827 int ret;
1828 int force_cow = need_force_cow(inode, start, end);
1829
1830 if (inode->flags & BTRFS_INODE_NODATACOW && !force_cow) {
1831 ret = run_delalloc_nocow(inode, locked_page, start, end,
1832 page_started, 1, nr_written);
1833 } else if (inode->flags & BTRFS_INODE_PREALLOC && !force_cow) {
1834 ret = run_delalloc_nocow(inode, locked_page, start, end,
1835 page_started, 0, nr_written);
1836 } else if (!inode_can_compress(inode) ||
1837 !inode_need_compress(inode, start, end)) {
1838 ret = cow_file_range(inode, locked_page, start, end,
1839 page_started, nr_written, 1);
1840 } else {
1841 set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags);
1842 ret = cow_file_range_async(inode, wbc, locked_page, start, end,
1843 page_started, nr_written);
1844 }
1845 if (ret)
1846 btrfs_cleanup_ordered_extents(inode, locked_page, start,
1847 end - start + 1);
1848 return ret;
1849}
1850
1851void btrfs_split_delalloc_extent(struct inode *inode,
1852 struct extent_state *orig, u64 split)
1853{
1854 u64 size;
1855
1856
1857 if (!(orig->state & EXTENT_DELALLOC))
1858 return;
1859
1860 size = orig->end - orig->start + 1;
1861 if (size > BTRFS_MAX_EXTENT_SIZE) {
1862 u32 num_extents;
1863 u64 new_size;
1864
1865
1866
1867
1868
1869 new_size = orig->end - split + 1;
1870 num_extents = count_max_extents(new_size);
1871 new_size = split - orig->start;
1872 num_extents += count_max_extents(new_size);
1873 if (count_max_extents(size) >= num_extents)
1874 return;
1875 }
1876
1877 spin_lock(&BTRFS_I(inode)->lock);
1878 btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
1879 spin_unlock(&BTRFS_I(inode)->lock);
1880}
1881
1882
1883
1884
1885
1886
1887void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new,
1888 struct extent_state *other)
1889{
1890 u64 new_size, old_size;
1891 u32 num_extents;
1892
1893
1894 if (!(other->state & EXTENT_DELALLOC))
1895 return;
1896
1897 if (new->start > other->start)
1898 new_size = new->end - other->start + 1;
1899 else
1900 new_size = other->end - new->start + 1;
1901
1902
1903 if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
1904 spin_lock(&BTRFS_I(inode)->lock);
1905 btrfs_mod_outstanding_extents(BTRFS_I(inode), -1);
1906 spin_unlock(&BTRFS_I(inode)->lock);
1907 return;
1908 }
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928 old_size = other->end - other->start + 1;
1929 num_extents = count_max_extents(old_size);
1930 old_size = new->end - new->start + 1;
1931 num_extents += count_max_extents(old_size);
1932 if (count_max_extents(new_size) >= num_extents)
1933 return;
1934
1935 spin_lock(&BTRFS_I(inode)->lock);
1936 btrfs_mod_outstanding_extents(BTRFS_I(inode), -1);
1937 spin_unlock(&BTRFS_I(inode)->lock);
1938}
1939
1940static void btrfs_add_delalloc_inodes(struct btrfs_root *root,
1941 struct inode *inode)
1942{
1943 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
1944
1945 spin_lock(&root->delalloc_lock);
1946 if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1947 list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
1948 &root->delalloc_inodes);
1949 set_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1950 &BTRFS_I(inode)->runtime_flags);
1951 root->nr_delalloc_inodes++;
1952 if (root->nr_delalloc_inodes == 1) {
1953 spin_lock(&fs_info->delalloc_root_lock);
1954 BUG_ON(!list_empty(&root->delalloc_root));
1955 list_add_tail(&root->delalloc_root,
1956 &fs_info->delalloc_roots);
1957 spin_unlock(&fs_info->delalloc_root_lock);
1958 }
1959 }
1960 spin_unlock(&root->delalloc_lock);
1961}
1962
1963
1964void __btrfs_del_delalloc_inode(struct btrfs_root *root,
1965 struct btrfs_inode *inode)
1966{
1967 struct btrfs_fs_info *fs_info = root->fs_info;
1968
1969 if (!list_empty(&inode->delalloc_inodes)) {
1970 list_del_init(&inode->delalloc_inodes);
1971 clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1972 &inode->runtime_flags);
1973 root->nr_delalloc_inodes--;
1974 if (!root->nr_delalloc_inodes) {
1975 ASSERT(list_empty(&root->delalloc_inodes));
1976 spin_lock(&fs_info->delalloc_root_lock);
1977 BUG_ON(list_empty(&root->delalloc_root));
1978 list_del_init(&root->delalloc_root);
1979 spin_unlock(&fs_info->delalloc_root_lock);
1980 }
1981 }
1982}
1983
1984static void btrfs_del_delalloc_inode(struct btrfs_root *root,
1985 struct btrfs_inode *inode)
1986{
1987 spin_lock(&root->delalloc_lock);
1988 __btrfs_del_delalloc_inode(root, inode);
1989 spin_unlock(&root->delalloc_lock);
1990}
1991
1992
1993
1994
1995
1996void btrfs_set_delalloc_extent(struct inode *inode, struct extent_state *state,
1997 unsigned *bits)
1998{
1999 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2000
2001 if ((*bits & EXTENT_DEFRAG) && !(*bits & EXTENT_DELALLOC))
2002 WARN_ON(1);
2003
2004
2005
2006
2007
2008 if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
2009 struct btrfs_root *root = BTRFS_I(inode)->root;
2010 u64 len = state->end + 1 - state->start;
2011 u32 num_extents = count_max_extents(len);
2012 bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode));
2013
2014 spin_lock(&BTRFS_I(inode)->lock);
2015 btrfs_mod_outstanding_extents(BTRFS_I(inode), num_extents);
2016 spin_unlock(&BTRFS_I(inode)->lock);
2017
2018
2019 if (btrfs_is_testing(fs_info))
2020 return;
2021
2022 percpu_counter_add_batch(&fs_info->delalloc_bytes, len,
2023 fs_info->delalloc_batch);
2024 spin_lock(&BTRFS_I(inode)->lock);
2025 BTRFS_I(inode)->delalloc_bytes += len;
2026 if (*bits & EXTENT_DEFRAG)
2027 BTRFS_I(inode)->defrag_bytes += len;
2028 if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
2029 &BTRFS_I(inode)->runtime_flags))
2030 btrfs_add_delalloc_inodes(root, inode);
2031 spin_unlock(&BTRFS_I(inode)->lock);
2032 }
2033
2034 if (!(state->state & EXTENT_DELALLOC_NEW) &&
2035 (*bits & EXTENT_DELALLOC_NEW)) {
2036 spin_lock(&BTRFS_I(inode)->lock);
2037 BTRFS_I(inode)->new_delalloc_bytes += state->end + 1 -
2038 state->start;
2039 spin_unlock(&BTRFS_I(inode)->lock);
2040 }
2041}
2042
2043
2044
2045
2046
2047void btrfs_clear_delalloc_extent(struct inode *vfs_inode,
2048 struct extent_state *state, unsigned *bits)
2049{
2050 struct btrfs_inode *inode = BTRFS_I(vfs_inode);
2051 struct btrfs_fs_info *fs_info = btrfs_sb(vfs_inode->i_sb);
2052 u64 len = state->end + 1 - state->start;
2053 u32 num_extents = count_max_extents(len);
2054
2055 if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG)) {
2056 spin_lock(&inode->lock);
2057 inode->defrag_bytes -= len;
2058 spin_unlock(&inode->lock);
2059 }
2060
2061
2062
2063
2064
2065
2066 if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
2067 struct btrfs_root *root = inode->root;
2068 bool do_list = !btrfs_is_free_space_inode(inode);
2069
2070 spin_lock(&inode->lock);
2071 btrfs_mod_outstanding_extents(inode, -num_extents);
2072 spin_unlock(&inode->lock);
2073
2074
2075
2076
2077
2078
2079 if (*bits & EXTENT_CLEAR_META_RESV &&
2080 root != fs_info->tree_root)
2081 btrfs_delalloc_release_metadata(inode, len, false);
2082
2083
2084 if (btrfs_is_testing(fs_info))
2085 return;
2086
2087 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID &&
2088 do_list && !(state->state & EXTENT_NORESERVE) &&
2089 (*bits & EXTENT_CLEAR_DATA_RESV))
2090 btrfs_free_reserved_data_space_noquota(fs_info, len);
2091
2092 percpu_counter_add_batch(&fs_info->delalloc_bytes, -len,
2093 fs_info->delalloc_batch);
2094 spin_lock(&inode->lock);
2095 inode->delalloc_bytes -= len;
2096 if (do_list && inode->delalloc_bytes == 0 &&
2097 test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
2098 &inode->runtime_flags))
2099 btrfs_del_delalloc_inode(root, inode);
2100 spin_unlock(&inode->lock);
2101 }
2102
2103 if ((state->state & EXTENT_DELALLOC_NEW) &&
2104 (*bits & EXTENT_DELALLOC_NEW)) {
2105 spin_lock(&inode->lock);
2106 ASSERT(inode->new_delalloc_bytes >= len);
2107 inode->new_delalloc_bytes -= len;
2108 spin_unlock(&inode->lock);
2109 }
2110}
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
2127 unsigned long bio_flags)
2128{
2129 struct inode *inode = page->mapping->host;
2130 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2131 u64 logical = (u64)bio->bi_iter.bi_sector << 9;
2132 u64 length = 0;
2133 u64 map_length;
2134 int ret;
2135 struct btrfs_io_geometry geom;
2136
2137 if (bio_flags & EXTENT_BIO_COMPRESSED)
2138 return 0;
2139
2140 length = bio->bi_iter.bi_size;
2141 map_length = length;
2142 ret = btrfs_get_io_geometry(fs_info, btrfs_op(bio), logical, map_length,
2143 &geom);
2144 if (ret < 0)
2145 return ret;
2146
2147 if (geom.len < length + size)
2148 return 1;
2149 return 0;
2150}
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160static blk_status_t btrfs_submit_bio_start(void *private_data, struct bio *bio,
2161 u64 bio_offset)
2162{
2163 struct inode *inode = private_data;
2164
2165 return btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0);
2166}
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
2187 int mirror_num,
2188 unsigned long bio_flags)
2189
2190{
2191 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2192 struct btrfs_root *root = BTRFS_I(inode)->root;
2193 enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
2194 blk_status_t ret = 0;
2195 int skip_sum;
2196 int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
2197
2198 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
2199
2200 if (btrfs_is_free_space_inode(BTRFS_I(inode)))
2201 metadata = BTRFS_WQ_ENDIO_FREE_SPACE;
2202
2203 if (bio_op(bio) != REQ_OP_WRITE) {
2204 ret = btrfs_bio_wq_end_io(fs_info, bio, metadata);
2205 if (ret)
2206 goto out;
2207
2208 if (bio_flags & EXTENT_BIO_COMPRESSED) {
2209 ret = btrfs_submit_compressed_read(inode, bio,
2210 mirror_num,
2211 bio_flags);
2212 goto out;
2213 } else if (!skip_sum) {
2214 ret = btrfs_lookup_bio_sums(inode, bio, (u64)-1, NULL);
2215 if (ret)
2216 goto out;
2217 }
2218 goto mapit;
2219 } else if (async && !skip_sum) {
2220
2221 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
2222 goto mapit;
2223
2224 ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, bio_flags,
2225 0, inode, btrfs_submit_bio_start);
2226 goto out;
2227 } else if (!skip_sum) {
2228 ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0);
2229 if (ret)
2230 goto out;
2231 }
2232
2233mapit:
2234 ret = btrfs_map_bio(fs_info, bio, mirror_num);
2235
2236out:
2237 if (ret) {
2238 bio->bi_status = ret;
2239 bio_endio(bio);
2240 }
2241 return ret;
2242}
2243
2244
2245
2246
2247
2248static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
2249 struct inode *inode, struct list_head *list)
2250{
2251 struct btrfs_ordered_sum *sum;
2252 int ret;
2253
2254 list_for_each_entry(sum, list, list) {
2255 trans->adding_csums = true;
2256 ret = btrfs_csum_file_blocks(trans,
2257 BTRFS_I(inode)->root->fs_info->csum_root, sum);
2258 trans->adding_csums = false;
2259 if (ret)
2260 return ret;
2261 }
2262 return 0;
2263}
2264
2265int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
2266 unsigned int extra_bits,
2267 struct extent_state **cached_state)
2268{
2269 WARN_ON(PAGE_ALIGNED(end));
2270 return set_extent_delalloc(&inode->io_tree, start, end, extra_bits,
2271 cached_state);
2272}
2273
2274
/*
 * One deferred page fixup request. Allocated and queued by
 * btrfs_writepage_cow_fixup(); processed and freed by
 * btrfs_writepage_fixup_worker().
 */
struct btrfs_writepage_fixup {
	/* Page to fix up; a reference is taken with get_page() before queueing. */
	struct page *page;
	/* Inode owning the page; held with ihold() before queueing. */
	struct inode *inode;
	/* Work item queued on fs_info->fixup_workers. */
	struct btrfs_work work;
};
2280
/*
 * Worker for the fixups queued by btrfs_writepage_cow_fixup().
 *
 * Reserves delalloc space for the page and, if the page is still dirty,
 * still attached to a mapping and still marked Checked, sets the delalloc
 * bits for its range. On failure the error is recorded on the mapping and
 * the page. In all cases the Checked flag is cleared, the page reference and
 * the fixup structure are released, and the inode reference is dropped
 * through a delayed iput.
 */
static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
{
	struct btrfs_writepage_fixup *fixup;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	struct extent_changeset *data_reserved = NULL;
	struct page *page;
	struct btrfs_inode *inode;
	u64 page_start;
	u64 page_end;
	int ret = 0;
	/* Cleared once the delalloc bits are set and the data reservation must stay. */
	bool free_delalloc_space = true;

	fixup = container_of(work, struct btrfs_writepage_fixup, work);
	page = fixup->page;
	inode = BTRFS_I(fixup->inode);
	page_start = page_offset(page);
	page_end = page_offset(page) + PAGE_SIZE - 1;

	/*
	 * The reservation is made before the page is locked. Its result is
	 * only examined after the page state has been checked below.
	 */
	ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
					   PAGE_SIZE);
again:
	lock_page(page);

	/*
	 * The page was truncated (no mapping), is no longer dirty or is no
	 * longer marked Checked: there is nothing left to fix.
	 */
	if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
		/*
		 * Give back the reservation if it succeeded. A reservation
		 * failure is deliberately not reported for such a page.
		 */
		if (!ret) {
			btrfs_delalloc_release_extents(inode, PAGE_SIZE);
			btrfs_delalloc_release_space(inode, data_reserved,
						     page_start, PAGE_SIZE,
						     true);
		}
		ret = 0;
		goto out_page;
	}

	/* The page still needs the fixup but the reservation failed. */
	if (ret)
		goto out_page;

	lock_extent_bits(&inode->io_tree, page_start, page_end, &cached_state);

	/*
	 * Private2 is set: skip setting the delalloc bits (presumably someone
	 * else already set up delalloc for this page) and drop the
	 * reservation.
	 */
	if (PagePrivate2(page))
		goto out_reserved;

	ordered = btrfs_lookup_ordered_range(inode, page_start, PAGE_SIZE);
	if (ordered) {
		/*
		 * An ordered extent overlaps the page: drop both locks, wait
		 * for it and retry from the page lock.
		 */
		unlock_extent_cached(&inode->io_tree, page_start, page_end,
				     &cached_state);
		unlock_page(page);
		btrfs_start_ordered_extent(&inode->vfs_inode, ordered, 1);
		btrfs_put_ordered_extent(ordered);
		goto again;
	}

	ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0,
					&cached_state);
	if (ret)
		goto out_reserved;

	/*
	 * Delalloc is set for the page now, so the data reservation has to
	 * stay. The page is still locked and was dirty when checked above.
	 */
	BUG_ON(!PageDirty(page));
	free_delalloc_space = false;
out_reserved:
	btrfs_delalloc_release_extents(inode, PAGE_SIZE);
	if (free_delalloc_space)
		btrfs_delalloc_release_space(inode, data_reserved, page_start,
					     PAGE_SIZE, true);
	unlock_extent_cached(&inode->io_tree, page_start, page_end,
			     &cached_state);
out_page:
	if (ret) {
		/*
		 * Record the error on the mapping and the page, end writeback
		 * for the range and clear the page's dirty state.
		 */
		mapping_set_error(page->mapping, ret);
		end_extent_writepage(page, ret, page_start, page_end);
		clear_page_dirty_for_io(page);
		SetPageError(page);
	}
	ClearPageChecked(page);
	unlock_page(page);
	put_page(page);
	kfree(fixup);
	extent_changeset_free(data_reserved);
	/*
	 * Drop the inode reference taken with ihold() in
	 * btrfs_writepage_cow_fixup(), using a delayed iput rather than a
	 * direct one.
	 */
	btrfs_add_delayed_iput(&inode->vfs_inode);
}
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end)
2422{
2423 struct inode *inode = page->mapping->host;
2424 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2425 struct btrfs_writepage_fixup *fixup;
2426
2427
2428 if (TestClearPagePrivate2(page))
2429 return 0;
2430
2431
2432
2433
2434
2435
2436
2437
2438 if (PageChecked(page))
2439 return -EAGAIN;
2440
2441 fixup = kzalloc(sizeof(*fixup), GFP_NOFS);
2442 if (!fixup)
2443 return -EAGAIN;
2444
2445
2446
2447
2448
2449
2450
2451 ihold(inode);
2452 SetPageChecked(page);
2453 get_page(page);
2454 btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL);
2455 fixup->page = page;
2456 fixup->inode = inode;
2457 btrfs_queue_work(fs_info->fixup_workers, &fixup->work);
2458
2459 return -EAGAIN;
2460}
2461
2462static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
2463 struct btrfs_inode *inode, u64 file_pos,
2464 struct btrfs_file_extent_item *stack_fi,
2465 u64 qgroup_reserved)
2466{
2467 struct btrfs_root *root = inode->root;
2468 struct btrfs_path *path;
2469 struct extent_buffer *leaf;
2470 struct btrfs_key ins;
2471 u64 disk_num_bytes = btrfs_stack_file_extent_disk_num_bytes(stack_fi);
2472 u64 disk_bytenr = btrfs_stack_file_extent_disk_bytenr(stack_fi);
2473 u64 num_bytes = btrfs_stack_file_extent_num_bytes(stack_fi);
2474 u64 ram_bytes = btrfs_stack_file_extent_ram_bytes(stack_fi);
2475 int extent_inserted = 0;
2476 int ret;
2477
2478 path = btrfs_alloc_path();
2479 if (!path)
2480 return -ENOMEM;
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491 ret = __btrfs_drop_extents(trans, root, inode, path, file_pos,
2492 file_pos + num_bytes, NULL, 0,
2493 1, sizeof(*stack_fi), &extent_inserted);
2494 if (ret)
2495 goto out;
2496
2497 if (!extent_inserted) {
2498 ins.objectid = btrfs_ino(inode);
2499 ins.offset = file_pos;
2500 ins.type = BTRFS_EXTENT_DATA_KEY;
2501
2502 path->leave_spinning = 1;
2503 ret = btrfs_insert_empty_item(trans, root, path, &ins,
2504 sizeof(*stack_fi));
2505 if (ret)
2506 goto out;
2507 }
2508 leaf = path->nodes[0];
2509 btrfs_set_stack_file_extent_generation(stack_fi, trans->transid);
2510 write_extent_buffer(leaf, stack_fi,
2511 btrfs_item_ptr_offset(leaf, path->slots[0]),
2512 sizeof(struct btrfs_file_extent_item));
2513
2514 btrfs_mark_buffer_dirty(leaf);
2515 btrfs_release_path(path);
2516
2517 inode_add_bytes(&inode->vfs_inode, num_bytes);
2518
2519 ins.objectid = disk_bytenr;
2520 ins.offset = disk_num_bytes;
2521 ins.type = BTRFS_EXTENT_ITEM_KEY;
2522
2523 ret = btrfs_inode_set_file_extent_range(inode, file_pos, ram_bytes);
2524 if (ret)
2525 goto out;
2526
2527 ret = btrfs_alloc_reserved_file_extent(trans, root, btrfs_ino(inode),
2528 file_pos, qgroup_reserved, &ins);
2529out:
2530 btrfs_free_path(path);
2531
2532 return ret;
2533}
2534
2535static void btrfs_release_delalloc_bytes(struct btrfs_fs_info *fs_info,
2536 u64 start, u64 len)
2537{
2538 struct btrfs_block_group *cache;
2539
2540 cache = btrfs_lookup_block_group(fs_info, start);
2541 ASSERT(cache);
2542
2543 spin_lock(&cache->lock);
2544 cache->delalloc_bytes -= len;
2545 spin_unlock(&cache->lock);
2546
2547 btrfs_put_block_group(cache);
2548}
2549
2550static int insert_ordered_extent_file_extent(struct btrfs_trans_handle *trans,
2551 struct inode *inode,
2552 struct btrfs_ordered_extent *oe)
2553{
2554 struct btrfs_file_extent_item stack_fi;
2555 u64 logical_len;
2556
2557 memset(&stack_fi, 0, sizeof(stack_fi));
2558 btrfs_set_stack_file_extent_type(&stack_fi, BTRFS_FILE_EXTENT_REG);
2559 btrfs_set_stack_file_extent_disk_bytenr(&stack_fi, oe->disk_bytenr);
2560 btrfs_set_stack_file_extent_disk_num_bytes(&stack_fi,
2561 oe->disk_num_bytes);
2562 if (test_bit(BTRFS_ORDERED_TRUNCATED, &oe->flags))
2563 logical_len = oe->truncated_len;
2564 else
2565 logical_len = oe->num_bytes;
2566 btrfs_set_stack_file_extent_num_bytes(&stack_fi, logical_len);
2567 btrfs_set_stack_file_extent_ram_bytes(&stack_fi, logical_len);
2568 btrfs_set_stack_file_extent_compression(&stack_fi, oe->compress_type);
2569
2570
2571 return insert_reserved_file_extent(trans, BTRFS_I(inode), oe->file_offset,
2572 &stack_fi, oe->qgroup_rsv);
2573}
2574
2575
2576
2577
2578
2579
/*
 * Complete an ordered extent once its IO has finished.
 *
 * For NOCOW ordered extents only the inode item is updated. Otherwise the
 * file range is locked, the file extent is either marked written (PREALLOC)
 * or inserted, the pending checksums are added and the inode item is updated,
 * all in a joined transaction. On error, or when the ordered extent was
 * truncated, the unwritten part of the range is dropped from the extent cache
 * and, where applicable, the reserved disk extent is freed.
 *
 * The ordered extent is always removed from the inode and two references to
 * it are dropped before returning. Returns 0 or a negative errno.
 */
static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
{
	struct inode *inode = ordered_extent->inode;
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_trans_handle *trans = NULL;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_state *cached_state = NULL;
	u64 start, end;
	int compress_type = 0;
	int ret = 0;
	u64 logical_len = ordered_extent->num_bytes;
	bool freespace_inode;
	bool truncated = false;
	/* Set once the file range has been locked, so "out" knows to unlock it. */
	bool range_locked = false;
	bool clear_new_delalloc_bytes = false;
	/* Cleared once the file extent item was inserted successfully. */
	bool clear_reserved_extent = true;
	unsigned int clear_bits;

	start = ordered_extent->file_offset;
	end = start + ordered_extent->num_bytes - 1;

	/* EXTENT_DELALLOC_NEW is only cleared for plain COW buffered writes. */
	if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
	    !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags) &&
	    !test_bit(BTRFS_ORDERED_DIRECT, &ordered_extent->flags))
		clear_new_delalloc_bytes = true;

	freespace_inode = btrfs_is_free_space_inode(BTRFS_I(inode));

	/* The IO itself failed: skip straight to cleanup. */
	if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) {
		ret = -EIO;
		goto out;
	}

	btrfs_free_io_failure_record(BTRFS_I(inode), start, end);

	if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
		truncated = true;
		logical_len = ordered_extent->truncated_len;
		/* Truncated down to nothing: no file extent to insert. */
		if (!logical_len)
			goto out;
	}

	if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
		/* NOCOW ordered extents are not expected to carry checksums. */
		BUG_ON(!list_empty(&ordered_extent->list));

		/* Only the inode item is updated; the range is not locked. */
		btrfs_inode_safe_disk_i_size_write(inode, 0);
		if (freespace_inode)
			trans = btrfs_join_transaction_spacecache(root);
		else
			trans = btrfs_join_transaction(root);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			trans = NULL;
			goto out;
		}
		trans->block_rsv = &BTRFS_I(inode)->block_rsv;
		ret = btrfs_update_inode_fallback(trans, root, inode);
		if (ret) /* -ENOMEM or corruption */
			btrfs_abort_transaction(trans, ret);
		goto out;
	}

	range_locked = true;
	lock_extent_bits(io_tree, start, end, &cached_state);

	if (freespace_inode)
		trans = btrfs_join_transaction_spacecache(root);
	else
		trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		trans = NULL;
		goto out;
	}

	trans->block_rsv = &BTRFS_I(inode)->block_rsv;

	if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
		compress_type = ordered_extent->compress_type;
	if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
		/* Preallocated extents are never compressed. */
		BUG_ON(compress_type);
		ret = btrfs_mark_extent_written(trans, BTRFS_I(inode),
						ordered_extent->file_offset,
						ordered_extent->file_offset +
						logical_len);
	} else {
		BUG_ON(root == fs_info->tree_root);
		ret = insert_ordered_extent_file_extent(trans, inode,
							ordered_extent);
		if (!ret) {
			/*
			 * The reserved extent now belongs to the file extent
			 * item: do not free it in the cleanup below, and drop
			 * its bytes from the block group's delalloc counter.
			 */
			clear_reserved_extent = false;
			btrfs_release_delalloc_bytes(fs_info,
						ordered_extent->disk_bytenr,
						ordered_extent->disk_num_bytes);
		}
	}
	/* Called before ret is checked, so it runs on failure as well. */
	unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
			   ordered_extent->file_offset,
			   ordered_extent->num_bytes, trans->transid);
	if (ret < 0) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	ret = add_pending_csums(trans, inode, &ordered_extent->list);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	btrfs_inode_safe_disk_i_size_write(inode, 0);
	ret = btrfs_update_inode_fallback(trans, root, inode);
	if (ret) { /* -ENOMEM or corruption */
		btrfs_abort_transaction(trans, ret);
		goto out;
	}
	ret = 0;
out:
	/*
	 * EXTENT_DEFRAG is always cleared. EXTENT_LOCKED and
	 * EXTENT_DELALLOC_NEW are cleared only when the flags above say so.
	 */
	clear_bits = EXTENT_DEFRAG;
	if (range_locked)
		clear_bits |= EXTENT_LOCKED;
	if (clear_new_delalloc_bytes)
		clear_bits |= EXTENT_DELALLOC_NEW;
	clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, clear_bits,
			 (clear_bits & EXTENT_LOCKED) ? 1 : 0, 0,
			 &cached_state);

	if (trans)
		btrfs_end_transaction(trans);

	if (ret || truncated) {
		u64 unwritten_start = start;

		/* For a truncated extent only the tail past logical_len is unwritten. */
		if (truncated)
			unwritten_start += logical_len;
		clear_extent_uptodate(io_tree, unwritten_start, end, NULL);

		/* Drop the cached extent maps for the unwritten part. */
		btrfs_drop_extent_cache(BTRFS_I(inode), unwritten_start, end, 0);

		/*
		 * Free the reserved disk extent when it never became a file
		 * extent item: either an error occurred or the extent was
		 * truncated to zero length, and the item was not inserted
		 * above. NOCOW and PREALLOC ordered extents are excluded.
		 */
		if ((ret || !logical_len) &&
		    clear_reserved_extent &&
		    !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
		    !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
			/*
			 * Discard the range first, but only on error and only
			 * with the DISCARD_SYNC mount option.
			 */
			if (ret && btrfs_test_opt(fs_info, DISCARD_SYNC))
				btrfs_discard_extent(fs_info,
						ordered_extent->disk_bytenr,
						ordered_extent->disk_num_bytes,
						NULL);
			btrfs_free_reserved_extent(fs_info,
					ordered_extent->disk_bytenr,
					ordered_extent->disk_num_bytes, 1);
		}
	}

	/* Removal happens on every path, including the error paths. */
	btrfs_remove_ordered_extent(inode, ordered_extent);

	/*
	 * Two references are dropped here (presumably the caller's and the
	 * one held on behalf of the ordered tree; confirm against the
	 * allocation site).
	 */
	btrfs_put_ordered_extent(ordered_extent);

	btrfs_put_ordered_extent(ordered_extent);

	return ret;
}
2764
2765static void finish_ordered_fn(struct btrfs_work *work)
2766{
2767 struct btrfs_ordered_extent *ordered_extent;
2768 ordered_extent = container_of(work, struct btrfs_ordered_extent, work);
2769 btrfs_finish_ordered_io(ordered_extent);
2770}
2771
2772void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start,
2773 u64 end, int uptodate)
2774{
2775 struct inode *inode = page->mapping->host;
2776 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2777 struct btrfs_ordered_extent *ordered_extent = NULL;
2778 struct btrfs_workqueue *wq;
2779
2780 trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
2781
2782 ClearPagePrivate2(page);
2783 if (!btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
2784 end - start + 1, uptodate))
2785 return;
2786
2787 if (btrfs_is_free_space_inode(BTRFS_I(inode)))
2788 wq = fs_info->endio_freespace_worker;
2789 else
2790 wq = fs_info->endio_write_workers;
2791
2792 btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL);
2793 btrfs_queue_work(wq, &ordered_extent->work);
2794}
2795
2796static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio,
2797 int icsum, struct page *page, int pgoff, u64 start,
2798 size_t len)
2799{
2800 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2801 SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
2802 char *kaddr;
2803 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
2804 u8 *csum_expected;
2805 u8 csum[BTRFS_CSUM_SIZE];
2806
2807 csum_expected = ((u8 *)io_bio->csum) + icsum * csum_size;
2808
2809 kaddr = kmap_atomic(page);
2810 shash->tfm = fs_info->csum_shash;
2811
2812 crypto_shash_digest(shash, kaddr + pgoff, len, csum);
2813
2814 if (memcmp(csum, csum_expected, csum_size))
2815 goto zeroit;
2816
2817 kunmap_atomic(kaddr);
2818 return 0;
2819zeroit:
2820 btrfs_print_data_csum_error(BTRFS_I(inode), start, csum, csum_expected,
2821 io_bio->mirror_num);
2822 if (io_bio->device)
2823 btrfs_dev_stat_inc_and_print(io_bio->device,
2824 BTRFS_DEV_STAT_CORRUPTION_ERRS);
2825 memset(kaddr + pgoff, 1, len);
2826 flush_dcache_page(page);
2827 kunmap_atomic(kaddr);
2828 return -EIO;
2829}
2830
2831
2832
2833
2834
2835
2836static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
2837 u64 phy_offset, struct page *page,
2838 u64 start, u64 end, int mirror)
2839{
2840 size_t offset = start - page_offset(page);
2841 struct inode *inode = page->mapping->host;
2842 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2843 struct btrfs_root *root = BTRFS_I(inode)->root;
2844
2845 if (PageChecked(page)) {
2846 ClearPageChecked(page);
2847 return 0;
2848 }
2849
2850 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
2851 return 0;
2852
2853 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
2854 test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
2855 clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM);
2856 return 0;
2857 }
2858
2859 phy_offset >>= inode->i_sb->s_blocksize_bits;
2860 return check_data_csum(inode, io_bio, phy_offset, page, offset, start,
2861 (size_t)(end - start + 1));
2862}
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874void btrfs_add_delayed_iput(struct inode *inode)
2875{
2876 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2877 struct btrfs_inode *binode = BTRFS_I(inode);
2878
2879 if (atomic_add_unless(&inode->i_count, -1, 1))
2880 return;
2881
2882 atomic_inc(&fs_info->nr_delayed_iputs);
2883 spin_lock(&fs_info->delayed_iput_lock);
2884 ASSERT(list_empty(&binode->delayed_iput));
2885 list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs);
2886 spin_unlock(&fs_info->delayed_iput_lock);
2887 if (!test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags))
2888 wake_up_process(fs_info->cleaner_kthread);
2889}
2890
/*
 * Run the delayed iput of a single inode.
 *
 * Both callers in this file invoke this with fs_info->delayed_iput_lock
 * held.  The lock is released around iput() and re-acquired before
 * returning, so the caller must not assume the delayed_iputs list is
 * unchanged afterwards.
 */
static void run_delayed_iput_locked(struct btrfs_fs_info *fs_info,
				    struct btrfs_inode *inode)
{
	/* Unlink first so the entry reads as "not queued" once the lock drops. */
	list_del_init(&inode->delayed_iput);
	spin_unlock(&fs_info->delayed_iput_lock);
	iput(&inode->vfs_inode);
	/* Wake waiters on delayed_iputs_wait when the counter reaches zero. */
	if (atomic_dec_and_test(&fs_info->nr_delayed_iputs))
		wake_up(&fs_info->delayed_iputs_wait);
	spin_lock(&fs_info->delayed_iput_lock);
}
2901
2902static void btrfs_run_delayed_iput(struct btrfs_fs_info *fs_info,
2903 struct btrfs_inode *inode)
2904{
2905 if (!list_empty(&inode->delayed_iput)) {
2906 spin_lock(&fs_info->delayed_iput_lock);
2907 if (!list_empty(&inode->delayed_iput))
2908 run_delayed_iput_locked(fs_info, inode);
2909 spin_unlock(&fs_info->delayed_iput_lock);
2910 }
2911}
2912
2913void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
2914{
2915
2916 spin_lock(&fs_info->delayed_iput_lock);
2917 while (!list_empty(&fs_info->delayed_iputs)) {
2918 struct btrfs_inode *inode;
2919
2920 inode = list_first_entry(&fs_info->delayed_iputs,
2921 struct btrfs_inode, delayed_iput);
2922 run_delayed_iput_locked(fs_info, inode);
2923 }
2924 spin_unlock(&fs_info->delayed_iput_lock);
2925}
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937int btrfs_wait_on_delayed_iputs(struct btrfs_fs_info *fs_info)
2938{
2939 int ret = wait_event_killable(fs_info->delayed_iputs_wait,
2940 atomic_read(&fs_info->nr_delayed_iputs) == 0);
2941 if (ret)
2942 return -EINTR;
2943 return 0;
2944}
2945
2946
2947
2948
2949
2950int btrfs_orphan_add(struct btrfs_trans_handle *trans,
2951 struct btrfs_inode *inode)
2952{
2953 int ret;
2954
2955 ret = btrfs_insert_orphan_item(trans, inode->root, btrfs_ino(inode));
2956 if (ret && ret != -EEXIST) {
2957 btrfs_abort_transaction(trans, ret);
2958 return ret;
2959 }
2960
2961 return 0;
2962}
2963
2964
2965
2966
2967
2968static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
2969 struct btrfs_inode *inode)
2970{
2971 return btrfs_del_orphan_item(trans, inode->root, btrfs_ino(inode));
2972}
2973
2974
2975
2976
2977
2978int btrfs_orphan_cleanup(struct btrfs_root *root)
2979{
2980 struct btrfs_fs_info *fs_info = root->fs_info;
2981 struct btrfs_path *path;
2982 struct extent_buffer *leaf;
2983 struct btrfs_key key, found_key;
2984 struct btrfs_trans_handle *trans;
2985 struct inode *inode;
2986 u64 last_objectid = 0;
2987 int ret = 0, nr_unlink = 0;
2988
2989 if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
2990 return 0;
2991
2992 path = btrfs_alloc_path();
2993 if (!path) {
2994 ret = -ENOMEM;
2995 goto out;
2996 }
2997 path->reada = READA_BACK;
2998
2999 key.objectid = BTRFS_ORPHAN_OBJECTID;
3000 key.type = BTRFS_ORPHAN_ITEM_KEY;
3001 key.offset = (u64)-1;
3002
3003 while (1) {
3004 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3005 if (ret < 0)
3006 goto out;
3007
3008
3009
3010
3011
3012
3013 if (ret > 0) {
3014 ret = 0;
3015 if (path->slots[0] == 0)
3016 break;
3017 path->slots[0]--;
3018 }
3019
3020
3021 leaf = path->nodes[0];
3022 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3023
3024
3025 if (found_key.objectid != BTRFS_ORPHAN_OBJECTID)
3026 break;
3027 if (found_key.type != BTRFS_ORPHAN_ITEM_KEY)
3028 break;
3029
3030
3031 btrfs_release_path(path);
3032
3033
3034
3035
3036
3037
3038
3039 if (found_key.offset == last_objectid) {
3040 btrfs_err(fs_info,
3041 "Error removing orphan entry, stopping orphan cleanup");
3042 ret = -EINVAL;
3043 goto out;
3044 }
3045
3046 last_objectid = found_key.offset;
3047
3048 found_key.objectid = found_key.offset;
3049 found_key.type = BTRFS_INODE_ITEM_KEY;
3050 found_key.offset = 0;
3051 inode = btrfs_iget(fs_info->sb, last_objectid, root);
3052 ret = PTR_ERR_OR_ZERO(inode);
3053 if (ret && ret != -ENOENT)
3054 goto out;
3055
3056 if (ret == -ENOENT && root == fs_info->tree_root) {
3057 struct btrfs_root *dead_root;
3058 struct btrfs_fs_info *fs_info = root->fs_info;
3059 int is_dead_root = 0;
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073 spin_lock(&fs_info->fs_roots_radix_lock);
3074 dead_root = radix_tree_lookup(&fs_info->fs_roots_radix,
3075 (unsigned long)found_key.objectid);
3076 if (dead_root && btrfs_root_refs(&dead_root->root_item) == 0)
3077 is_dead_root = 1;
3078 spin_unlock(&fs_info->fs_roots_radix_lock);
3079
3080 if (is_dead_root) {
3081
3082 key.offset = found_key.objectid - 1;
3083 continue;
3084 }
3085
3086 }
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107 if (ret == -ENOENT || inode->i_nlink) {
3108 if (!ret)
3109 iput(inode);
3110 trans = btrfs_start_transaction(root, 1);
3111 if (IS_ERR(trans)) {
3112 ret = PTR_ERR(trans);
3113 goto out;
3114 }
3115 btrfs_debug(fs_info, "auto deleting %Lu",
3116 found_key.objectid);
3117 ret = btrfs_del_orphan_item(trans, root,
3118 found_key.objectid);
3119 btrfs_end_transaction(trans);
3120 if (ret)
3121 goto out;
3122 continue;
3123 }
3124
3125 nr_unlink++;
3126
3127
3128 iput(inode);
3129 }
3130
3131 btrfs_release_path(path);
3132
3133 root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE;
3134
3135 if (test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state)) {
3136 trans = btrfs_join_transaction(root);
3137 if (!IS_ERR(trans))
3138 btrfs_end_transaction(trans);
3139 }
3140
3141 if (nr_unlink)
3142 btrfs_debug(fs_info, "unlinked %d orphans", nr_unlink);
3143
3144out:
3145 if (ret)
3146 btrfs_err(fs_info, "could not do orphan cleanup %d", ret);
3147 btrfs_free_path(path);
3148 return ret;
3149}
3150
3151
3152
3153
3154
3155
3156
3157static noinline int acls_after_inode_item(struct extent_buffer *leaf,
3158 int slot, u64 objectid,
3159 int *first_xattr_slot)
3160{
3161 u32 nritems = btrfs_header_nritems(leaf);
3162 struct btrfs_key found_key;
3163 static u64 xattr_access = 0;
3164 static u64 xattr_default = 0;
3165 int scanned = 0;
3166
3167 if (!xattr_access) {
3168 xattr_access = btrfs_name_hash(XATTR_NAME_POSIX_ACL_ACCESS,
3169 strlen(XATTR_NAME_POSIX_ACL_ACCESS));
3170 xattr_default = btrfs_name_hash(XATTR_NAME_POSIX_ACL_DEFAULT,
3171 strlen(XATTR_NAME_POSIX_ACL_DEFAULT));
3172 }
3173
3174 slot++;
3175 *first_xattr_slot = -1;
3176 while (slot < nritems) {
3177 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3178
3179
3180 if (found_key.objectid != objectid)
3181 return 0;
3182
3183
3184 if (found_key.type == BTRFS_XATTR_ITEM_KEY) {
3185 if (*first_xattr_slot == -1)
3186 *first_xattr_slot = slot;
3187 if (found_key.offset == xattr_access ||
3188 found_key.offset == xattr_default)
3189 return 1;
3190 }
3191
3192
3193
3194
3195
3196 if (found_key.type > BTRFS_XATTR_ITEM_KEY)
3197 return 0;
3198
3199 slot++;
3200 scanned++;
3201
3202
3203
3204
3205
3206
3207
3208 if (scanned >= 8)
3209 break;
3210 }
3211
3212
3213
3214
3215 if (*first_xattr_slot == -1)
3216 *first_xattr_slot = slot;
3217 return 1;
3218}
3219
3220
3221
3222
3223static int btrfs_read_locked_inode(struct inode *inode,
3224 struct btrfs_path *in_path)
3225{
3226 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
3227 struct btrfs_path *path = in_path;
3228 struct extent_buffer *leaf;
3229 struct btrfs_inode_item *inode_item;
3230 struct btrfs_root *root = BTRFS_I(inode)->root;
3231 struct btrfs_key location;
3232 unsigned long ptr;
3233 int maybe_acls;
3234 u32 rdev;
3235 int ret;
3236 bool filled = false;
3237 int first_xattr_slot;
3238
3239 ret = btrfs_fill_inode(inode, &rdev);
3240 if (!ret)
3241 filled = true;
3242
3243 if (!path) {
3244 path = btrfs_alloc_path();
3245 if (!path)
3246 return -ENOMEM;
3247 }
3248
3249 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
3250
3251 ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
3252 if (ret) {
3253 if (path != in_path)
3254 btrfs_free_path(path);
3255 return ret;
3256 }
3257
3258 leaf = path->nodes[0];
3259
3260 if (filled)
3261 goto cache_index;
3262
3263 inode_item = btrfs_item_ptr(leaf, path->slots[0],
3264 struct btrfs_inode_item);
3265 inode->i_mode = btrfs_inode_mode(leaf, inode_item);
3266 set_nlink(inode, btrfs_inode_nlink(leaf, inode_item));
3267 i_uid_write(inode, btrfs_inode_uid(leaf, inode_item));
3268 i_gid_write(inode, btrfs_inode_gid(leaf, inode_item));
3269 btrfs_i_size_write(BTRFS_I(inode), btrfs_inode_size(leaf, inode_item));
3270 btrfs_inode_set_file_extent_range(BTRFS_I(inode), 0,
3271 round_up(i_size_read(inode), fs_info->sectorsize));
3272
3273 inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime);
3274 inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->atime);
3275
3276 inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->mtime);
3277 inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->mtime);
3278
3279 inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->ctime);
3280 inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->ctime);
3281
3282 BTRFS_I(inode)->i_otime.tv_sec =
3283 btrfs_timespec_sec(leaf, &inode_item->otime);
3284 BTRFS_I(inode)->i_otime.tv_nsec =
3285 btrfs_timespec_nsec(leaf, &inode_item->otime);
3286
3287 inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item));
3288 BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
3289 BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item);
3290
3291 inode_set_iversion_queried(inode,
3292 btrfs_inode_sequence(leaf, inode_item));
3293 inode->i_generation = BTRFS_I(inode)->generation;
3294 inode->i_rdev = 0;
3295 rdev = btrfs_inode_rdev(leaf, inode_item);
3296
3297 BTRFS_I(inode)->index_cnt = (u64)-1;
3298 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
3299
3300cache_index:
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310 if (BTRFS_I(inode)->last_trans == fs_info->generation)
3311 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
3312 &BTRFS_I(inode)->runtime_flags);
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341 BTRFS_I(inode)->last_unlink_trans = BTRFS_I(inode)->last_trans;
3342
3343
3344
3345
3346
3347
3348
3349 BTRFS_I(inode)->last_reflink_trans = BTRFS_I(inode)->last_trans;
3350
3351 path->slots[0]++;
3352 if (inode->i_nlink != 1 ||
3353 path->slots[0] >= btrfs_header_nritems(leaf))
3354 goto cache_acl;
3355
3356 btrfs_item_key_to_cpu(leaf, &location, path->slots[0]);
3357 if (location.objectid != btrfs_ino(BTRFS_I(inode)))
3358 goto cache_acl;
3359
3360 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
3361 if (location.type == BTRFS_INODE_REF_KEY) {
3362 struct btrfs_inode_ref *ref;
3363
3364 ref = (struct btrfs_inode_ref *)ptr;
3365 BTRFS_I(inode)->dir_index = btrfs_inode_ref_index(leaf, ref);
3366 } else if (location.type == BTRFS_INODE_EXTREF_KEY) {
3367 struct btrfs_inode_extref *extref;
3368
3369 extref = (struct btrfs_inode_extref *)ptr;
3370 BTRFS_I(inode)->dir_index = btrfs_inode_extref_index(leaf,
3371 extref);
3372 }
3373cache_acl:
3374
3375
3376
3377
3378 maybe_acls = acls_after_inode_item(leaf, path->slots[0],
3379 btrfs_ino(BTRFS_I(inode)), &first_xattr_slot);
3380 if (first_xattr_slot != -1) {
3381 path->slots[0] = first_xattr_slot;
3382 ret = btrfs_load_inode_props(inode, path);
3383 if (ret)
3384 btrfs_err(fs_info,
3385 "error loading props for ino %llu (root %llu): %d",
3386 btrfs_ino(BTRFS_I(inode)),
3387 root->root_key.objectid, ret);
3388 }
3389 if (path != in_path)
3390 btrfs_free_path(path);
3391
3392 if (!maybe_acls)
3393 cache_no_acl(inode);
3394
3395 switch (inode->i_mode & S_IFMT) {
3396 case S_IFREG:
3397 inode->i_mapping->a_ops = &btrfs_aops;
3398 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
3399 inode->i_fop = &btrfs_file_operations;
3400 inode->i_op = &btrfs_file_inode_operations;
3401 break;
3402 case S_IFDIR:
3403 inode->i_fop = &btrfs_dir_file_operations;
3404 inode->i_op = &btrfs_dir_inode_operations;
3405 break;
3406 case S_IFLNK:
3407 inode->i_op = &btrfs_symlink_inode_operations;
3408 inode_nohighmem(inode);
3409 inode->i_mapping->a_ops = &btrfs_aops;
3410 break;
3411 default:
3412 inode->i_op = &btrfs_special_inode_operations;
3413 init_special_inode(inode, inode->i_mode, rdev);
3414 break;
3415 }
3416
3417 btrfs_sync_inode_flags_to_i_flags(inode);
3418 return 0;
3419}
3420
3421
3422
3423
3424static void fill_inode_item(struct btrfs_trans_handle *trans,
3425 struct extent_buffer *leaf,
3426 struct btrfs_inode_item *item,
3427 struct inode *inode)
3428{
3429 struct btrfs_map_token token;
3430
3431 btrfs_init_map_token(&token, leaf);
3432
3433 btrfs_set_token_inode_uid(&token, item, i_uid_read(inode));
3434 btrfs_set_token_inode_gid(&token, item, i_gid_read(inode));
3435 btrfs_set_token_inode_size(&token, item, BTRFS_I(inode)->disk_i_size);
3436 btrfs_set_token_inode_mode(&token, item, inode->i_mode);
3437 btrfs_set_token_inode_nlink(&token, item, inode->i_nlink);
3438
3439 btrfs_set_token_timespec_sec(&token, &item->atime,
3440 inode->i_atime.tv_sec);
3441 btrfs_set_token_timespec_nsec(&token, &item->atime,
3442 inode->i_atime.tv_nsec);
3443
3444 btrfs_set_token_timespec_sec(&token, &item->mtime,
3445 inode->i_mtime.tv_sec);
3446 btrfs_set_token_timespec_nsec(&token, &item->mtime,
3447 inode->i_mtime.tv_nsec);
3448
3449 btrfs_set_token_timespec_sec(&token, &item->ctime,
3450 inode->i_ctime.tv_sec);
3451 btrfs_set_token_timespec_nsec(&token, &item->ctime,
3452 inode->i_ctime.tv_nsec);
3453
3454 btrfs_set_token_timespec_sec(&token, &item->otime,
3455 BTRFS_I(inode)->i_otime.tv_sec);
3456 btrfs_set_token_timespec_nsec(&token, &item->otime,
3457 BTRFS_I(inode)->i_otime.tv_nsec);
3458
3459 btrfs_set_token_inode_nbytes(&token, item, inode_get_bytes(inode));
3460 btrfs_set_token_inode_generation(&token, item,
3461 BTRFS_I(inode)->generation);
3462 btrfs_set_token_inode_sequence(&token, item, inode_peek_iversion(inode));
3463 btrfs_set_token_inode_transid(&token, item, trans->transid);
3464 btrfs_set_token_inode_rdev(&token, item, inode->i_rdev);
3465 btrfs_set_token_inode_flags(&token, item, BTRFS_I(inode)->flags);
3466 btrfs_set_token_inode_block_group(&token, item, 0);
3467}
3468
3469
3470
3471
3472static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
3473 struct btrfs_root *root, struct inode *inode)
3474{
3475 struct btrfs_inode_item *inode_item;
3476 struct btrfs_path *path;
3477 struct extent_buffer *leaf;
3478 int ret;
3479
3480 path = btrfs_alloc_path();
3481 if (!path)
3482 return -ENOMEM;
3483
3484 path->leave_spinning = 1;
3485 ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location,
3486 1);
3487 if (ret) {
3488 if (ret > 0)
3489 ret = -ENOENT;
3490 goto failed;
3491 }
3492
3493 leaf = path->nodes[0];
3494 inode_item = btrfs_item_ptr(leaf, path->slots[0],
3495 struct btrfs_inode_item);
3496
3497 fill_inode_item(trans, leaf, inode_item, inode);
3498 btrfs_mark_buffer_dirty(leaf);
3499 btrfs_set_inode_last_trans(trans, BTRFS_I(inode));
3500 ret = 0;
3501failed:
3502 btrfs_free_path(path);
3503 return ret;
3504}
3505
3506
3507
3508
3509noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
3510 struct btrfs_root *root, struct inode *inode)
3511{
3512 struct btrfs_fs_info *fs_info = root->fs_info;
3513 int ret;
3514
3515
3516
3517
3518
3519
3520
3521
3522 if (!btrfs_is_free_space_inode(BTRFS_I(inode))
3523 && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
3524 && !test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
3525 btrfs_update_root_times(trans, root);
3526
3527 ret = btrfs_delayed_update_inode(trans, root, inode);
3528 if (!ret)
3529 btrfs_set_inode_last_trans(trans, BTRFS_I(inode));
3530 return ret;
3531 }
3532
3533 return btrfs_update_inode_item(trans, root, inode);
3534}
3535
3536noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
3537 struct btrfs_root *root,
3538 struct inode *inode)
3539{
3540 int ret;
3541
3542 ret = btrfs_update_inode(trans, root, inode);
3543 if (ret == -ENOSPC)
3544 return btrfs_update_inode_item(trans, root, inode);
3545 return ret;
3546}
3547
3548
3549
3550
3551
3552
/*
 * Remove the directory entry @name (of length @name_len) that links @inode
 * into @dir, without touching the link count of @inode.
 *
 * Steps, in order: delete the dir item, delete the inode back reference
 * (delayed when the directory index is cached in the inode, directly
 * otherwise), delete the delayed dir index, and remove the matching
 * entries from the log.  On success the directory's size is reduced by
 * name_len * 2, both i_versions are bumped, the timestamps are refreshed
 * and the directory inode is updated.
 *
 * Returns 0 on success or a negative errno.  Failures from the back
 * reference removal onwards abort the transaction; a failed lookup or
 * deletion of the dir item is returned without an abort.
 */
static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_inode *dir,
				struct btrfs_inode *inode,
				const char *name, int name_len)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_path *path;
	int ret = 0;
	struct btrfs_dir_item *di;
	u64 index;
	u64 ino = btrfs_ino(inode);
	u64 dir_ino = btrfs_ino(dir);

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	path->leave_spinning = 1;
	di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
				    name, name_len, -1);
	/* A NULL result is mapped to -ENOENT. */
	if (IS_ERR_OR_NULL(di)) {
		ret = di ? PTR_ERR(di) : -ENOENT;
		goto err;
	}
	ret = btrfs_delete_one_dir_name(trans, root, path, di);
	if (ret)
		goto err;
	btrfs_release_path(path);

	/*
	 * When the inode has its directory index cached, the inode ref is
	 * removed through the delayed mechanism and the cached index is used.
	 * If that is not possible (no cached index, or the delayed deletion
	 * failed) the ref is deleted directly, which also yields the index.
	 */
	if (inode->dir_index) {
		ret = btrfs_delayed_delete_inode_ref(inode);
		if (!ret) {
			index = inode->dir_index;
			goto skip_backref;
		}
	}

	ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
				  dir_ino, &index);
	if (ret) {
		btrfs_info(fs_info,
			"failed to delete reference to %.*s, inode %llu parent %llu",
			name_len, name, ino, dir_ino);
		btrfs_abort_transaction(trans, ret);
		goto err;
	}
skip_backref:
	ret = btrfs_delete_delayed_dir_index(trans, dir, index);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto err;
	}

	/* A missing log entry (-ENOENT) is not an error in either log call. */
	ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode,
			dir_ino);
	if (ret != 0 && ret != -ENOENT) {
		btrfs_abort_transaction(trans, ret);
		goto err;
	}

	ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, dir,
			index);
	if (ret == -ENOENT)
		ret = 0;
	else if (ret)
		btrfs_abort_transaction(trans, ret);

	/*
	 * Run a delayed iput that may be queued for this inode right away.
	 * Note this is reached even when the call above failed.
	 */
	btrfs_run_delayed_iput(fs_info, inode);
err:
	btrfs_free_path(path);
	if (ret)
		goto out;

	/* Directory size shrinks by twice the name length. */
	btrfs_i_size_write(dir, dir->vfs_inode.i_size - name_len * 2);
	inode_inc_iversion(&inode->vfs_inode);
	inode_inc_iversion(&dir->vfs_inode);
	inode->vfs_inode.i_ctime = dir->vfs_inode.i_mtime =
		dir->vfs_inode.i_ctime = current_time(&inode->vfs_inode);
	ret = btrfs_update_inode(trans, root, &dir->vfs_inode);
out:
	return ret;
}
3657
3658int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
3659 struct btrfs_root *root,
3660 struct btrfs_inode *dir, struct btrfs_inode *inode,
3661 const char *name, int name_len)
3662{
3663 int ret;
3664 ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
3665 if (!ret) {
3666 drop_nlink(&inode->vfs_inode);
3667 ret = btrfs_update_inode(trans, root, &inode->vfs_inode);
3668 }
3669 return ret;
3670}
3671
3672
3673
3674
3675
3676
3677
3678
3679
3680static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir)
3681{
3682 struct btrfs_root *root = BTRFS_I(dir)->root;
3683
3684
3685
3686
3687
3688
3689
3690
3691 return btrfs_start_transaction_fallback_global_rsv(root, 5);
3692}
3693
3694static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
3695{
3696 struct btrfs_root *root = BTRFS_I(dir)->root;
3697 struct btrfs_trans_handle *trans;
3698 struct inode *inode = d_inode(dentry);
3699 int ret;
3700
3701 trans = __unlink_start_trans(dir);
3702 if (IS_ERR(trans))
3703 return PTR_ERR(trans);
3704
3705 btrfs_record_unlink_dir(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
3706 0);
3707
3708 ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
3709 BTRFS_I(d_inode(dentry)), dentry->d_name.name,
3710 dentry->d_name.len);
3711 if (ret)
3712 goto out;
3713
3714 if (inode->i_nlink == 0) {
3715 ret = btrfs_orphan_add(trans, BTRFS_I(inode));
3716 if (ret)
3717 goto out;
3718 }
3719
3720out:
3721 btrfs_end_transaction(trans);
3722 btrfs_btree_balance_dirty(root->fs_info);
3723 return ret;
3724}
3725
/*
 * Remove the directory entry @dentry of @dir that refers to a subvolume.
 *
 * The inode behind @dentry must be either a subvolume root directory
 * (BTRFS_FIRST_FREE_OBJECTID) or an empty-subvolume placeholder
 * (BTRFS_EMPTY_SUBVOL_DIR_OBJECTID); anything else triggers a warning and
 * -EINVAL.
 *
 * Deletes the dir item, obtains the directory index (from the dir index
 * item for placeholders, from btrfs_del_root_ref() otherwise), deletes the
 * delayed dir index and finally shrinks and updates the directory inode.
 *
 * Returns 0 on success or a negative errno.  Every failure after the
 * initial dir item lookup aborts the transaction.
 */
static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
			       struct inode *dir, struct dentry *dentry)
{
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct btrfs_inode *inode = BTRFS_I(d_inode(dentry));
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_dir_item *di;
	struct btrfs_key key;
	const char *name = dentry->d_name.name;
	int name_len = dentry->d_name.len;
	u64 index;
	int ret;
	u64 objectid;
	u64 dir_ino = btrfs_ino(BTRFS_I(dir));

	/* Work out the objectid of the root the entry points at. */
	if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) {
		objectid = inode->root->root_key.objectid;
	} else if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) {
		objectid = inode->location.objectid;
	} else {
		WARN_ON(1);
		return -EINVAL;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
				   name, name_len, -1);
	/* A NULL result is mapped to -ENOENT; no abort on this path. */
	if (IS_ERR_OR_NULL(di)) {
		ret = di ? PTR_ERR(di) : -ENOENT;
		goto out;
	}

	leaf = path->nodes[0];
	btrfs_dir_item_key_to_cpu(leaf, di, &key);
	/* The dir item is expected to reference the root computed above. */
	WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
	ret = btrfs_delete_one_dir_name(trans, root, path, di);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}
	btrfs_release_path(path);

	/*
	 * For a placeholder inode the index is read from the dir index item
	 * and no root ref is deleted.  Otherwise the root ref is deleted and
	 * btrfs_del_root_ref() supplies the index.
	 */
	if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) {
		di = btrfs_search_dir_index_item(root, path, dir_ino,
						 name, name_len);
		if (IS_ERR_OR_NULL(di)) {
			if (!di)
				ret = -ENOENT;
			else
				ret = PTR_ERR(di);
			btrfs_abort_transaction(trans, ret);
			goto out;
		}

		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		/* The key offset of the dir index item is the index itself. */
		index = key.offset;
		btrfs_release_path(path);
	} else {
		ret = btrfs_del_root_ref(trans, objectid,
					 root->root_key.objectid, dir_ino,
					 &index, name, name_len);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
	}

	ret = btrfs_delete_delayed_dir_index(trans, BTRFS_I(dir), index);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	/* Directory size shrinks by twice the name length. */
	btrfs_i_size_write(BTRFS_I(dir), dir->i_size - name_len * 2);
	inode_inc_iversion(dir);
	dir->i_mtime = dir->i_ctime = current_time(dir);
	ret = btrfs_update_inode_fallback(trans, root, dir);
	if (ret)
		btrfs_abort_transaction(trans, ret);
out:
	btrfs_free_path(path);
	return ret;
}
3823
3824
3825
3826
3827
3828static noinline int may_destroy_subvol(struct btrfs_root *root)
3829{
3830 struct btrfs_fs_info *fs_info = root->fs_info;
3831 struct btrfs_path *path;
3832 struct btrfs_dir_item *di;
3833 struct btrfs_key key;
3834 u64 dir_id;
3835 int ret;
3836
3837 path = btrfs_alloc_path();
3838 if (!path)
3839 return -ENOMEM;
3840
3841
3842 dir_id = btrfs_super_root_dir(fs_info->super_copy);
3843 di = btrfs_lookup_dir_item(NULL, fs_info->tree_root, path,
3844 dir_id, "default", 7, 0);
3845 if (di && !IS_ERR(di)) {
3846 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
3847 if (key.objectid == root->root_key.objectid) {
3848 ret = -EPERM;
3849 btrfs_err(fs_info,
3850 "deleting default subvolume %llu is not allowed",
3851 key.objectid);
3852 goto out;
3853 }
3854 btrfs_release_path(path);
3855 }
3856
3857 key.objectid = root->root_key.objectid;
3858 key.type = BTRFS_ROOT_REF_KEY;
3859 key.offset = (u64)-1;
3860
3861 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
3862 if (ret < 0)
3863 goto out;
3864 BUG_ON(ret == 0);
3865
3866 ret = 0;
3867 if (path->slots[0] > 0) {
3868 path->slots[0]--;
3869 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
3870 if (key.objectid == root->root_key.objectid &&
3871 key.type == BTRFS_ROOT_REF_KEY)
3872 ret = -ENOTEMPTY;
3873 }
3874out:
3875 btrfs_free_path(path);
3876 return ret;
3877}
3878
3879
/*
 * Walk all in-memory inodes of @root in objectid order and prune the
 * dentry aliases of those that are still referenced elsewhere.
 *
 * Unless the filesystem is in an error state, a warning is emitted when
 * @root still has references.
 *
 * root->inode_lock is held while walking root->inode_tree and dropped
 * around the per-inode work.  Because the tree may change while unlocked,
 * the walk restarts from the tree root each time, resuming at the first
 * objectid greater than the last one handled.
 */
static void btrfs_prune_dentries(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct rb_node *node;
	struct rb_node *prev;
	struct btrfs_inode *entry;
	struct inode *inode;
	u64 objectid = 0;

	if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
		WARN_ON(btrfs_root_refs(&root->root_item) != 0);

	spin_lock(&root->inode_lock);
again:
	/* Binary search for @objectid, remembering the last node visited. */
	node = root->inode_tree.rb_node;
	prev = NULL;
	while (node) {
		prev = node;
		entry = rb_entry(node, struct btrfs_inode, rb_node);

		if (objectid < btrfs_ino(entry))
			node = node->rb_left;
		else if (objectid > btrfs_ino(entry))
			node = node->rb_right;
		else
			break;
	}
	/* No exact match: advance to the first entry with ino >= objectid. */
	if (!node) {
		while (prev) {
			entry = rb_entry(prev, struct btrfs_inode, rb_node);
			if (objectid <= btrfs_ino(entry)) {
				node = prev;
				break;
			}
			prev = rb_next(prev);
		}
	}
	while (node) {
		entry = rb_entry(node, struct btrfs_inode, rb_node);
		/* Resume point for the next search, should we restart. */
		objectid = btrfs_ino(entry) + 1;
		inode = igrab(&entry->vfs_inode);
		if (inode) {
			spin_unlock(&root->inode_lock);
			/* More than our own reference: prune its aliases. */
			if (atomic_read(&inode->i_count) > 1)
				d_prune_aliases(inode);
			iput(inode);
			cond_resched();
			spin_lock(&root->inode_lock);
			goto again;
		}

		/* Restart if the lock was dropped in order to reschedule. */
		if (cond_resched_lock(&root->inode_lock))
			goto again;

		node = rb_next(node);
	}
	spin_unlock(&root->inode_lock);
}
3942
3943int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
3944{
3945 struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
3946 struct btrfs_root *root = BTRFS_I(dir)->root;
3947 struct inode *inode = d_inode(dentry);
3948 struct btrfs_root *dest = BTRFS_I(inode)->root;
3949 struct btrfs_trans_handle *trans;
3950 struct btrfs_block_rsv block_rsv;
3951 u64 root_flags;
3952 int ret;
3953 int err;
3954
3955
3956
3957
3958
3959
3960 spin_lock(&dest->root_item_lock);
3961 if (dest->send_in_progress) {
3962 spin_unlock(&dest->root_item_lock);
3963 btrfs_warn(fs_info,
3964 "attempt to delete subvolume %llu during send",
3965 dest->root_key.objectid);
3966 return -EPERM;
3967 }
3968 root_flags = btrfs_root_flags(&dest->root_item);
3969 btrfs_set_root_flags(&dest->root_item,
3970 root_flags | BTRFS_ROOT_SUBVOL_DEAD);
3971 spin_unlock(&dest->root_item_lock);
3972
3973 down_write(&fs_info->subvol_sem);
3974
3975 err = may_destroy_subvol(dest);
3976 if (err)
3977 goto out_up_write;
3978
3979 btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
3980
3981
3982
3983
3984
3985 err = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
3986 if (err)
3987 goto out_up_write;
3988
3989 trans = btrfs_start_transaction(root, 0);
3990 if (IS_ERR(trans)) {
3991 err = PTR_ERR(trans);
3992 goto out_release;
3993 }
3994 trans->block_rsv = &block_rsv;
3995 trans->bytes_reserved = block_rsv.size;
3996
3997 btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));
3998
3999 ret = btrfs_unlink_subvol(trans, dir, dentry);
4000 if (ret) {
4001 err = ret;
4002 btrfs_abort_transaction(trans, ret);
4003 goto out_end_trans;
4004 }
4005
4006 btrfs_record_root_in_trans(trans, dest);
4007
4008 memset(&dest->root_item.drop_progress, 0,
4009 sizeof(dest->root_item.drop_progress));
4010 dest->root_item.drop_level = 0;
4011 btrfs_set_root_refs(&dest->root_item, 0);
4012
4013 if (!test_and_set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &dest->state)) {
4014 ret = btrfs_insert_orphan_item(trans,
4015 fs_info->tree_root,
4016 dest->root_key.objectid);
4017 if (ret) {
4018 btrfs_abort_transaction(trans, ret);
4019 err = ret;
4020 goto out_end_trans;
4021 }
4022 }
4023
4024 ret = btrfs_uuid_tree_remove(trans, dest->root_item.uuid,
4025 BTRFS_UUID_KEY_SUBVOL,
4026 dest->root_key.objectid);
4027 if (ret && ret != -ENOENT) {
4028 btrfs_abort_transaction(trans, ret);
4029 err = ret;
4030 goto out_end_trans;
4031 }
4032 if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) {
4033 ret = btrfs_uuid_tree_remove(trans,
4034 dest->root_item.received_uuid,
4035 BTRFS_UUID_KEY_RECEIVED_SUBVOL,
4036 dest->root_key.objectid);
4037 if (ret && ret != -ENOENT) {
4038 btrfs_abort_transaction(trans, ret);
4039 err = ret;
4040 goto out_end_trans;
4041 }
4042 }
4043
4044 free_anon_bdev(dest->anon_dev);
4045 dest->anon_dev = 0;
4046out_end_trans:
4047 trans->block_rsv = NULL;
4048 trans->bytes_reserved = 0;
4049 ret = btrfs_end_transaction(trans);
4050 if (ret && !err)
4051 err = ret;
4052 inode->i_flags |= S_DEAD;
4053out_release:
4054 btrfs_subvolume_release_metadata(fs_info, &block_rsv);
4055out_up_write:
4056 up_write(&fs_info->subvol_sem);
4057 if (err) {
4058 spin_lock(&dest->root_item_lock);
4059 root_flags = btrfs_root_flags(&dest->root_item);
4060 btrfs_set_root_flags(&dest->root_item,
4061 root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
4062 spin_unlock(&dest->root_item_lock);
4063 } else {
4064 d_invalidate(dentry);
4065 btrfs_prune_dentries(dest);
4066 ASSERT(dest->send_in_progress == 0);
4067
4068
4069 if (dest->ino_cache_inode) {
4070 iput(dest->ino_cache_inode);
4071 dest->ino_cache_inode = NULL;
4072 }
4073 }
4074
4075 return err;
4076}
4077
4078static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
4079{
4080 struct inode *inode = d_inode(dentry);
4081 int err = 0;
4082 struct btrfs_root *root = BTRFS_I(dir)->root;
4083 struct btrfs_trans_handle *trans;
4084 u64 last_unlink_trans;
4085
4086 if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
4087 return -ENOTEMPTY;
4088 if (btrfs_ino(BTRFS_I(inode)) == BTRFS_FIRST_FREE_OBJECTID)
4089 return btrfs_delete_subvolume(dir, dentry);
4090
4091 trans = __unlink_start_trans(dir);
4092 if (IS_ERR(trans))
4093 return PTR_ERR(trans);
4094
4095 if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
4096 err = btrfs_unlink_subvol(trans, dir, dentry);
4097 goto out;
4098 }
4099
4100 err = btrfs_orphan_add(trans, BTRFS_I(inode));
4101 if (err)
4102 goto out;
4103
4104 last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
4105
4106
4107 err = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
4108 BTRFS_I(d_inode(dentry)), dentry->d_name.name,
4109 dentry->d_name.len);
4110 if (!err) {
4111 btrfs_i_size_write(BTRFS_I(inode), 0);
4112
4113
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123 if (last_unlink_trans >= trans->transid)
4124 BTRFS_I(dir)->last_unlink_trans = last_unlink_trans;
4125 }
4126out:
4127 btrfs_end_transaction(trans);
4128 btrfs_btree_balance_dirty(root->fs_info);
4129
4130 return err;
4131}
4132
4133
4134
4135
4136
/*
 * Positive (non-error) return value of btrfs_truncate_inode_items(): the item
 * walk stopped at an inline extent that straddles the new size and cannot be
 * shortened in place.  Presumably the caller then finishes the job with
 * btrfs_truncate_block() -- confirm against the callers.
 */
#define NEED_TRUNCATE_BLOCK 1
4138
4139
4140
4141
4142
4143
4144
4145
4146
4147
4148
4149
/*
 * Delete or shrink the items of @inode stored in @root, walking backwards
 * from the highest possible key of the inode.
 *
 * @trans:    transaction handle used for all tree modifications
 * @root:     tree to operate on; may differ from the inode's own root (the
 *            log tree case is special-cased below)
 * @inode:    inode whose items are removed
 * @new_size: items starting at or beyond this offset are deleted, a file
 *            extent item crossing it is shortened
 * @min_type: lowest key type to touch; the walk stops at the first item with
 *            a smaller type.  A non-zero @new_size requires
 *            BTRFS_EXTENT_DATA_KEY (enforced with BUG_ON).
 *
 * Returns 0 on success, NEED_TRUNCATE_BLOCK when an inline extent crossing
 * @new_size could not be shortened in place, -EAGAIN when the caller should
 * end the transaction and call again, or another negative errno.
 */
int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct inode *inode,
			       u64 new_size, u32 min_type)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u64 extent_start = 0;
	u64 extent_num_bytes = 0;
	u64 extent_offset = 0;
	u64 item_end = 0;
	u64 last_size = new_size;
	u32 found_type = (u8)-1;
	int found_extent;
	int del_item;
	int pending_del_nr = 0;
	int pending_del_slot = 0;
	int extent_type = -1;
	int ret;
	u64 ino = btrfs_ino(BTRFS_I(inode));
	u64 bytes_deleted = 0;
	bool be_nice = false;
	bool should_throttle = false;
	const u64 lock_start = ALIGN_DOWN(new_size, fs_info->sectorsize);
	struct extent_state *cached_state = NULL;

	BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);

	/*
	 * Only back off periodically (the -EAGAIN and throttling paths below)
	 * for inodes that are not free space inodes and that live in a
	 * shareable root.
	 */
	if (!btrfs_is_free_space_inode(BTRFS_I(inode)) &&
	    test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
		be_nice = true;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->reada = READA_BACK;

	if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
		lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, (u64)-1,
				 &cached_state);

		/*
		 * Drop cached extent maps from the next sector boundary at or
		 * after new_size onwards; the sector containing new_size
		 * itself is left alone.
		 */
		btrfs_drop_extent_cache(BTRFS_I(inode), ALIGN(new_size,
					fs_info->sectorsize),
					(u64)-1, 0);
	}

	/*
	 * When everything is being removed (min_type == 0) from the inode's
	 * own root, its delayed inode items are discarded as well.
	 */
	if (min_type == 0 && root == BTRFS_I(inode)->root)
		btrfs_kill_delayed_inode_items(BTRFS_I(inode));

	/* Start from the largest possible key of this inode. */
	key.objectid = ino;
	key.offset = (u64)-1;
	key.type = (u8)-1;

search_again:
	/*
	 * After more than 32M worth of extents have been dropped, return
	 * -EAGAIN if btrfs_should_end_transaction() asks for it.
	 */
	if (be_nice && bytes_deleted > SZ_32M &&
	    btrfs_should_end_transaction(trans)) {
		ret = -EAGAIN;
		goto out;
	}

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0)
		goto out;

	if (ret > 0) {
		ret = 0;
		/*
		 * No exact match: step back to the previous slot.  At slot 0
		 * there is no previous item in this leaf, so we are done.
		 */
		if (path->slots[0] == 0)
			goto out;
		path->slots[0]--;
	}

	while (1) {
		u64 clear_start = 0, clear_len = 0;

		fi = NULL;
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		found_type = found_key.type;

		if (found_key.objectid != ino)
			break;

		if (found_type < min_type)
			break;

		/* Compute the last byte (inclusive) covered by this item. */
		item_end = found_key.offset;
		if (found_type == BTRFS_EXTENT_DATA_KEY) {
			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);
			extent_type = btrfs_file_extent_type(leaf, fi);
			if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
				item_end +=
				    btrfs_file_extent_num_bytes(leaf, fi);

				trace_btrfs_truncate_show_fi_regular(
					BTRFS_I(inode), leaf, fi,
					found_key.offset);
			} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
				item_end += btrfs_file_extent_ram_bytes(leaf,
									fi);

				trace_btrfs_truncate_show_fi_inline(
					BTRFS_I(inode), leaf, fi, path->slots[0],
					found_key.offset);
			}
			item_end--;
		}
		/*
		 * Items of a type above min_type are always deleted.  Items of
		 * exactly min_type are deleted when they start at or after
		 * new_size, shortened when they cross it, and end the walk
		 * when they lie entirely below it.
		 */
		if (found_type > min_type) {
			del_item = 1;
		} else {
			if (item_end < new_size)
				break;
			if (found_key.offset >= new_size)
				del_item = 1;
			else
				del_item = 0;
		}
		found_extent = 0;
		/* Only file extent items need the size accounting below. */
		if (found_type != BTRFS_EXTENT_DATA_KEY)
			goto delete;

		if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
			u64 num_dec;

			clear_start = found_key.offset;
			extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
			if (!del_item) {
				/*
				 * Shorten the extent item so that it ends at
				 * new_size rounded up to the sector size.
				 */
				u64 orig_num_bytes =
					btrfs_file_extent_num_bytes(leaf, fi);
				extent_num_bytes = ALIGN(new_size -
						found_key.offset,
						fs_info->sectorsize);
				clear_start = ALIGN(new_size, fs_info->sectorsize);
				btrfs_set_file_extent_num_bytes(leaf, fi,
							 extent_num_bytes);
				num_dec = (orig_num_bytes -
					   extent_num_bytes);
				if (test_bit(BTRFS_ROOT_SHAREABLE,
					     &root->state) &&
				    extent_start != 0)
					inode_sub_bytes(inode, num_dec);
				btrfs_mark_buffer_dirty(leaf);
			} else {
				extent_num_bytes =
					btrfs_file_extent_disk_num_bytes(leaf,
									 fi);
				extent_offset = found_key.offset -
					btrfs_file_extent_offset(leaf, fi);

				/*
				 * A disk bytenr of 0 is not treated as a real
				 * extent: nothing is freed or subtracted from
				 * the inode's byte count for it.
				 */
				num_dec = btrfs_file_extent_num_bytes(leaf, fi);
				if (extent_start != 0) {
					found_extent = 1;
					if (test_bit(BTRFS_ROOT_SHAREABLE,
						     &root->state))
						inode_sub_bytes(inode, num_dec);
				}
			}
			clear_len = num_dec;
		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
			/*
			 * An inline extent is shortened in place only when it
			 * carries no encryption, other encoding or
			 * compression.
			 */
			if (!del_item &&
			    btrfs_file_extent_encryption(leaf, fi) == 0 &&
			    btrfs_file_extent_other_encoding(leaf, fi) == 0 &&
			    btrfs_file_extent_compression(leaf, fi) == 0) {
				u32 size = (u32)(new_size - found_key.offset);

				btrfs_set_file_extent_ram_bytes(leaf, fi, size);
				size = btrfs_file_extent_calc_inline_size(size);
				btrfs_truncate_item(path, size, 1);
			} else if (!del_item) {
				/*
				 * Encoded inline extent crossing new_size:
				 * bail out here; last_size keeps the value it
				 * had before this item was examined.
				 */
				ret = NEED_TRUNCATE_BLOCK;
				break;
			} else {
				/*
				 * A deleted inline extent is cleared from the
				 * file extent range as one full sector.
				 */
				clear_len = fs_info->sectorsize;
			}

			if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
				inode_sub_bytes(inode, item_end + 1 - new_size);
		}
delete:
		/*
		 * The file extent range tracking is only updated when working
		 * on the inode's own root, not when @root is some other tree
		 * (e.g. the log tree).
		 */
		if (root == BTRFS_I(inode)->root) {
			ret = btrfs_inode_clear_file_extent_range(BTRFS_I(inode),
						  clear_start, clear_len);
			if (ret) {
				btrfs_abort_transaction(trans, ret);
				break;
			}
		}

		if (del_item)
			last_size = found_key.offset;
		else
			last_size = new_size;
		if (del_item) {
			if (!pending_del_nr) {
				/* Nothing pending yet: start a new batch. */
				pending_del_slot = path->slots[0];
				pending_del_nr = 1;
			} else if (pending_del_nr &&
				   path->slots[0] + 1 == pending_del_slot) {
				/* Directly below the batch: extend it. */
				pending_del_nr++;
				pending_del_slot = path->slots[0];
			} else {
				BUG();
			}
		} else {
			break;
		}
		should_throttle = false;

		/* Drop the data extent reference, except for the log tree. */
		if (found_extent &&
		    root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
			struct btrfs_ref ref = { 0 };

			bytes_deleted += extent_num_bytes;

			btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF,
					extent_start, extent_num_bytes, 0);
			ref.real_root = root->root_key.objectid;
			btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
					ino, extent_offset);
			ret = btrfs_free_extent(trans, &ref);
			if (ret) {
				btrfs_abort_transaction(trans, ret);
				break;
			}
			if (be_nice) {
				if (btrfs_should_throttle_delayed_refs(trans))
					should_throttle = true;
			}
		}

		if (found_type == BTRFS_INODE_ITEM_KEY)
			break;

		/*
		 * Flush the pending batch and restart the search when the
		 * start of the leaf is reached, when the current slot is not
		 * the head of the batch, or when throttling was requested.
		 * Otherwise just move one slot down in the same leaf.
		 */
		if (path->slots[0] == 0 ||
		    path->slots[0] != pending_del_slot ||
		    should_throttle) {
			if (pending_del_nr) {
				ret = btrfs_del_items(trans, root, path,
						pending_del_slot,
						pending_del_nr);
				if (ret) {
					btrfs_abort_transaction(trans, ret);
					break;
				}
				pending_del_nr = 0;
			}
			btrfs_release_path(path);

			/*
			 * The path has been released at this point.  If the
			 * delayed refs reservation cannot be refilled without
			 * flushing, return -EAGAIN to the caller.
			 */
			if (should_throttle) {
				ret = btrfs_delayed_refs_rsv_refill(fs_info,
							BTRFS_RESERVE_NO_FLUSH);
				if (ret) {
					ret = -EAGAIN;
					break;
				}
			}
			goto search_again;
		} else {
			path->slots[0]--;
		}
	}
out:
	/* Delete whatever is still batched, unless an error occurred. */
	if (ret >= 0 && pending_del_nr) {
		int err;

		err = btrfs_del_items(trans, root, path, pending_del_slot,
				      pending_del_nr);
		if (err) {
			btrfs_abort_transaction(trans, err);
			ret = err;
		}
	}
	if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
		ASSERT(last_size >= new_size);
		/* On complete success the size recorded is exactly new_size. */
		if (!ret && last_size > new_size)
			last_size = new_size;
		btrfs_inode_safe_disk_i_size_write(inode, last_size);
		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start,
				     (u64)-1, &cached_state);
	}

	btrfs_free_path(path);
	return ret;
}
4494
4495
4496
4497
4498
4499
4500
4501
4502
4503
4504
4505
4506int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
4507 int front)
4508{
4509 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
4510 struct address_space *mapping = inode->i_mapping;
4511 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
4512 struct btrfs_ordered_extent *ordered;
4513 struct extent_state *cached_state = NULL;
4514 struct extent_changeset *data_reserved = NULL;
4515 char *kaddr;
4516 bool only_release_metadata = false;
4517 u32 blocksize = fs_info->sectorsize;
4518 pgoff_t index = from >> PAGE_SHIFT;
4519 unsigned offset = from & (blocksize - 1);
4520 struct page *page;
4521 gfp_t mask = btrfs_alloc_write_mask(mapping);
4522 size_t write_bytes = blocksize;
4523 int ret = 0;
4524 u64 block_start;
4525 u64 block_end;
4526
4527 if (IS_ALIGNED(offset, blocksize) &&
4528 (!len || IS_ALIGNED(len, blocksize)))
4529 goto out;
4530
4531 block_start = round_down(from, blocksize);
4532 block_end = block_start + blocksize - 1;
4533
4534 ret = btrfs_check_data_free_space(BTRFS_I(inode), &data_reserved,
4535 block_start, blocksize);
4536 if (ret < 0) {
4537 if (btrfs_check_nocow_lock(BTRFS_I(inode), block_start,
4538 &write_bytes) > 0) {
4539
4540 only_release_metadata = true;
4541 } else {
4542 goto out;
4543 }
4544 }
4545 ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), blocksize);
4546 if (ret < 0) {
4547 if (!only_release_metadata)
4548 btrfs_free_reserved_data_space(BTRFS_I(inode),
4549 data_reserved, block_start, blocksize);
4550 goto out;
4551 }
4552again:
4553 page = find_or_create_page(mapping, index, mask);
4554 if (!page) {
4555 btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
4556 block_start, blocksize, true);
4557 btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
4558 ret = -ENOMEM;
4559 goto out;
4560 }
4561
4562 if (!PageUptodate(page)) {
4563 ret = btrfs_readpage(NULL, page);
4564 lock_page(page);
4565 if (page->mapping != mapping) {
4566 unlock_page(page);
4567 put_page(page);
4568 goto again;
4569 }
4570 if (!PageUptodate(page)) {
4571 ret = -EIO;
4572 goto out_unlock;
4573 }
4574 }
4575 wait_on_page_writeback(page);
4576
4577 lock_extent_bits(io_tree, block_start, block_end, &cached_state);
4578 set_page_extent_mapped(page);
4579
4580 ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode), block_start);
4581 if (ordered) {
4582 unlock_extent_cached(io_tree, block_start, block_end,
4583 &cached_state);
4584 unlock_page(page);
4585 put_page(page);
4586 btrfs_start_ordered_extent(inode, ordered, 1);
4587 btrfs_put_ordered_extent(ordered);
4588 goto again;
4589 }
4590
4591 clear_extent_bit(&BTRFS_I(inode)->io_tree, block_start, block_end,
4592 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
4593 0, 0, &cached_state);
4594
4595 ret = btrfs_set_extent_delalloc(BTRFS_I(inode), block_start, block_end, 0,
4596 &cached_state);
4597 if (ret) {
4598 unlock_extent_cached(io_tree, block_start, block_end,
4599 &cached_state);
4600 goto out_unlock;
4601 }
4602
4603 if (offset != blocksize) {
4604 if (!len)
4605 len = blocksize - offset;
4606 kaddr = kmap(page);
4607 if (front)
4608 memset(kaddr + (block_start - page_offset(page)),
4609 0, offset);
4610 else
4611 memset(kaddr + (block_start - page_offset(page)) + offset,
4612 0, len);
4613 flush_dcache_page(page);
4614 kunmap(page);
4615 }
4616 ClearPageChecked(page);
4617 set_page_dirty(page);
4618 unlock_extent_cached(io_tree, block_start, block_end, &cached_state);
4619
4620 if (only_release_metadata)
4621 set_extent_bit(&BTRFS_I(inode)->io_tree, block_start,
4622 block_end, EXTENT_NORESERVE, NULL, NULL,
4623 GFP_NOFS);
4624
4625out_unlock:
4626 if (ret) {
4627 if (only_release_metadata)
4628 btrfs_delalloc_release_metadata(BTRFS_I(inode),
4629 blocksize, true);
4630 else
4631 btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
4632 block_start, blocksize, true);
4633 }
4634 btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
4635 unlock_page(page);
4636 put_page(page);
4637out:
4638 if (only_release_metadata)
4639 btrfs_check_nocow_unlock(BTRFS_I(inode));
4640 extent_changeset_free(data_reserved);
4641 return ret;
4642}
4643
4644static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode,
4645 u64 offset, u64 len)
4646{
4647 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
4648 struct btrfs_trans_handle *trans;
4649 int ret;
4650
4651
4652
4653
4654
4655 if (btrfs_fs_incompat(fs_info, NO_HOLES)) {
4656 BTRFS_I(inode)->last_trans = fs_info->generation;
4657 BTRFS_I(inode)->last_sub_trans = root->log_transid;
4658 BTRFS_I(inode)->last_log_commit = root->last_log_commit;
4659 return 0;
4660 }
4661
4662
4663
4664
4665
4666
4667 trans = btrfs_start_transaction(root, 3);
4668 if (IS_ERR(trans))
4669 return PTR_ERR(trans);
4670
4671 ret = btrfs_drop_extents(trans, root, inode, offset, offset + len, 1);
4672 if (ret) {
4673 btrfs_abort_transaction(trans, ret);
4674 btrfs_end_transaction(trans);
4675 return ret;
4676 }
4677
4678 ret = btrfs_insert_file_extent(trans, root, btrfs_ino(BTRFS_I(inode)),
4679 offset, 0, 0, len, 0, len, 0, 0, 0);
4680 if (ret)
4681 btrfs_abort_transaction(trans, ret);
4682 else
4683 btrfs_update_inode(trans, root, inode);
4684 btrfs_end_transaction(trans);
4685 return ret;
4686}
4687
4688
4689
4690
4691
4692
4693
4694int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
4695{
4696 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
4697 struct btrfs_root *root = BTRFS_I(inode)->root;
4698 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
4699 struct extent_map *em = NULL;
4700 struct extent_state *cached_state = NULL;
4701 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
4702 u64 hole_start = ALIGN(oldsize, fs_info->sectorsize);
4703 u64 block_end = ALIGN(size, fs_info->sectorsize);
4704 u64 last_byte;
4705 u64 cur_offset;
4706 u64 hole_size;
4707 int err = 0;
4708
4709
4710
4711
4712
4713
4714 err = btrfs_truncate_block(inode, oldsize, 0, 0);
4715 if (err)
4716 return err;
4717
4718 if (size <= hole_start)
4719 return 0;
4720
4721 btrfs_lock_and_flush_ordered_range(BTRFS_I(inode), hole_start,
4722 block_end - 1, &cached_state);
4723 cur_offset = hole_start;
4724 while (1) {
4725 em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
4726 block_end - cur_offset);
4727 if (IS_ERR(em)) {
4728 err = PTR_ERR(em);
4729 em = NULL;
4730 break;
4731 }
4732 last_byte = min(extent_map_end(em), block_end);
4733 last_byte = ALIGN(last_byte, fs_info->sectorsize);
4734 hole_size = last_byte - cur_offset;
4735
4736 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
4737 struct extent_map *hole_em;
4738
4739 err = maybe_insert_hole(root, inode, cur_offset,
4740 hole_size);
4741 if (err)
4742 break;
4743
4744 err = btrfs_inode_set_file_extent_range(BTRFS_I(inode),
4745 cur_offset, hole_size);
4746 if (err)
4747 break;
4748
4749 btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset,
4750 cur_offset + hole_size - 1, 0);
4751 hole_em = alloc_extent_map();
4752 if (!hole_em) {
4753 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
4754 &BTRFS_I(inode)->runtime_flags);
4755 goto next;
4756 }
4757 hole_em->start = cur_offset;
4758 hole_em->len = hole_size;
4759 hole_em->orig_start = cur_offset;
4760
4761 hole_em->block_start = EXTENT_MAP_HOLE;
4762 hole_em->block_len = 0;
4763 hole_em->orig_block_len = 0;
4764 hole_em->ram_bytes = hole_size;
4765 hole_em->compress_type = BTRFS_COMPRESS_NONE;
4766 hole_em->generation = fs_info->generation;
4767
4768 while (1) {
4769 write_lock(&em_tree->lock);
4770 err = add_extent_mapping(em_tree, hole_em, 1);
4771 write_unlock(&em_tree->lock);
4772 if (err != -EEXIST)
4773 break;
4774 btrfs_drop_extent_cache(BTRFS_I(inode),
4775 cur_offset,
4776 cur_offset +
4777 hole_size - 1, 0);
4778 }
4779 free_extent_map(hole_em);
4780 } else {
4781 err = btrfs_inode_set_file_extent_range(BTRFS_I(inode),
4782 cur_offset, hole_size);
4783 if (err)
4784 break;
4785 }
4786next:
4787 free_extent_map(em);
4788 em = NULL;
4789 cur_offset = last_byte;
4790 if (cur_offset >= block_end)
4791 break;
4792 }
4793 free_extent_map(em);
4794 unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state);
4795 return err;
4796}
4797
4798static int btrfs_setsize(struct inode *inode, struct iattr *attr)
4799{
4800 struct btrfs_root *root = BTRFS_I(inode)->root;
4801 struct btrfs_trans_handle *trans;
4802 loff_t oldsize = i_size_read(inode);
4803 loff_t newsize = attr->ia_size;
4804 int mask = attr->ia_valid;
4805 int ret;
4806
4807
4808
4809
4810
4811
4812
4813 if (newsize != oldsize) {
4814 inode_inc_iversion(inode);
4815 if (!(mask & (ATTR_CTIME | ATTR_MTIME)))
4816 inode->i_ctime = inode->i_mtime =
4817 current_time(inode);
4818 }
4819
4820 if (newsize > oldsize) {
4821
4822
4823
4824
4825
4826
4827
4828 btrfs_drew_write_lock(&root->snapshot_lock);
4829 ret = btrfs_cont_expand(inode, oldsize, newsize);
4830 if (ret) {
4831 btrfs_drew_write_unlock(&root->snapshot_lock);
4832 return ret;
4833 }
4834
4835 trans = btrfs_start_transaction(root, 1);
4836 if (IS_ERR(trans)) {
4837 btrfs_drew_write_unlock(&root->snapshot_lock);
4838 return PTR_ERR(trans);
4839 }
4840
4841 i_size_write(inode, newsize);
4842 btrfs_inode_safe_disk_i_size_write(inode, 0);
4843 pagecache_isize_extended(inode, oldsize, newsize);
4844 ret = btrfs_update_inode(trans, root, inode);
4845 btrfs_drew_write_unlock(&root->snapshot_lock);
4846 btrfs_end_transaction(trans);
4847 } else {
4848
4849
4850
4851
4852
4853
4854 if (newsize == 0)
4855 set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
4856 &BTRFS_I(inode)->runtime_flags);
4857
4858 truncate_setsize(inode, newsize);
4859
4860
4861 btrfs_inode_block_unlocked_dio(BTRFS_I(inode));
4862 inode_dio_wait(inode);
4863 btrfs_inode_resume_unlocked_dio(BTRFS_I(inode));
4864
4865 ret = btrfs_truncate(inode, newsize == oldsize);
4866 if (ret && inode->i_nlink) {
4867 int err;
4868
4869
4870
4871
4872
4873
4874
4875 err = btrfs_wait_ordered_range(inode, 0, (u64)-1);
4876 if (err)
4877 return err;
4878 i_size_write(inode, BTRFS_I(inode)->disk_i_size);
4879 }
4880 }
4881
4882 return ret;
4883}
4884
4885static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
4886{
4887 struct inode *inode = d_inode(dentry);
4888 struct btrfs_root *root = BTRFS_I(inode)->root;
4889 int err;
4890
4891 if (btrfs_root_readonly(root))
4892 return -EROFS;
4893
4894 err = setattr_prepare(dentry, attr);
4895 if (err)
4896 return err;
4897
4898 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
4899 err = btrfs_setsize(inode, attr);
4900 if (err)
4901 return err;
4902 }
4903
4904 if (attr->ia_valid) {
4905 setattr_copy(inode, attr);
4906 inode_inc_iversion(inode);
4907 err = btrfs_dirty_inode(inode);
4908
4909 if (!err && attr->ia_valid & ATTR_MODE)
4910 err = posix_acl_chmod(inode, inode->i_mode);
4911 }
4912
4913 return err;
4914}
4915
4916
4917
4918
4919
4920
4921
4922
4923
4924
4925
4926
4927
4928static void evict_inode_truncate_pages(struct inode *inode)
4929{
4930 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
4931 struct extent_map_tree *map_tree = &BTRFS_I(inode)->extent_tree;
4932 struct rb_node *node;
4933
4934 ASSERT(inode->i_state & I_FREEING);
4935 truncate_inode_pages_final(&inode->i_data);
4936
4937 write_lock(&map_tree->lock);
4938 while (!RB_EMPTY_ROOT(&map_tree->map.rb_root)) {
4939 struct extent_map *em;
4940
4941 node = rb_first_cached(&map_tree->map);
4942 em = rb_entry(node, struct extent_map, rb_node);
4943 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
4944 clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
4945 remove_extent_mapping(map_tree, em);
4946 free_extent_map(em);
4947 if (need_resched()) {
4948 write_unlock(&map_tree->lock);
4949 cond_resched();
4950 write_lock(&map_tree->lock);
4951 }
4952 }
4953 write_unlock(&map_tree->lock);
4954
4955
4956
4957
4958
4959
4960
4961
4962
4963
4964
4965
4966
4967
4968
4969
4970
4971 spin_lock(&io_tree->lock);
4972 while (!RB_EMPTY_ROOT(&io_tree->state)) {
4973 struct extent_state *state;
4974 struct extent_state *cached_state = NULL;
4975 u64 start;
4976 u64 end;
4977 unsigned state_flags;
4978
4979 node = rb_first(&io_tree->state);
4980 state = rb_entry(node, struct extent_state, rb_node);
4981 start = state->start;
4982 end = state->end;
4983 state_flags = state->state;
4984 spin_unlock(&io_tree->lock);
4985
4986 lock_extent_bits(io_tree, start, end, &cached_state);
4987
4988
4989
4990
4991
4992
4993
4994
4995
4996 if (state_flags & EXTENT_DELALLOC)
4997 btrfs_qgroup_free_data(BTRFS_I(inode), NULL, start,
4998 end - start + 1);
4999
5000 clear_extent_bit(io_tree, start, end,
5001 EXTENT_LOCKED | EXTENT_DELALLOC |
5002 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
5003 &cached_state);
5004
5005 cond_resched();
5006 spin_lock(&io_tree->lock);
5007 }
5008 spin_unlock(&io_tree->lock);
5009}
5010
5011static struct btrfs_trans_handle *evict_refill_and_join(struct btrfs_root *root,
5012 struct btrfs_block_rsv *rsv)
5013{
5014 struct btrfs_fs_info *fs_info = root->fs_info;
5015 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
5016 struct btrfs_trans_handle *trans;
5017 u64 delayed_refs_extra = btrfs_calc_insert_metadata_size(fs_info, 1);
5018 int ret;
5019
5020
5021
5022
5023
5024
5025
5026
5027
5028
5029
5030
5031
5032
5033 ret = btrfs_block_rsv_refill(root, rsv, rsv->size + delayed_refs_extra,
5034 BTRFS_RESERVE_FLUSH_EVICT);
5035 if (ret) {
5036
5037
5038
5039
5040 if (btrfs_check_space_for_delayed_refs(fs_info) ||
5041 btrfs_block_rsv_migrate(global_rsv, rsv, rsv->size, 0)) {
5042 btrfs_warn(fs_info,
5043 "could not allocate space for delete; will truncate on mount");
5044 return ERR_PTR(-ENOSPC);
5045 }
5046 delayed_refs_extra = 0;
5047 }
5048
5049 trans = btrfs_join_transaction(root);
5050 if (IS_ERR(trans))
5051 return trans;
5052
5053 if (delayed_refs_extra) {
5054 trans->block_rsv = &fs_info->trans_block_rsv;
5055 trans->bytes_reserved = delayed_refs_extra;
5056 btrfs_block_rsv_migrate(rsv, trans->block_rsv,
5057 delayed_refs_extra, 1);
5058 }
5059 return trans;
5060}
5061
5062void btrfs_evict_inode(struct inode *inode)
5063{
5064 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
5065 struct btrfs_trans_handle *trans;
5066 struct btrfs_root *root = BTRFS_I(inode)->root;
5067 struct btrfs_block_rsv *rsv;
5068 int ret;
5069
5070 trace_btrfs_inode_evict(inode);
5071
5072 if (!root) {
5073 clear_inode(inode);
5074 return;
5075 }
5076
5077 evict_inode_truncate_pages(inode);
5078
5079 if (inode->i_nlink &&
5080 ((btrfs_root_refs(&root->root_item) != 0 &&
5081 root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID) ||
5082 btrfs_is_free_space_inode(BTRFS_I(inode))))
5083 goto no_delete;
5084
5085 if (is_bad_inode(inode))
5086 goto no_delete;
5087
5088 btrfs_free_io_failure_record(BTRFS_I(inode), 0, (u64)-1);
5089
5090 if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
5091 goto no_delete;
5092
5093 if (inode->i_nlink > 0) {
5094 BUG_ON(btrfs_root_refs(&root->root_item) != 0 &&
5095 root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID);
5096 goto no_delete;
5097 }
5098
5099 ret = btrfs_commit_inode_delayed_inode(BTRFS_I(inode));
5100 if (ret)
5101 goto no_delete;
5102
5103 rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
5104 if (!rsv)
5105 goto no_delete;
5106 rsv->size = btrfs_calc_metadata_size(fs_info, 1);
5107 rsv->failfast = 1;
5108
5109 btrfs_i_size_write(BTRFS_I(inode), 0);
5110
5111 while (1) {
5112 trans = evict_refill_and_join(root, rsv);
5113 if (IS_ERR(trans))
5114 goto free_rsv;
5115
5116 trans->block_rsv = rsv;
5117
5118 ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
5119 trans->block_rsv = &fs_info->trans_block_rsv;
5120 btrfs_end_transaction(trans);
5121 btrfs_btree_balance_dirty(fs_info);
5122 if (ret && ret != -ENOSPC && ret != -EAGAIN)
5123 goto free_rsv;
5124 else if (!ret)
5125 break;
5126 }
5127
5128
5129
5130
5131
5132
5133
5134
5135
5136
5137 trans = evict_refill_and_join(root, rsv);
5138 if (!IS_ERR(trans)) {
5139 trans->block_rsv = rsv;
5140 btrfs_orphan_del(trans, BTRFS_I(inode));
5141 trans->block_rsv = &fs_info->trans_block_rsv;
5142 btrfs_end_transaction(trans);
5143 }
5144
5145 if (!(root == fs_info->tree_root ||
5146 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID))
5147 btrfs_return_ino(root, btrfs_ino(BTRFS_I(inode)));
5148
5149free_rsv:
5150 btrfs_free_block_rsv(fs_info, rsv);
5151no_delete:
5152
5153
5154
5155
5156
5157 btrfs_remove_delayed_node(BTRFS_I(inode));
5158 clear_inode(inode);
5159}
5160
5161
5162
5163
5164
5165
5166
5167
5168static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
5169 struct btrfs_key *location, u8 *type)
5170{
5171 const char *name = dentry->d_name.name;
5172 int namelen = dentry->d_name.len;
5173 struct btrfs_dir_item *di;
5174 struct btrfs_path *path;
5175 struct btrfs_root *root = BTRFS_I(dir)->root;
5176 int ret = 0;
5177
5178 path = btrfs_alloc_path();
5179 if (!path)
5180 return -ENOMEM;
5181
5182 di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(BTRFS_I(dir)),
5183 name, namelen, 0);
5184 if (IS_ERR_OR_NULL(di)) {
5185 ret = di ? PTR_ERR(di) : -ENOENT;
5186 goto out;
5187 }
5188
5189 btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
5190 if (location->type != BTRFS_INODE_ITEM_KEY &&
5191 location->type != BTRFS_ROOT_ITEM_KEY) {
5192 ret = -EUCLEAN;
5193 btrfs_warn(root->fs_info,
5194"%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))",
5195 __func__, name, btrfs_ino(BTRFS_I(dir)),
5196 location->objectid, location->type, location->offset);
5197 }
5198 if (!ret)
5199 *type = btrfs_dir_type(path->nodes[0], di);
5200out:
5201 btrfs_free_path(path);
5202 return ret;
5203}
5204
5205
5206
5207
5208
5209
5210static int fixup_tree_root_location(struct btrfs_fs_info *fs_info,
5211 struct inode *dir,
5212 struct dentry *dentry,
5213 struct btrfs_key *location,
5214 struct btrfs_root **sub_root)
5215{
5216 struct btrfs_path *path;
5217 struct btrfs_root *new_root;
5218 struct btrfs_root_ref *ref;
5219 struct extent_buffer *leaf;
5220 struct btrfs_key key;
5221 int ret;
5222 int err = 0;
5223
5224 path = btrfs_alloc_path();
5225 if (!path) {
5226 err = -ENOMEM;
5227 goto out;
5228 }
5229
5230 err = -ENOENT;
5231 key.objectid = BTRFS_I(dir)->root->root_key.objectid;
5232 key.type = BTRFS_ROOT_REF_KEY;
5233 key.offset = location->objectid;
5234
5235 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
5236 if (ret) {
5237 if (ret < 0)
5238 err = ret;
5239 goto out;
5240 }
5241
5242 leaf = path->nodes[0];
5243 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
5244 if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(BTRFS_I(dir)) ||
5245 btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len)
5246 goto out;
5247
5248 ret = memcmp_extent_buffer(leaf, dentry->d_name.name,
5249 (unsigned long)(ref + 1),
5250 dentry->d_name.len);
5251 if (ret)
5252 goto out;
5253
5254 btrfs_release_path(path);
5255
5256 new_root = btrfs_get_fs_root(fs_info, location->objectid, true);
5257 if (IS_ERR(new_root)) {
5258 err = PTR_ERR(new_root);
5259 goto out;
5260 }
5261
5262 *sub_root = new_root;
5263 location->objectid = btrfs_root_dirid(&new_root->root_item);
5264 location->type = BTRFS_INODE_ITEM_KEY;
5265 location->offset = 0;
5266 err = 0;
5267out:
5268 btrfs_free_path(path);
5269 return err;
5270}
5271
5272static void inode_tree_add(struct inode *inode)
5273{
5274 struct btrfs_root *root = BTRFS_I(inode)->root;
5275 struct btrfs_inode *entry;
5276 struct rb_node **p;
5277 struct rb_node *parent;
5278 struct rb_node *new = &BTRFS_I(inode)->rb_node;
5279 u64 ino = btrfs_ino(BTRFS_I(inode));
5280
5281 if (inode_unhashed(inode))
5282 return;
5283 parent = NULL;
5284 spin_lock(&root->inode_lock);
5285 p = &root->inode_tree.rb_node;
5286 while (*p) {
5287 parent = *p;
5288 entry = rb_entry(parent, struct btrfs_inode, rb_node);
5289
5290 if (ino < btrfs_ino(entry))
5291 p = &parent->rb_left;
5292 else if (ino > btrfs_ino(entry))
5293 p = &parent->rb_right;
5294 else {
5295 WARN_ON(!(entry->vfs_inode.i_state &
5296 (I_WILL_FREE | I_FREEING)));
5297 rb_replace_node(parent, new, &root->inode_tree);
5298 RB_CLEAR_NODE(parent);
5299 spin_unlock(&root->inode_lock);
5300 return;
5301 }
5302 }
5303 rb_link_node(new, parent, p);
5304 rb_insert_color(new, &root->inode_tree);
5305 spin_unlock(&root->inode_lock);
5306}
5307
5308static void inode_tree_del(struct inode *inode)
5309{
5310 struct btrfs_root *root = BTRFS_I(inode)->root;
5311 int empty = 0;
5312
5313 spin_lock(&root->inode_lock);
5314 if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) {
5315 rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree);
5316 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
5317 empty = RB_EMPTY_ROOT(&root->inode_tree);
5318 }
5319 spin_unlock(&root->inode_lock);
5320
5321 if (empty && btrfs_root_refs(&root->root_item) == 0) {
5322 spin_lock(&root->inode_lock);
5323 empty = RB_EMPTY_ROOT(&root->inode_tree);
5324 spin_unlock(&root->inode_lock);
5325 if (empty)
5326 btrfs_add_dead_root(root);
5327 }
5328}
5329
5330
5331static int btrfs_init_locked_inode(struct inode *inode, void *p)
5332{
5333 struct btrfs_iget_args *args = p;
5334
5335 inode->i_ino = args->ino;
5336 BTRFS_I(inode)->location.objectid = args->ino;
5337 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
5338 BTRFS_I(inode)->location.offset = 0;
5339 BTRFS_I(inode)->root = btrfs_grab_root(args->root);
5340 BUG_ON(args->root && !BTRFS_I(inode)->root);
5341 return 0;
5342}
5343
5344static int btrfs_find_actor(struct inode *inode, void *opaque)
5345{
5346 struct btrfs_iget_args *args = opaque;
5347
5348 return args->ino == BTRFS_I(inode)->location.objectid &&
5349 args->root == BTRFS_I(inode)->root;
5350}
5351
5352static struct inode *btrfs_iget_locked(struct super_block *s, u64 ino,
5353 struct btrfs_root *root)
5354{
5355 struct inode *inode;
5356 struct btrfs_iget_args args;
5357 unsigned long hashval = btrfs_inode_hash(ino, root);
5358
5359 args.ino = ino;
5360 args.root = root;
5361
5362 inode = iget5_locked(s, hashval, btrfs_find_actor,
5363 btrfs_init_locked_inode,
5364 (void *)&args);
5365 return inode;
5366}
5367
5368
5369
5370
5371
5372
5373
5374struct inode *btrfs_iget_path(struct super_block *s, u64 ino,
5375 struct btrfs_root *root, struct btrfs_path *path)
5376{
5377 struct inode *inode;
5378
5379 inode = btrfs_iget_locked(s, ino, root);
5380 if (!inode)
5381 return ERR_PTR(-ENOMEM);
5382
5383 if (inode->i_state & I_NEW) {
5384 int ret;
5385
5386 ret = btrfs_read_locked_inode(inode, path);
5387 if (!ret) {
5388 inode_tree_add(inode);
5389 unlock_new_inode(inode);
5390 } else {
5391 iget_failed(inode);
5392
5393
5394
5395
5396
5397 if (ret > 0)
5398 ret = -ENOENT;
5399 inode = ERR_PTR(ret);
5400 }
5401 }
5402
5403 return inode;
5404}
5405
5406struct inode *btrfs_iget(struct super_block *s, u64 ino, struct btrfs_root *root)
5407{
5408 return btrfs_iget_path(s, ino, root, NULL);
5409}
5410
5411static struct inode *new_simple_dir(struct super_block *s,
5412 struct btrfs_key *key,
5413 struct btrfs_root *root)
5414{
5415 struct inode *inode = new_inode(s);
5416
5417 if (!inode)
5418 return ERR_PTR(-ENOMEM);
5419
5420 BTRFS_I(inode)->root = btrfs_grab_root(root);
5421 memcpy(&BTRFS_I(inode)->location, key, sizeof(*key));
5422 set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags);
5423
5424 inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID;
5425
5426
5427
5428
5429 inode->i_op = &simple_dir_inode_operations;
5430 inode->i_opflags &= ~IOP_XATTR;
5431 inode->i_fop = &simple_dir_operations;
5432 inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
5433 inode->i_mtime = current_time(inode);
5434 inode->i_atime = inode->i_mtime;
5435 inode->i_ctime = inode->i_mtime;
5436 BTRFS_I(inode)->i_otime = inode->i_mtime;
5437
5438 return inode;
5439}
5440
5441static inline u8 btrfs_inode_type(struct inode *inode)
5442{
5443
5444
5445
5446
5447 BUILD_BUG_ON(BTRFS_FT_UNKNOWN != FT_UNKNOWN);
5448 BUILD_BUG_ON(BTRFS_FT_REG_FILE != FT_REG_FILE);
5449 BUILD_BUG_ON(BTRFS_FT_DIR != FT_DIR);
5450 BUILD_BUG_ON(BTRFS_FT_CHRDEV != FT_CHRDEV);
5451 BUILD_BUG_ON(BTRFS_FT_BLKDEV != FT_BLKDEV);
5452 BUILD_BUG_ON(BTRFS_FT_FIFO != FT_FIFO);
5453 BUILD_BUG_ON(BTRFS_FT_SOCK != FT_SOCK);
5454 BUILD_BUG_ON(BTRFS_FT_SYMLINK != FT_SYMLINK);
5455
5456 return fs_umode_to_ftype(inode->i_mode);
5457}
5458
5459struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
5460{
5461 struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
5462 struct inode *inode;
5463 struct btrfs_root *root = BTRFS_I(dir)->root;
5464 struct btrfs_root *sub_root = root;
5465 struct btrfs_key location;
5466 u8 di_type = 0;
5467 int ret = 0;
5468
5469 if (dentry->d_name.len > BTRFS_NAME_LEN)
5470 return ERR_PTR(-ENAMETOOLONG);
5471
5472 ret = btrfs_inode_by_name(dir, dentry, &location, &di_type);
5473 if (ret < 0)
5474 return ERR_PTR(ret);
5475
5476 if (location.type == BTRFS_INODE_ITEM_KEY) {
5477 inode = btrfs_iget(dir->i_sb, location.objectid, root);
5478 if (IS_ERR(inode))
5479 return inode;
5480
5481
5482 if (btrfs_inode_type(inode) != di_type) {
5483 btrfs_crit(fs_info,
5484"inode mode mismatch with dir: inode mode=0%o btrfs type=%u dir type=%u",
5485 inode->i_mode, btrfs_inode_type(inode),
5486 di_type);
5487 iput(inode);
5488 return ERR_PTR(-EUCLEAN);
5489 }
5490 return inode;
5491 }
5492
5493 ret = fixup_tree_root_location(fs_info, dir, dentry,
5494 &location, &sub_root);
5495 if (ret < 0) {
5496 if (ret != -ENOENT)
5497 inode = ERR_PTR(ret);
5498 else
5499 inode = new_simple_dir(dir->i_sb, &location, sub_root);
5500 } else {
5501 inode = btrfs_iget(dir->i_sb, location.objectid, sub_root);
5502 }
5503 if (root != sub_root)
5504 btrfs_put_root(sub_root);
5505
5506 if (!IS_ERR(inode) && root != sub_root) {
5507 down_read(&fs_info->cleanup_work_sem);
5508 if (!sb_rdonly(inode->i_sb))
5509 ret = btrfs_orphan_cleanup(sub_root);
5510 up_read(&fs_info->cleanup_work_sem);
5511 if (ret) {
5512 iput(inode);
5513 inode = ERR_PTR(ret);
5514 }
5515 }
5516
5517 return inode;
5518}
5519
5520static int btrfs_dentry_delete(const struct dentry *dentry)
5521{
5522 struct btrfs_root *root;
5523 struct inode *inode = d_inode(dentry);
5524
5525 if (!inode && !IS_ROOT(dentry))
5526 inode = d_inode(dentry->d_parent);
5527
5528 if (inode) {
5529 root = BTRFS_I(inode)->root;
5530 if (btrfs_root_refs(&root->root_item) == 0)
5531 return 1;
5532
5533 if (btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
5534 return 1;
5535 }
5536 return 0;
5537}
5538
5539static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
5540 unsigned int flags)
5541{
5542 struct inode *inode = btrfs_lookup_dentry(dir, dentry);
5543
5544 if (inode == ERR_PTR(-ENOENT))
5545 inode = NULL;
5546 return d_splice_alias(inode, dentry);
5547}
5548
5549
5550
5551
5552
5553
5554
5555
5556
5557
5558static int btrfs_opendir(struct inode *inode, struct file *file)
5559{
5560 struct btrfs_file_private *private;
5561
5562 private = kzalloc(sizeof(struct btrfs_file_private), GFP_KERNEL);
5563 if (!private)
5564 return -ENOMEM;
5565 private->filldir_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
5566 if (!private->filldir_buf) {
5567 kfree(private);
5568 return -ENOMEM;
5569 }
5570 file->private_data = private;
5571 return 0;
5572}
5573
5574struct dir_entry {
5575 u64 ino;
5576 u64 offset;
5577 unsigned type;
5578 int name_len;
5579};
5580
5581static int btrfs_filldir(void *addr, int entries, struct dir_context *ctx)
5582{
5583 while (entries--) {
5584 struct dir_entry *entry = addr;
5585 char *name = (char *)(entry + 1);
5586
5587 ctx->pos = get_unaligned(&entry->offset);
5588 if (!dir_emit(ctx, name, get_unaligned(&entry->name_len),
5589 get_unaligned(&entry->ino),
5590 get_unaligned(&entry->type)))
5591 return 1;
5592 addr += sizeof(struct dir_entry) +
5593 get_unaligned(&entry->name_len);
5594 ctx->pos++;
5595 }
5596 return 0;
5597}
5598
5599static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
5600{
5601 struct inode *inode = file_inode(file);
5602 struct btrfs_root *root = BTRFS_I(inode)->root;
5603 struct btrfs_file_private *private = file->private_data;
5604 struct btrfs_dir_item *di;
5605 struct btrfs_key key;
5606 struct btrfs_key found_key;
5607 struct btrfs_path *path;
5608 void *addr;
5609 struct list_head ins_list;
5610 struct list_head del_list;
5611 int ret;
5612 struct extent_buffer *leaf;
5613 int slot;
5614 char *name_ptr;
5615 int name_len;
5616 int entries = 0;
5617 int total_len = 0;
5618 bool put = false;
5619 struct btrfs_key location;
5620
5621 if (!dir_emit_dots(file, ctx))
5622 return 0;
5623
5624 path = btrfs_alloc_path();
5625 if (!path)
5626 return -ENOMEM;
5627
5628 addr = private->filldir_buf;
5629 path->reada = READA_FORWARD;
5630
5631 INIT_LIST_HEAD(&ins_list);
5632 INIT_LIST_HEAD(&del_list);
5633 put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list);
5634
5635again:
5636 key.type = BTRFS_DIR_INDEX_KEY;
5637 key.offset = ctx->pos;
5638 key.objectid = btrfs_ino(BTRFS_I(inode));
5639
5640 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5641 if (ret < 0)
5642 goto err;
5643
5644 while (1) {
5645 struct dir_entry *entry;
5646
5647 leaf = path->nodes[0];
5648 slot = path->slots[0];
5649 if (slot >= btrfs_header_nritems(leaf)) {
5650 ret = btrfs_next_leaf(root, path);
5651 if (ret < 0)
5652 goto err;
5653 else if (ret > 0)
5654 break;
5655 continue;
5656 }
5657
5658 btrfs_item_key_to_cpu(leaf, &found_key, slot);
5659
5660 if (found_key.objectid != key.objectid)
5661 break;
5662 if (found_key.type != BTRFS_DIR_INDEX_KEY)
5663 break;
5664 if (found_key.offset < ctx->pos)
5665 goto next;
5666 if (btrfs_should_delete_dir_index(&del_list, found_key.offset))
5667 goto next;
5668 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
5669 name_len = btrfs_dir_name_len(leaf, di);
5670 if ((total_len + sizeof(struct dir_entry) + name_len) >=
5671 PAGE_SIZE) {
5672 btrfs_release_path(path);
5673 ret = btrfs_filldir(private->filldir_buf, entries, ctx);
5674 if (ret)
5675 goto nopos;
5676 addr = private->filldir_buf;
5677 entries = 0;
5678 total_len = 0;
5679 goto again;
5680 }
5681
5682 entry = addr;
5683 put_unaligned(name_len, &entry->name_len);
5684 name_ptr = (char *)(entry + 1);
5685 read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1),
5686 name_len);
5687 put_unaligned(fs_ftype_to_dtype(btrfs_dir_type(leaf, di)),
5688 &entry->type);
5689 btrfs_dir_item_key_to_cpu(leaf, di, &location);
5690 put_unaligned(location.objectid, &entry->ino);
5691 put_unaligned(found_key.offset, &entry->offset);
5692 entries++;
5693 addr += sizeof(struct dir_entry) + name_len;
5694 total_len += sizeof(struct dir_entry) + name_len;
5695next:
5696 path->slots[0]++;
5697 }
5698 btrfs_release_path(path);
5699
5700 ret = btrfs_filldir(private->filldir_buf, entries, ctx);
5701 if (ret)
5702 goto nopos;
5703
5704 ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list);
5705 if (ret)
5706 goto nopos;
5707
5708
5709
5710
5711
5712
5713
5714
5715
5716
5717
5718
5719
5720
5721
5722
5723
5724
5725 if (ctx->pos >= INT_MAX)
5726 ctx->pos = LLONG_MAX;
5727 else
5728 ctx->pos = INT_MAX;
5729nopos:
5730 ret = 0;
5731err:
5732 if (put)
5733 btrfs_readdir_put_delayed_items(inode, &ins_list, &del_list);
5734 btrfs_free_path(path);
5735 return ret;
5736}
5737
5738
5739
5740
5741
5742
5743
5744static int btrfs_dirty_inode(struct inode *inode)
5745{
5746 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
5747 struct btrfs_root *root = BTRFS_I(inode)->root;
5748 struct btrfs_trans_handle *trans;
5749 int ret;
5750
5751 if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
5752 return 0;
5753
5754 trans = btrfs_join_transaction(root);
5755 if (IS_ERR(trans))
5756 return PTR_ERR(trans);
5757
5758 ret = btrfs_update_inode(trans, root, inode);
5759 if (ret && ret == -ENOSPC) {
5760
5761 btrfs_end_transaction(trans);
5762 trans = btrfs_start_transaction(root, 1);
5763 if (IS_ERR(trans))
5764 return PTR_ERR(trans);
5765
5766 ret = btrfs_update_inode(trans, root, inode);
5767 }
5768 btrfs_end_transaction(trans);
5769 if (BTRFS_I(inode)->delayed_node)
5770 btrfs_balance_delayed_items(fs_info);
5771
5772 return ret;
5773}
5774
5775
5776
5777
5778
5779static int btrfs_update_time(struct inode *inode, struct timespec64 *now,
5780 int flags)
5781{
5782 struct btrfs_root *root = BTRFS_I(inode)->root;
5783 bool dirty = flags & ~S_VERSION;
5784
5785 if (btrfs_root_readonly(root))
5786 return -EROFS;
5787
5788 if (flags & S_VERSION)
5789 dirty |= inode_maybe_inc_iversion(inode, dirty);
5790 if (flags & S_CTIME)
5791 inode->i_ctime = *now;
5792 if (flags & S_MTIME)
5793 inode->i_mtime = *now;
5794 if (flags & S_ATIME)
5795 inode->i_atime = *now;
5796 return dirty ? btrfs_dirty_inode(inode) : 0;
5797}
5798
5799
5800
5801
5802
5803
5804static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
5805{
5806 struct btrfs_root *root = inode->root;
5807 struct btrfs_key key, found_key;
5808 struct btrfs_path *path;
5809 struct extent_buffer *leaf;
5810 int ret;
5811
5812 key.objectid = btrfs_ino(inode);
5813 key.type = BTRFS_DIR_INDEX_KEY;
5814 key.offset = (u64)-1;
5815
5816 path = btrfs_alloc_path();
5817 if (!path)
5818 return -ENOMEM;
5819
5820 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5821 if (ret < 0)
5822 goto out;
5823
5824 if (ret == 0)
5825 goto out;
5826 ret = 0;
5827
5828
5829
5830
5831
5832
5833
5834 if (path->slots[0] == 0) {
5835 inode->index_cnt = 2;
5836 goto out;
5837 }
5838
5839 path->slots[0]--;
5840
5841 leaf = path->nodes[0];
5842 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
5843
5844 if (found_key.objectid != btrfs_ino(inode) ||
5845 found_key.type != BTRFS_DIR_INDEX_KEY) {
5846 inode->index_cnt = 2;
5847 goto out;
5848 }
5849
5850 inode->index_cnt = found_key.offset + 1;
5851out:
5852 btrfs_free_path(path);
5853 return ret;
5854}
5855
5856
5857
5858
5859
5860int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index)
5861{
5862 int ret = 0;
5863
5864 if (dir->index_cnt == (u64)-1) {
5865 ret = btrfs_inode_delayed_dir_index_count(dir);
5866 if (ret) {
5867 ret = btrfs_set_inode_index_count(dir);
5868 if (ret)
5869 return ret;
5870 }
5871 }
5872
5873 *index = dir->index_cnt;
5874 dir->index_cnt++;
5875
5876 return ret;
5877}
5878
5879static int btrfs_insert_inode_locked(struct inode *inode)
5880{
5881 struct btrfs_iget_args args;
5882
5883 args.ino = BTRFS_I(inode)->location.objectid;
5884 args.root = BTRFS_I(inode)->root;
5885
5886 return insert_inode_locked4(inode,
5887 btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root),
5888 btrfs_find_actor, &args);
5889}
5890
5891
5892
5893
5894
5895
5896static void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
5897{
5898 unsigned int flags;
5899
5900 if (!dir)
5901 return;
5902
5903 flags = BTRFS_I(dir)->flags;
5904
5905 if (flags & BTRFS_INODE_NOCOMPRESS) {
5906 BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
5907 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
5908 } else if (flags & BTRFS_INODE_COMPRESS) {
5909 BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
5910 BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
5911 }
5912
5913 if (flags & BTRFS_INODE_NODATACOW) {
5914 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
5915 if (S_ISREG(inode->i_mode))
5916 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
5917 }
5918
5919 btrfs_sync_inode_flags_to_i_flags(inode);
5920}
5921
5922static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
5923 struct btrfs_root *root,
5924 struct inode *dir,
5925 const char *name, int name_len,
5926 u64 ref_objectid, u64 objectid,
5927 umode_t mode, u64 *index)
5928{
5929 struct btrfs_fs_info *fs_info = root->fs_info;
5930 struct inode *inode;
5931 struct btrfs_inode_item *inode_item;
5932 struct btrfs_key *location;
5933 struct btrfs_path *path;
5934 struct btrfs_inode_ref *ref;
5935 struct btrfs_key key[2];
5936 u32 sizes[2];
5937 int nitems = name ? 2 : 1;
5938 unsigned long ptr;
5939 unsigned int nofs_flag;
5940 int ret;
5941
5942 path = btrfs_alloc_path();
5943 if (!path)
5944 return ERR_PTR(-ENOMEM);
5945
5946 nofs_flag = memalloc_nofs_save();
5947 inode = new_inode(fs_info->sb);
5948 memalloc_nofs_restore(nofs_flag);
5949 if (!inode) {
5950 btrfs_free_path(path);
5951 return ERR_PTR(-ENOMEM);
5952 }
5953
5954
5955
5956
5957
5958 if (!name)
5959 set_nlink(inode, 0);
5960
5961
5962
5963
5964
5965 inode->i_ino = objectid;
5966
5967 if (dir && name) {
5968 trace_btrfs_inode_request(dir);
5969
5970 ret = btrfs_set_inode_index(BTRFS_I(dir), index);
5971 if (ret) {
5972 btrfs_free_path(path);
5973 iput(inode);
5974 return ERR_PTR(ret);
5975 }
5976 } else if (dir) {
5977 *index = 0;
5978 }
5979
5980
5981
5982
5983
5984 BTRFS_I(inode)->index_cnt = 2;
5985 BTRFS_I(inode)->dir_index = *index;
5986 BTRFS_I(inode)->root = btrfs_grab_root(root);
5987 BTRFS_I(inode)->generation = trans->transid;
5988 inode->i_generation = BTRFS_I(inode)->generation;
5989
5990
5991
5992
5993
5994
5995
5996 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
5997
5998 key[0].objectid = objectid;
5999 key[0].type = BTRFS_INODE_ITEM_KEY;
6000 key[0].offset = 0;
6001
6002 sizes[0] = sizeof(struct btrfs_inode_item);
6003
6004 if (name) {
6005
6006
6007
6008
6009
6010
6011 key[1].objectid = objectid;
6012 key[1].type = BTRFS_INODE_REF_KEY;
6013 key[1].offset = ref_objectid;
6014
6015 sizes[1] = name_len + sizeof(*ref);
6016 }
6017
6018 location = &BTRFS_I(inode)->location;
6019 location->objectid = objectid;
6020 location->offset = 0;
6021 location->type = BTRFS_INODE_ITEM_KEY;
6022
6023 ret = btrfs_insert_inode_locked(inode);
6024 if (ret < 0) {
6025 iput(inode);
6026 goto fail;
6027 }
6028
6029 path->leave_spinning = 1;
6030 ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems);
6031 if (ret != 0)
6032 goto fail_unlock;
6033
6034 inode_init_owner(inode, dir, mode);
6035 inode_set_bytes(inode, 0);
6036
6037 inode->i_mtime = current_time(inode);
6038 inode->i_atime = inode->i_mtime;
6039 inode->i_ctime = inode->i_mtime;
6040 BTRFS_I(inode)->i_otime = inode->i_mtime;
6041
6042 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
6043 struct btrfs_inode_item);
6044 memzero_extent_buffer(path->nodes[0], (unsigned long)inode_item,
6045 sizeof(*inode_item));
6046 fill_inode_item(trans, path->nodes[0], inode_item, inode);
6047
6048 if (name) {
6049 ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
6050 struct btrfs_inode_ref);
6051 btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
6052 btrfs_set_inode_ref_index(path->nodes[0], ref, *index);
6053 ptr = (unsigned long)(ref + 1);
6054 write_extent_buffer(path->nodes[0], name, ptr, name_len);
6055 }
6056
6057 btrfs_mark_buffer_dirty(path->nodes[0]);
6058 btrfs_free_path(path);
6059
6060 btrfs_inherit_iflags(inode, dir);
6061
6062 if (S_ISREG(mode)) {
6063 if (btrfs_test_opt(fs_info, NODATASUM))
6064 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
6065 if (btrfs_test_opt(fs_info, NODATACOW))
6066 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW |
6067 BTRFS_INODE_NODATASUM;
6068 }
6069
6070 inode_tree_add(inode);
6071
6072 trace_btrfs_inode_new(inode);
6073 btrfs_set_inode_last_trans(trans, BTRFS_I(inode));
6074
6075 btrfs_update_root_times(trans, root);
6076
6077 ret = btrfs_inode_inherit_props(trans, inode, dir);
6078 if (ret)
6079 btrfs_err(fs_info,
6080 "error inheriting props for ino %llu (root %llu): %d",
6081 btrfs_ino(BTRFS_I(inode)), root->root_key.objectid, ret);
6082
6083 return inode;
6084
6085fail_unlock:
6086 discard_new_inode(inode);
6087fail:
6088 if (dir && name)
6089 BTRFS_I(dir)->index_cnt--;
6090 btrfs_free_path(path);
6091 return ERR_PTR(ret);
6092}
6093
6094
6095
6096
6097
6098
6099
6100int btrfs_add_link(struct btrfs_trans_handle *trans,
6101 struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
6102 const char *name, int name_len, int add_backref, u64 index)
6103{
6104 int ret = 0;
6105 struct btrfs_key key;
6106 struct btrfs_root *root = parent_inode->root;
6107 u64 ino = btrfs_ino(inode);
6108 u64 parent_ino = btrfs_ino(parent_inode);
6109
6110 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
6111 memcpy(&key, &inode->root->root_key, sizeof(key));
6112 } else {
6113 key.objectid = ino;
6114 key.type = BTRFS_INODE_ITEM_KEY;
6115 key.offset = 0;
6116 }
6117
6118 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
6119 ret = btrfs_add_root_ref(trans, key.objectid,
6120 root->root_key.objectid, parent_ino,
6121 index, name, name_len);
6122 } else if (add_backref) {
6123 ret = btrfs_insert_inode_ref(trans, root, name, name_len, ino,
6124 parent_ino, index);
6125 }
6126
6127
6128 if (ret)
6129 return ret;
6130
6131 ret = btrfs_insert_dir_item(trans, name, name_len, parent_inode, &key,
6132 btrfs_inode_type(&inode->vfs_inode), index);
6133 if (ret == -EEXIST || ret == -EOVERFLOW)
6134 goto fail_dir_item;
6135 else if (ret) {
6136 btrfs_abort_transaction(trans, ret);
6137 return ret;
6138 }
6139
6140 btrfs_i_size_write(parent_inode, parent_inode->vfs_inode.i_size +
6141 name_len * 2);
6142 inode_inc_iversion(&parent_inode->vfs_inode);
6143
6144
6145
6146
6147
6148
6149 if (!test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags)) {
6150 struct timespec64 now = current_time(&parent_inode->vfs_inode);
6151
6152 parent_inode->vfs_inode.i_mtime = now;
6153 parent_inode->vfs_inode.i_ctime = now;
6154 }
6155 ret = btrfs_update_inode(trans, root, &parent_inode->vfs_inode);
6156 if (ret)
6157 btrfs_abort_transaction(trans, ret);
6158 return ret;
6159
6160fail_dir_item:
6161 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
6162 u64 local_index;
6163 int err;
6164 err = btrfs_del_root_ref(trans, key.objectid,
6165 root->root_key.objectid, parent_ino,
6166 &local_index, name, name_len);
6167 if (err)
6168 btrfs_abort_transaction(trans, err);
6169 } else if (add_backref) {
6170 u64 local_index;
6171 int err;
6172
6173 err = btrfs_del_inode_ref(trans, root, name, name_len,
6174 ino, parent_ino, &local_index);
6175 if (err)
6176 btrfs_abort_transaction(trans, err);
6177 }
6178
6179
6180 return ret;
6181}
6182
6183static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
6184 struct btrfs_inode *dir, struct dentry *dentry,
6185 struct btrfs_inode *inode, int backref, u64 index)
6186{
6187 int err = btrfs_add_link(trans, dir, inode,
6188 dentry->d_name.name, dentry->d_name.len,
6189 backref, index);
6190 if (err > 0)
6191 err = -EEXIST;
6192 return err;
6193}
6194
6195static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
6196 umode_t mode, dev_t rdev)
6197{
6198 struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
6199 struct btrfs_trans_handle *trans;
6200 struct btrfs_root *root = BTRFS_I(dir)->root;
6201 struct inode *inode = NULL;
6202 int err;
6203 u64 objectid;
6204 u64 index = 0;
6205
6206
6207
6208
6209
6210
6211 trans = btrfs_start_transaction(root, 5);
6212 if (IS_ERR(trans))
6213 return PTR_ERR(trans);
6214
6215 err = btrfs_find_free_ino(root, &objectid);
6216 if (err)
6217 goto out_unlock;
6218
6219 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
6220 dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), objectid,
6221 mode, &index);
6222 if (IS_ERR(inode)) {
6223 err = PTR_ERR(inode);
6224 inode = NULL;
6225 goto out_unlock;
6226 }
6227
6228
6229
6230
6231
6232
6233
6234 inode->i_op = &btrfs_special_inode_operations;
6235 init_special_inode(inode, inode->i_mode, rdev);
6236
6237 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
6238 if (err)
6239 goto out_unlock;
6240
6241 err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode),
6242 0, index);
6243 if (err)
6244 goto out_unlock;
6245
6246 btrfs_update_inode(trans, root, inode);
6247 d_instantiate_new(dentry, inode);
6248
6249out_unlock:
6250 btrfs_end_transaction(trans);
6251 btrfs_btree_balance_dirty(fs_info);
6252 if (err && inode) {
6253 inode_dec_link_count(inode);
6254 discard_new_inode(inode);
6255 }
6256 return err;
6257}
6258
6259static int btrfs_create(struct inode *dir, struct dentry *dentry,
6260 umode_t mode, bool excl)
6261{
6262 struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
6263 struct btrfs_trans_handle *trans;
6264 struct btrfs_root *root = BTRFS_I(dir)->root;
6265 struct inode *inode = NULL;
6266 int err;
6267 u64 objectid;
6268 u64 index = 0;
6269
6270
6271
6272
6273
6274
6275 trans = btrfs_start_transaction(root, 5);
6276 if (IS_ERR(trans))
6277 return PTR_ERR(trans);
6278
6279 err = btrfs_find_free_ino(root, &objectid);
6280 if (err)
6281 goto out_unlock;
6282
6283 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
6284 dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), objectid,
6285 mode, &index);
6286 if (IS_ERR(inode)) {
6287 err = PTR_ERR(inode);
6288 inode = NULL;
6289 goto out_unlock;
6290 }
6291
6292
6293
6294
6295
6296
6297 inode->i_fop = &btrfs_file_operations;
6298 inode->i_op = &btrfs_file_inode_operations;
6299 inode->i_mapping->a_ops = &btrfs_aops;
6300
6301 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
6302 if (err)
6303 goto out_unlock;
6304
6305 err = btrfs_update_inode(trans, root, inode);
6306 if (err)
6307 goto out_unlock;
6308
6309 err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode),
6310 0, index);
6311 if (err)
6312 goto out_unlock;
6313
6314 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
6315 d_instantiate_new(dentry, inode);
6316
6317out_unlock:
6318 btrfs_end_transaction(trans);
6319 if (err && inode) {
6320 inode_dec_link_count(inode);
6321 discard_new_inode(inode);
6322 }
6323 btrfs_btree_balance_dirty(fs_info);
6324 return err;
6325}
6326
6327static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
6328 struct dentry *dentry)
6329{
6330 struct btrfs_trans_handle *trans = NULL;
6331 struct btrfs_root *root = BTRFS_I(dir)->root;
6332 struct inode *inode = d_inode(old_dentry);
6333 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
6334 u64 index;
6335 int err;
6336 int drop_inode = 0;
6337
6338
6339 if (root->root_key.objectid != BTRFS_I(inode)->root->root_key.objectid)
6340 return -EXDEV;
6341
6342 if (inode->i_nlink >= BTRFS_LINK_MAX)
6343 return -EMLINK;
6344
6345 err = btrfs_set_inode_index(BTRFS_I(dir), &index);
6346 if (err)
6347 goto fail;
6348
6349
6350
6351
6352
6353
6354
6355 trans = btrfs_start_transaction(root, inode->i_nlink ? 5 : 6);
6356 if (IS_ERR(trans)) {
6357 err = PTR_ERR(trans);
6358 trans = NULL;
6359 goto fail;
6360 }
6361
6362
6363 BTRFS_I(inode)->dir_index = 0ULL;
6364 inc_nlink(inode);
6365 inode_inc_iversion(inode);
6366 inode->i_ctime = current_time(inode);
6367 ihold(inode);
6368 set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
6369
6370 err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode),
6371 1, index);
6372
6373 if (err) {
6374 drop_inode = 1;
6375 } else {
6376 struct dentry *parent = dentry->d_parent;
6377 int ret;
6378
6379 err = btrfs_update_inode(trans, root, inode);
6380 if (err)
6381 goto fail;
6382 if (inode->i_nlink == 1) {
6383
6384
6385
6386
6387 err = btrfs_orphan_del(trans, BTRFS_I(inode));
6388 if (err)
6389 goto fail;
6390 }
6391 d_instantiate(dentry, inode);
6392 ret = btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent,
6393 true, NULL);
6394 if (ret == BTRFS_NEED_TRANS_COMMIT) {
6395 err = btrfs_commit_transaction(trans);
6396 trans = NULL;
6397 }
6398 }
6399
6400fail:
6401 if (trans)
6402 btrfs_end_transaction(trans);
6403 if (drop_inode) {
6404 inode_dec_link_count(inode);
6405 iput(inode);
6406 }
6407 btrfs_btree_balance_dirty(fs_info);
6408 return err;
6409}
6410
6411static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
6412{
6413 struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
6414 struct inode *inode = NULL;
6415 struct btrfs_trans_handle *trans;
6416 struct btrfs_root *root = BTRFS_I(dir)->root;
6417 int err = 0;
6418 u64 objectid = 0;
6419 u64 index = 0;
6420
6421
6422
6423
6424
6425
6426 trans = btrfs_start_transaction(root, 5);
6427 if (IS_ERR(trans))
6428 return PTR_ERR(trans);
6429
6430 err = btrfs_find_free_ino(root, &objectid);
6431 if (err)
6432 goto out_fail;
6433
6434 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
6435 dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), objectid,
6436 S_IFDIR | mode, &index);
6437 if (IS_ERR(inode)) {
6438 err = PTR_ERR(inode);
6439 inode = NULL;
6440 goto out_fail;
6441 }
6442
6443
6444 inode->i_op = &btrfs_dir_inode_operations;
6445 inode->i_fop = &btrfs_dir_file_operations;
6446
6447 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
6448 if (err)
6449 goto out_fail;
6450
6451 btrfs_i_size_write(BTRFS_I(inode), 0);
6452 err = btrfs_update_inode(trans, root, inode);
6453 if (err)
6454 goto out_fail;
6455
6456 err = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
6457 dentry->d_name.name,
6458 dentry->d_name.len, 0, index);
6459 if (err)
6460 goto out_fail;
6461
6462 d_instantiate_new(dentry, inode);
6463
6464out_fail:
6465 btrfs_end_transaction(trans);
6466 if (err && inode) {
6467 inode_dec_link_count(inode);
6468 discard_new_inode(inode);
6469 }
6470 btrfs_btree_balance_dirty(fs_info);
6471 return err;
6472}
6473
6474static noinline int uncompress_inline(struct btrfs_path *path,
6475 struct page *page,
6476 size_t pg_offset, u64 extent_offset,
6477 struct btrfs_file_extent_item *item)
6478{
6479 int ret;
6480 struct extent_buffer *leaf = path->nodes[0];
6481 char *tmp;
6482 size_t max_size;
6483 unsigned long inline_size;
6484 unsigned long ptr;
6485 int compress_type;
6486
6487 WARN_ON(pg_offset != 0);
6488 compress_type = btrfs_file_extent_compression(leaf, item);
6489 max_size = btrfs_file_extent_ram_bytes(leaf, item);
6490 inline_size = btrfs_file_extent_inline_item_len(leaf,
6491 btrfs_item_nr(path->slots[0]));
6492 tmp = kmalloc(inline_size, GFP_NOFS);
6493 if (!tmp)
6494 return -ENOMEM;
6495 ptr = btrfs_file_extent_inline_start(item);
6496
6497 read_extent_buffer(leaf, tmp, ptr, inline_size);
6498
6499 max_size = min_t(unsigned long, PAGE_SIZE, max_size);
6500 ret = btrfs_decompress(compress_type, tmp, page,
6501 extent_offset, inline_size, max_size);
6502
6503
6504
6505
6506
6507
6508
6509
6510
6511 if (max_size + pg_offset < PAGE_SIZE) {
6512 char *map = kmap(page);
6513 memset(map + pg_offset + max_size, 0, PAGE_SIZE - max_size - pg_offset);
6514 kunmap(page);
6515 }
6516 kfree(tmp);
6517 return ret;
6518}
6519
6520
6521
6522
6523
6524
6525
6526
6527
6528
6529
6530
6531
6532
6533
6534
6535
6536
6537
6538struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
6539 struct page *page, size_t pg_offset,
6540 u64 start, u64 len)
6541{
6542 struct btrfs_fs_info *fs_info = inode->root->fs_info;
6543 int ret;
6544 int err = 0;
6545 u64 extent_start = 0;
6546 u64 extent_end = 0;
6547 u64 objectid = btrfs_ino(inode);
6548 int extent_type = -1;
6549 struct btrfs_path *path = NULL;
6550 struct btrfs_root *root = inode->root;
6551 struct btrfs_file_extent_item *item;
6552 struct extent_buffer *leaf;
6553 struct btrfs_key found_key;
6554 struct extent_map *em = NULL;
6555 struct extent_map_tree *em_tree = &inode->extent_tree;
6556 struct extent_io_tree *io_tree = &inode->io_tree;
6557
6558 read_lock(&em_tree->lock);
6559 em = lookup_extent_mapping(em_tree, start, len);
6560 read_unlock(&em_tree->lock);
6561
6562 if (em) {
6563 if (em->start > start || em->start + em->len <= start)
6564 free_extent_map(em);
6565 else if (em->block_start == EXTENT_MAP_INLINE && page)
6566 free_extent_map(em);
6567 else
6568 goto out;
6569 }
6570 em = alloc_extent_map();
6571 if (!em) {
6572 err = -ENOMEM;
6573 goto out;
6574 }
6575 em->start = EXTENT_MAP_HOLE;
6576 em->orig_start = EXTENT_MAP_HOLE;
6577 em->len = (u64)-1;
6578 em->block_len = (u64)-1;
6579
6580 path = btrfs_alloc_path();
6581 if (!path) {
6582 err = -ENOMEM;
6583 goto out;
6584 }
6585
6586
6587 path->reada = READA_FORWARD;
6588
6589
6590
6591
6592
6593 path->leave_spinning = 1;
6594
6595 ret = btrfs_lookup_file_extent(NULL, root, path, objectid, start, 0);
6596 if (ret < 0) {
6597 err = ret;
6598 goto out;
6599 } else if (ret > 0) {
6600 if (path->slots[0] == 0)
6601 goto not_found;
6602 path->slots[0]--;
6603 }
6604
6605 leaf = path->nodes[0];
6606 item = btrfs_item_ptr(leaf, path->slots[0],
6607 struct btrfs_file_extent_item);
6608 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
6609 if (found_key.objectid != objectid ||
6610 found_key.type != BTRFS_EXTENT_DATA_KEY) {
6611
6612
6613
6614
6615
6616
6617 extent_end = start;
6618 goto next;
6619 }
6620
6621 extent_type = btrfs_file_extent_type(leaf, item);
6622 extent_start = found_key.offset;
6623 extent_end = btrfs_file_extent_end(path);
6624 if (extent_type == BTRFS_FILE_EXTENT_REG ||
6625 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
6626
6627 if (!S_ISREG(inode->vfs_inode.i_mode)) {
6628 err = -EUCLEAN;
6629 btrfs_crit(fs_info,
6630 "regular/prealloc extent found for non-regular inode %llu",
6631 btrfs_ino(inode));
6632 goto out;
6633 }
6634 trace_btrfs_get_extent_show_fi_regular(inode, leaf, item,
6635 extent_start);
6636 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
6637 trace_btrfs_get_extent_show_fi_inline(inode, leaf, item,
6638 path->slots[0],
6639 extent_start);
6640 }
6641next:
6642 if (start >= extent_end) {
6643 path->slots[0]++;
6644 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
6645 ret = btrfs_next_leaf(root, path);
6646 if (ret < 0) {
6647 err = ret;
6648 goto out;
6649 } else if (ret > 0) {
6650 goto not_found;
6651 }
6652 leaf = path->nodes[0];
6653 }
6654 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
6655 if (found_key.objectid != objectid ||
6656 found_key.type != BTRFS_EXTENT_DATA_KEY)
6657 goto not_found;
6658 if (start + len <= found_key.offset)
6659 goto not_found;
6660 if (start > found_key.offset)
6661 goto next;
6662
6663
6664 em->start = start;
6665 em->orig_start = start;
6666 em->len = found_key.offset - start;
6667 em->block_start = EXTENT_MAP_HOLE;
6668 goto insert;
6669 }
6670
6671 btrfs_extent_item_to_extent_map(inode, path, item, !page, em);
6672
6673 if (extent_type == BTRFS_FILE_EXTENT_REG ||
6674 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
6675 goto insert;
6676 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
6677 unsigned long ptr;
6678 char *map;
6679 size_t size;
6680 size_t extent_offset;
6681 size_t copy_size;
6682
6683 if (!page)
6684 goto out;
6685
6686 size = btrfs_file_extent_ram_bytes(leaf, item);
6687 extent_offset = page_offset(page) + pg_offset - extent_start;
6688 copy_size = min_t(u64, PAGE_SIZE - pg_offset,
6689 size - extent_offset);
6690 em->start = extent_start + extent_offset;
6691 em->len = ALIGN(copy_size, fs_info->sectorsize);
6692 em->orig_block_len = em->len;
6693 em->orig_start = em->start;
6694 ptr = btrfs_file_extent_inline_start(item) + extent_offset;
6695
6696 btrfs_set_path_blocking(path);
6697 if (!PageUptodate(page)) {
6698 if (btrfs_file_extent_compression(leaf, item) !=
6699 BTRFS_COMPRESS_NONE) {
6700 ret = uncompress_inline(path, page, pg_offset,
6701 extent_offset, item);
6702 if (ret) {
6703 err = ret;
6704 goto out;
6705 }
6706 } else {
6707 map = kmap(page);
6708 read_extent_buffer(leaf, map + pg_offset, ptr,
6709 copy_size);
6710 if (pg_offset + copy_size < PAGE_SIZE) {
6711 memset(map + pg_offset + copy_size, 0,
6712 PAGE_SIZE - pg_offset -
6713 copy_size);
6714 }
6715 kunmap(page);
6716 }
6717 flush_dcache_page(page);
6718 }
6719 set_extent_uptodate(io_tree, em->start,
6720 extent_map_end(em) - 1, NULL, GFP_NOFS);
6721 goto insert;
6722 }
6723not_found:
6724 em->start = start;
6725 em->orig_start = start;
6726 em->len = len;
6727 em->block_start = EXTENT_MAP_HOLE;
6728insert:
6729 btrfs_release_path(path);
6730 if (em->start > start || extent_map_end(em) <= start) {
6731 btrfs_err(fs_info,
6732 "bad extent! em: [%llu %llu] passed [%llu %llu]",
6733 em->start, em->len, start, len);
6734 err = -EIO;
6735 goto out;
6736 }
6737
6738 err = 0;
6739 write_lock(&em_tree->lock);
6740 err = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
6741 write_unlock(&em_tree->lock);
6742out:
6743 btrfs_free_path(path);
6744
6745 trace_btrfs_get_extent(root, inode, em);
6746
6747 if (err) {
6748 free_extent_map(em);
6749 return ERR_PTR(err);
6750 }
6751 BUG_ON(!em);
6752 return em;
6753}
6754
6755struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
6756 u64 start, u64 len)
6757{
6758 struct extent_map *em;
6759 struct extent_map *hole_em = NULL;
6760 u64 delalloc_start = start;
6761 u64 end;
6762 u64 delalloc_len;
6763 u64 delalloc_end;
6764 int err = 0;
6765
6766 em = btrfs_get_extent(inode, NULL, 0, start, len);
6767 if (IS_ERR(em))
6768 return em;
6769
6770
6771
6772
6773
6774
6775 if (em->block_start != EXTENT_MAP_HOLE &&
6776 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
6777 return em;
6778 else
6779 hole_em = em;
6780
6781
6782 end = start + len;
6783 if (end < start)
6784 end = (u64)-1;
6785 else
6786 end -= 1;
6787
6788 em = NULL;
6789
6790
6791 delalloc_len = count_range_bits(&inode->io_tree, &delalloc_start,
6792 end, len, EXTENT_DELALLOC, 1);
6793 delalloc_end = delalloc_start + delalloc_len;
6794 if (delalloc_end < delalloc_start)
6795 delalloc_end = (u64)-1;
6796
6797
6798
6799
6800
6801 if (delalloc_start > end || delalloc_end <= start) {
6802 em = hole_em;
6803 hole_em = NULL;
6804 goto out;
6805 }
6806
6807
6808
6809
6810
6811 delalloc_start = max(start, delalloc_start);
6812 delalloc_len = delalloc_end - delalloc_start;
6813
6814 if (delalloc_len > 0) {
6815 u64 hole_start;
6816 u64 hole_len;
6817 const u64 hole_end = extent_map_end(hole_em);
6818
6819 em = alloc_extent_map();
6820 if (!em) {
6821 err = -ENOMEM;
6822 goto out;
6823 }
6824
6825 ASSERT(hole_em);
6826
6827
6828
6829
6830
6831
6832
6833 if (hole_end <= start || hole_em->start > end) {
6834 free_extent_map(hole_em);
6835 hole_em = NULL;
6836 } else {
6837 hole_start = max(hole_em->start, start);
6838 hole_len = hole_end - hole_start;
6839 }
6840
6841 if (hole_em && delalloc_start > hole_start) {
6842
6843
6844
6845
6846
6847 em->len = min(hole_len, delalloc_start - hole_start);
6848 em->start = hole_start;
6849 em->orig_start = hole_start;
6850
6851
6852
6853
6854 em->block_start = hole_em->block_start;
6855 em->block_len = hole_len;
6856 if (test_bit(EXTENT_FLAG_PREALLOC, &hole_em->flags))
6857 set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
6858 } else {
6859
6860
6861
6862
6863 em->start = delalloc_start;
6864 em->len = delalloc_len;
6865 em->orig_start = delalloc_start;
6866 em->block_start = EXTENT_MAP_DELALLOC;
6867 em->block_len = delalloc_len;
6868 }
6869 } else {
6870 return hole_em;
6871 }
6872out:
6873
6874 free_extent_map(hole_em);
6875 if (err) {
6876 free_extent_map(em);
6877 return ERR_PTR(err);
6878 }
6879 return em;
6880}
6881
6882static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode,
6883 const u64 start,
6884 const u64 len,
6885 const u64 orig_start,
6886 const u64 block_start,
6887 const u64 block_len,
6888 const u64 orig_block_len,
6889 const u64 ram_bytes,
6890 const int type)
6891{
6892 struct extent_map *em = NULL;
6893 int ret;
6894
6895 if (type != BTRFS_ORDERED_NOCOW) {
6896 em = create_io_em(inode, start, len, orig_start, block_start,
6897 block_len, orig_block_len, ram_bytes,
6898 BTRFS_COMPRESS_NONE,
6899 type);
6900 if (IS_ERR(em))
6901 goto out;
6902 }
6903 ret = btrfs_add_ordered_extent_dio(inode, start, block_start, len,
6904 block_len, type);
6905 if (ret) {
6906 if (em) {
6907 free_extent_map(em);
6908 btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
6909 }
6910 em = ERR_PTR(ret);
6911 }
6912 out:
6913
6914 return em;
6915}
6916
6917static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode,
6918 u64 start, u64 len)
6919{
6920 struct btrfs_root *root = inode->root;
6921 struct btrfs_fs_info *fs_info = root->fs_info;
6922 struct extent_map *em;
6923 struct btrfs_key ins;
6924 u64 alloc_hint;
6925 int ret;
6926
6927 alloc_hint = get_extent_allocation_hint(inode, start, len);
6928 ret = btrfs_reserve_extent(root, len, len, fs_info->sectorsize,
6929 0, alloc_hint, &ins, 1, 1);
6930 if (ret)
6931 return ERR_PTR(ret);
6932
6933 em = btrfs_create_dio_extent(inode, start, ins.offset, start,
6934 ins.objectid, ins.offset, ins.offset,
6935 ins.offset, BTRFS_ORDERED_REGULAR);
6936 btrfs_dec_block_group_reservations(fs_info, ins.objectid);
6937 if (IS_ERR(em))
6938 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset,
6939 1);
6940
6941 return em;
6942}
6943
6944
6945
6946
6947
6948
6949
6950
6951
6952
6953
6954
6955
6956
6957
6958
6959
6960
6961
6962
6963
6964
6965
6966
6967noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
6968 u64 *orig_start, u64 *orig_block_len,
6969 u64 *ram_bytes, bool strict)
6970{
6971 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
6972 struct btrfs_path *path;
6973 int ret;
6974 struct extent_buffer *leaf;
6975 struct btrfs_root *root = BTRFS_I(inode)->root;
6976 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
6977 struct btrfs_file_extent_item *fi;
6978 struct btrfs_key key;
6979 u64 disk_bytenr;
6980 u64 backref_offset;
6981 u64 extent_end;
6982 u64 num_bytes;
6983 int slot;
6984 int found_type;
6985 bool nocow = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW);
6986
6987 path = btrfs_alloc_path();
6988 if (!path)
6989 return -ENOMEM;
6990
6991 ret = btrfs_lookup_file_extent(NULL, root, path,
6992 btrfs_ino(BTRFS_I(inode)), offset, 0);
6993 if (ret < 0)
6994 goto out;
6995
6996 slot = path->slots[0];
6997 if (ret == 1) {
6998 if (slot == 0) {
6999
7000 ret = 0;
7001 goto out;
7002 }
7003 slot--;
7004 }
7005 ret = 0;
7006 leaf = path->nodes[0];
7007 btrfs_item_key_to_cpu(leaf, &key, slot);
7008 if (key.objectid != btrfs_ino(BTRFS_I(inode)) ||
7009 key.type != BTRFS_EXTENT_DATA_KEY) {
7010
7011 goto out;
7012 }
7013
7014 if (key.offset > offset) {
7015
7016 goto out;
7017 }
7018
7019 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
7020 found_type = btrfs_file_extent_type(leaf, fi);
7021 if (found_type != BTRFS_FILE_EXTENT_REG &&
7022 found_type != BTRFS_FILE_EXTENT_PREALLOC) {
7023
7024 goto out;
7025 }
7026
7027 if (!nocow && found_type == BTRFS_FILE_EXTENT_REG)
7028 goto out;
7029
7030 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
7031 if (extent_end <= offset)
7032 goto out;
7033
7034 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
7035 if (disk_bytenr == 0)
7036 goto out;
7037
7038 if (btrfs_file_extent_compression(leaf, fi) ||
7039 btrfs_file_extent_encryption(leaf, fi) ||
7040 btrfs_file_extent_other_encoding(leaf, fi))
7041 goto out;
7042
7043
7044
7045
7046
7047 if (!strict &&
7048 (btrfs_file_extent_generation(leaf, fi) <=
7049 btrfs_root_last_snapshot(&root->root_item)))
7050 goto out;
7051
7052 backref_offset = btrfs_file_extent_offset(leaf, fi);
7053
7054 if (orig_start) {
7055 *orig_start = key.offset - backref_offset;
7056 *orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
7057 *ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
7058 }
7059
7060 if (btrfs_extent_readonly(fs_info, disk_bytenr))
7061 goto out;
7062
7063 num_bytes = min(offset + *len, extent_end) - offset;
7064 if (!nocow && found_type == BTRFS_FILE_EXTENT_PREALLOC) {
7065 u64 range_end;
7066
7067 range_end = round_up(offset + num_bytes,
7068 root->fs_info->sectorsize) - 1;
7069 ret = test_range_bit(io_tree, offset, range_end,
7070 EXTENT_DELALLOC, 0, NULL);
7071 if (ret) {
7072 ret = -EAGAIN;
7073 goto out;
7074 }
7075 }
7076
7077 btrfs_release_path(path);
7078
7079
7080
7081
7082
7083
7084 ret = btrfs_cross_ref_exist(root, btrfs_ino(BTRFS_I(inode)),
7085 key.offset - backref_offset, disk_bytenr,
7086 strict);
7087 if (ret) {
7088 ret = 0;
7089 goto out;
7090 }
7091
7092
7093
7094
7095
7096
7097
7098 disk_bytenr += backref_offset;
7099 disk_bytenr += offset - key.offset;
7100 if (csum_exist_in_range(fs_info, disk_bytenr, num_bytes))
7101 goto out;
7102
7103
7104
7105
7106 *len = num_bytes;
7107 ret = 1;
7108out:
7109 btrfs_free_path(path);
7110 return ret;
7111}
7112
7113static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
7114 struct extent_state **cached_state, int writing)
7115{
7116 struct btrfs_ordered_extent *ordered;
7117 int ret = 0;
7118
7119 while (1) {
7120 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
7121 cached_state);
7122
7123
7124
7125
7126
7127 ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), lockstart,
7128 lockend - lockstart + 1);
7129
7130
7131
7132
7133
7134
7135
7136
7137 if (!ordered &&
7138 (!writing || !filemap_range_has_page(inode->i_mapping,
7139 lockstart, lockend)))
7140 break;
7141
7142 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
7143 cached_state);
7144
7145 if (ordered) {
7146
7147
7148
7149
7150
7151
7152
7153
7154
7155
7156
7157
7158
7159
7160
7161 if (writing ||
7162 test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags))
7163 btrfs_start_ordered_extent(inode, ordered, 1);
7164 else
7165 ret = -ENOTBLK;
7166 btrfs_put_ordered_extent(ordered);
7167 } else {
7168
7169
7170
7171
7172
7173
7174
7175
7176
7177
7178
7179
7180
7181 ret = -ENOTBLK;
7182 }
7183
7184 if (ret)
7185 break;
7186
7187 cond_resched();
7188 }
7189
7190 return ret;
7191}
7192
7193
7194static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
7195 u64 len, u64 orig_start, u64 block_start,
7196 u64 block_len, u64 orig_block_len,
7197 u64 ram_bytes, int compress_type,
7198 int type)
7199{
7200 struct extent_map_tree *em_tree;
7201 struct extent_map *em;
7202 int ret;
7203
7204 ASSERT(type == BTRFS_ORDERED_PREALLOC ||
7205 type == BTRFS_ORDERED_COMPRESSED ||
7206 type == BTRFS_ORDERED_NOCOW ||
7207 type == BTRFS_ORDERED_REGULAR);
7208
7209 em_tree = &inode->extent_tree;
7210 em = alloc_extent_map();
7211 if (!em)
7212 return ERR_PTR(-ENOMEM);
7213
7214 em->start = start;
7215 em->orig_start = orig_start;
7216 em->len = len;
7217 em->block_len = block_len;
7218 em->block_start = block_start;
7219 em->orig_block_len = orig_block_len;
7220 em->ram_bytes = ram_bytes;
7221 em->generation = -1;
7222 set_bit(EXTENT_FLAG_PINNED, &em->flags);
7223 if (type == BTRFS_ORDERED_PREALLOC) {
7224 set_bit(EXTENT_FLAG_FILLING, &em->flags);
7225 } else if (type == BTRFS_ORDERED_COMPRESSED) {
7226 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
7227 em->compress_type = compress_type;
7228 }
7229
7230 do {
7231 btrfs_drop_extent_cache(inode, em->start,
7232 em->start + em->len - 1, 0);
7233 write_lock(&em_tree->lock);
7234 ret = add_extent_mapping(em_tree, em, 1);
7235 write_unlock(&em_tree->lock);
7236
7237
7238
7239
7240 } while (ret == -EEXIST);
7241
7242 if (ret) {
7243 free_extent_map(em);
7244 return ERR_PTR(ret);
7245 }
7246
7247
7248 return em;
7249}
7250
7251
7252static int btrfs_get_blocks_direct_read(struct extent_map *em,
7253 struct buffer_head *bh_result,
7254 struct inode *inode,
7255 u64 start, u64 len)
7256{
7257 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
7258
7259 if (em->block_start == EXTENT_MAP_HOLE ||
7260 test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
7261 return -ENOENT;
7262
7263 len = min(len, em->len - (start - em->start));
7264
7265 bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
7266 inode->i_blkbits;
7267 bh_result->b_size = len;
7268 bh_result->b_bdev = fs_info->fs_devices->latest_bdev;
7269 set_buffer_mapped(bh_result);
7270
7271 return 0;
7272}
7273
7274static int btrfs_get_blocks_direct_write(struct extent_map **map,
7275 struct buffer_head *bh_result,
7276 struct inode *inode,
7277 struct btrfs_dio_data *dio_data,
7278 u64 start, u64 len)
7279{
7280 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
7281 struct extent_map *em = *map;
7282 int ret = 0;
7283
7284
7285
7286
7287
7288
7289
7290
7291
7292
7293 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
7294 ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
7295 em->block_start != EXTENT_MAP_HOLE)) {
7296 int type;
7297 u64 block_start, orig_start, orig_block_len, ram_bytes;
7298
7299 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
7300 type = BTRFS_ORDERED_PREALLOC;
7301 else
7302 type = BTRFS_ORDERED_NOCOW;
7303 len = min(len, em->len - (start - em->start));
7304 block_start = em->block_start + (start - em->start);
7305
7306 if (can_nocow_extent(inode, start, &len, &orig_start,
7307 &orig_block_len, &ram_bytes, false) == 1 &&
7308 btrfs_inc_nocow_writers(fs_info, block_start)) {
7309 struct extent_map *em2;
7310
7311 em2 = btrfs_create_dio_extent(BTRFS_I(inode), start, len,
7312 orig_start, block_start,
7313 len, orig_block_len,
7314 ram_bytes, type);
7315 btrfs_dec_nocow_writers(fs_info, block_start);
7316 if (type == BTRFS_ORDERED_PREALLOC) {
7317 free_extent_map(em);
7318 *map = em = em2;
7319 }
7320
7321 if (em2 && IS_ERR(em2)) {
7322 ret = PTR_ERR(em2);
7323 goto out;
7324 }
7325
7326
7327
7328
7329
7330 btrfs_free_reserved_data_space_noquota(fs_info, len);
7331 goto skip_cow;
7332 }
7333 }
7334
7335
7336 len = bh_result->b_size;
7337 free_extent_map(em);
7338 *map = em = btrfs_new_extent_direct(BTRFS_I(inode), start, len);
7339 if (IS_ERR(em)) {
7340 ret = PTR_ERR(em);
7341 goto out;
7342 }
7343
7344 len = min(len, em->len - (start - em->start));
7345
7346skip_cow:
7347 bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
7348 inode->i_blkbits;
7349 bh_result->b_size = len;
7350 bh_result->b_bdev = fs_info->fs_devices->latest_bdev;
7351 set_buffer_mapped(bh_result);
7352
7353 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
7354 set_buffer_new(bh_result);
7355
7356
7357
7358
7359
7360 if (!dio_data->overwrite && start + len > i_size_read(inode))
7361 i_size_write(inode, start + len);
7362
7363 WARN_ON(dio_data->reserve < len);
7364 dio_data->reserve -= len;
7365 dio_data->unsubmitted_oe_range_end = start + len;
7366 current->journal_info = dio_data;
7367out:
7368 return ret;
7369}
7370
7371static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
7372 struct buffer_head *bh_result, int create)
7373{
7374 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
7375 struct extent_map *em;
7376 struct extent_state *cached_state = NULL;
7377 struct btrfs_dio_data *dio_data = NULL;
7378 u64 start = iblock << inode->i_blkbits;
7379 u64 lockstart, lockend;
7380 u64 len = bh_result->b_size;
7381 int ret = 0;
7382
7383 if (!create)
7384 len = min_t(u64, len, fs_info->sectorsize);
7385
7386 lockstart = start;
7387 lockend = start + len - 1;
7388
7389 if (current->journal_info) {
7390
7391
7392
7393
7394
7395 dio_data = current->journal_info;
7396 current->journal_info = NULL;
7397 }
7398
7399
7400
7401
7402
7403 if (lock_extent_direct(inode, lockstart, lockend, &cached_state,
7404 create)) {
7405 ret = -ENOTBLK;
7406 goto err;
7407 }
7408
7409 em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len);
7410 if (IS_ERR(em)) {
7411 ret = PTR_ERR(em);
7412 goto unlock_err;
7413 }
7414
7415
7416
7417
7418
7419
7420
7421
7422
7423
7424
7425
7426
7427
7428
7429 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
7430 em->block_start == EXTENT_MAP_INLINE) {
7431 free_extent_map(em);
7432 ret = -ENOTBLK;
7433 goto unlock_err;
7434 }
7435
7436 if (create) {
7437 ret = btrfs_get_blocks_direct_write(&em, bh_result, inode,
7438 dio_data, start, len);
7439 if (ret < 0)
7440 goto unlock_err;
7441
7442 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
7443 lockend, &cached_state);
7444 } else {
7445 ret = btrfs_get_blocks_direct_read(em, bh_result, inode,
7446 start, len);
7447
7448 if (ret < 0) {
7449 ret = 0;
7450 free_extent_map(em);
7451 goto unlock_err;
7452 }
7453
7454
7455
7456
7457 lockstart = start + bh_result->b_size;
7458 if (lockstart < lockend) {
7459 unlock_extent_cached(&BTRFS_I(inode)->io_tree,
7460 lockstart, lockend, &cached_state);
7461 } else {
7462 free_extent_state(cached_state);
7463 }
7464 }
7465
7466 free_extent_map(em);
7467
7468 return 0;
7469
7470unlock_err:
7471 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
7472 &cached_state);
7473err:
7474 if (dio_data)
7475 current->journal_info = dio_data;
7476 return ret;
7477}
7478
7479static void btrfs_dio_private_put(struct btrfs_dio_private *dip)
7480{
7481
7482
7483
7484
7485 if (!refcount_dec_and_test(&dip->refs))
7486 return;
7487
7488 if (bio_op(dip->dio_bio) == REQ_OP_WRITE) {
7489 __endio_write_update_ordered(BTRFS_I(dip->inode),
7490 dip->logical_offset,
7491 dip->bytes,
7492 !dip->dio_bio->bi_status);
7493 } else {
7494 unlock_extent(&BTRFS_I(dip->inode)->io_tree,
7495 dip->logical_offset,
7496 dip->logical_offset + dip->bytes - 1);
7497 }
7498
7499 dio_end_io(dip->dio_bio);
7500 kfree(dip);
7501}
7502
7503static blk_status_t submit_dio_repair_bio(struct inode *inode, struct bio *bio,
7504 int mirror_num,
7505 unsigned long bio_flags)
7506{
7507 struct btrfs_dio_private *dip = bio->bi_private;
7508 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
7509 blk_status_t ret;
7510
7511 BUG_ON(bio_op(bio) == REQ_OP_WRITE);
7512
7513 ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
7514 if (ret)
7515 return ret;
7516
7517 refcount_inc(&dip->refs);
7518 ret = btrfs_map_bio(fs_info, bio, mirror_num);
7519 if (ret)
7520 refcount_dec(&dip->refs);
7521 return ret;
7522}
7523
7524static blk_status_t btrfs_check_read_dio_bio(struct inode *inode,
7525 struct btrfs_io_bio *io_bio,
7526 const bool uptodate)
7527{
7528 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
7529 const u32 sectorsize = fs_info->sectorsize;
7530 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
7531 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
7532 const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM);
7533 struct bio_vec bvec;
7534 struct bvec_iter iter;
7535 u64 start = io_bio->logical;
7536 int icsum = 0;
7537 blk_status_t err = BLK_STS_OK;
7538
7539 __bio_for_each_segment(bvec, &io_bio->bio, iter, io_bio->iter) {
7540 unsigned int i, nr_sectors, pgoff;
7541
7542 nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len);
7543 pgoff = bvec.bv_offset;
7544 for (i = 0; i < nr_sectors; i++) {
7545 ASSERT(pgoff < PAGE_SIZE);
7546 if (uptodate &&
7547 (!csum || !check_data_csum(inode, io_bio, icsum,
7548 bvec.bv_page, pgoff,
7549 start, sectorsize))) {
7550 clean_io_failure(fs_info, failure_tree, io_tree,
7551 start, bvec.bv_page,
7552 btrfs_ino(BTRFS_I(inode)),
7553 pgoff);
7554 } else {
7555 blk_status_t status;
7556
7557 status = btrfs_submit_read_repair(inode,
7558 &io_bio->bio,
7559 start - io_bio->logical,
7560 bvec.bv_page, pgoff,
7561 start,
7562 start + sectorsize - 1,
7563 io_bio->mirror_num,
7564 submit_dio_repair_bio);
7565 if (status)
7566 err = status;
7567 }
7568 start += sectorsize;
7569 icsum++;
7570 pgoff += sectorsize;
7571 }
7572 }
7573 return err;
7574}
7575
7576static void __endio_write_update_ordered(struct btrfs_inode *inode,
7577 const u64 offset, const u64 bytes,
7578 const bool uptodate)
7579{
7580 struct btrfs_fs_info *fs_info = inode->root->fs_info;
7581 struct btrfs_ordered_extent *ordered = NULL;
7582 struct btrfs_workqueue *wq;
7583 u64 ordered_offset = offset;
7584 u64 ordered_bytes = bytes;
7585 u64 last_offset;
7586
7587 if (btrfs_is_free_space_inode(inode))
7588 wq = fs_info->endio_freespace_worker;
7589 else
7590 wq = fs_info->endio_write_workers;
7591
7592 while (ordered_offset < offset + bytes) {
7593 last_offset = ordered_offset;
7594 if (btrfs_dec_test_first_ordered_pending(inode, &ordered,
7595 &ordered_offset,
7596 ordered_bytes,
7597 uptodate)) {
7598 btrfs_init_work(&ordered->work, finish_ordered_fn, NULL,
7599 NULL);
7600 btrfs_queue_work(wq, &ordered->work);
7601 }
7602
7603
7604
7605
7606 if (ordered_offset == last_offset)
7607 return;
7608
7609
7610
7611
7612 if (ordered_offset < offset + bytes) {
7613 ordered_bytes = offset + bytes - ordered_offset;
7614 ordered = NULL;
7615 }
7616 }
7617}
7618
7619static blk_status_t btrfs_submit_bio_start_direct_io(void *private_data,
7620 struct bio *bio, u64 offset)
7621{
7622 struct inode *inode = private_data;
7623
7624 return btrfs_csum_one_bio(BTRFS_I(inode), bio, offset, 1);
7625}
7626
7627static void btrfs_end_dio_bio(struct bio *bio)
7628{
7629 struct btrfs_dio_private *dip = bio->bi_private;
7630 blk_status_t err = bio->bi_status;
7631
7632 if (err)
7633 btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
7634 "direct IO failed ino %llu rw %d,%u sector %#Lx len %u err no %d",
7635 btrfs_ino(BTRFS_I(dip->inode)), bio_op(bio),
7636 bio->bi_opf,
7637 (unsigned long long)bio->bi_iter.bi_sector,
7638 bio->bi_iter.bi_size, err);
7639
7640 if (bio_op(bio) == REQ_OP_READ) {
7641 err = btrfs_check_read_dio_bio(dip->inode, btrfs_io_bio(bio),
7642 !err);
7643 }
7644
7645 if (err)
7646 dip->dio_bio->bi_status = err;
7647
7648 bio_put(bio);
7649 btrfs_dio_private_put(dip);
7650}
7651
7652static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
7653 struct inode *inode, u64 file_offset, int async_submit)
7654{
7655 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
7656 struct btrfs_dio_private *dip = bio->bi_private;
7657 bool write = bio_op(bio) == REQ_OP_WRITE;
7658 blk_status_t ret;
7659
7660
7661 if (async_submit)
7662 async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);
7663
7664 if (!write) {
7665 ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
7666 if (ret)
7667 goto err;
7668 }
7669
7670 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
7671 goto map;
7672
7673 if (write && async_submit) {
7674 ret = btrfs_wq_submit_bio(fs_info, bio, 0, 0,
7675 file_offset, inode,
7676 btrfs_submit_bio_start_direct_io);
7677 goto err;
7678 } else if (write) {
7679
7680
7681
7682
7683 ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, file_offset, 1);
7684 if (ret)
7685 goto err;
7686 } else {
7687 u64 csum_offset;
7688
7689 csum_offset = file_offset - dip->logical_offset;
7690 csum_offset >>= inode->i_sb->s_blocksize_bits;
7691 csum_offset *= btrfs_super_csum_size(fs_info->super_copy);
7692 btrfs_io_bio(bio)->csum = dip->csums + csum_offset;
7693 }
7694map:
7695 ret = btrfs_map_bio(fs_info, bio, 0);
7696err:
7697 return ret;
7698}
7699
7700
7701
7702
7703
7704static struct btrfs_dio_private *btrfs_create_dio_private(struct bio *dio_bio,
7705 struct inode *inode,
7706 loff_t file_offset)
7707{
7708 const bool write = (bio_op(dio_bio) == REQ_OP_WRITE);
7709 const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM);
7710 size_t dip_size;
7711 struct btrfs_dio_private *dip;
7712
7713 dip_size = sizeof(*dip);
7714 if (!write && csum) {
7715 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
7716 const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
7717 size_t nblocks;
7718
7719 nblocks = dio_bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits;
7720 dip_size += csum_size * nblocks;
7721 }
7722
7723 dip = kzalloc(dip_size, GFP_NOFS);
7724 if (!dip)
7725 return NULL;
7726
7727 dip->inode = inode;
7728 dip->logical_offset = file_offset;
7729 dip->bytes = dio_bio->bi_iter.bi_size;
7730 dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9;
7731 dip->dio_bio = dio_bio;
7732 refcount_set(&dip->refs, 1);
7733
7734 if (write) {
7735 struct btrfs_dio_data *dio_data = current->journal_info;
7736
7737
7738
7739
7740
7741 dio_data->unsubmitted_oe_range_end = dip->logical_offset +
7742 dip->bytes;
7743 dio_data->unsubmitted_oe_range_start =
7744 dio_data->unsubmitted_oe_range_end;
7745 }
7746 return dip;
7747}
7748
7749static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode,
7750 loff_t file_offset)
7751{
7752 const bool write = (bio_op(dio_bio) == REQ_OP_WRITE);
7753 const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM);
7754 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
7755 const bool raid56 = (btrfs_data_alloc_profile(fs_info) &
7756 BTRFS_BLOCK_GROUP_RAID56_MASK);
7757 struct btrfs_dio_private *dip;
7758 struct bio *bio;
7759 u64 start_sector;
7760 int async_submit = 0;
7761 u64 submit_len;
7762 int clone_offset = 0;
7763 int clone_len;
7764 int ret;
7765 blk_status_t status;
7766 struct btrfs_io_geometry geom;
7767
7768 dip = btrfs_create_dio_private(dio_bio, inode, file_offset);
7769 if (!dip) {
7770 if (!write) {
7771 unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
7772 file_offset + dio_bio->bi_iter.bi_size - 1);
7773 }
7774 dio_bio->bi_status = BLK_STS_RESOURCE;
7775 dio_end_io(dio_bio);
7776 return;
7777 }
7778
7779 if (!write && csum) {
7780
7781
7782
7783
7784 status = btrfs_lookup_bio_sums(inode, dio_bio, file_offset,
7785 dip->csums);
7786 if (status != BLK_STS_OK)
7787 goto out_err;
7788 }
7789
7790 start_sector = dio_bio->bi_iter.bi_sector;
7791 submit_len = dio_bio->bi_iter.bi_size;
7792
7793 do {
7794 ret = btrfs_get_io_geometry(fs_info, btrfs_op(dio_bio),
7795 start_sector << 9, submit_len,
7796 &geom);
7797 if (ret) {
7798 status = errno_to_blk_status(ret);
7799 goto out_err;
7800 }
7801 ASSERT(geom.len <= INT_MAX);
7802
7803 clone_len = min_t(int, submit_len, geom.len);
7804
7805
7806
7807
7808
7809 bio = btrfs_bio_clone_partial(dio_bio, clone_offset, clone_len);
7810 bio->bi_private = dip;
7811 bio->bi_end_io = btrfs_end_dio_bio;
7812 btrfs_io_bio(bio)->logical = file_offset;
7813
7814 ASSERT(submit_len >= clone_len);
7815 submit_len -= clone_len;
7816
7817
7818
7819
7820
7821
7822
7823
7824
7825
7826 if (submit_len > 0) {
7827 refcount_inc(&dip->refs);
7828
7829
7830
7831
7832
7833
7834 if (!raid56)
7835 async_submit = 1;
7836 }
7837
7838 status = btrfs_submit_dio_bio(bio, inode, file_offset,
7839 async_submit);
7840 if (status) {
7841 bio_put(bio);
7842 if (submit_len > 0)
7843 refcount_dec(&dip->refs);
7844 goto out_err;
7845 }
7846
7847 clone_offset += clone_len;
7848 start_sector += clone_len >> 9;
7849 file_offset += clone_len;
7850 } while (submit_len > 0);
7851 return;
7852
7853out_err:
7854 dip->dio_bio->bi_status = status;
7855 btrfs_dio_private_put(dip);
7856}
7857
7858static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info,
7859 const struct iov_iter *iter, loff_t offset)
7860{
7861 int seg;
7862 int i;
7863 unsigned int blocksize_mask = fs_info->sectorsize - 1;
7864 ssize_t retval = -EINVAL;
7865
7866 if (offset & blocksize_mask)
7867 goto out;
7868
7869 if (iov_iter_alignment(iter) & blocksize_mask)
7870 goto out;
7871
7872
7873 if (iov_iter_rw(iter) != READ || !iter_is_iovec(iter))
7874 return 0;
7875
7876
7877
7878
7879
7880 for (seg = 0; seg < iter->nr_segs; seg++) {
7881 for (i = seg + 1; i < iter->nr_segs; i++) {
7882 if (iter->iov[seg].iov_base == iter->iov[i].iov_base)
7883 goto out;
7884 }
7885 }
7886 retval = 0;
7887out:
7888 return retval;
7889}
7890
/*
 * Direct I/O entry point.
 *
 * Returns 0 without issuing any I/O when check_direct_IO() reports a
 * non-zero result (NOTE(review): presumably so that the caller falls back to
 * buffered I/O - confirm against the callers).  Otherwise returns the result
 * of __blockdev_direct_IO(), or the error from reserving delalloc space for
 * a write.
 *
 * For writes the function reserves delalloc space for the whole request up
 * front, runs the I/O with dio_sem held for read, and afterwards gives back
 * whatever part of the reservation was not consumed.
 */
static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_dio_data dio_data = { 0 };
	struct extent_changeset *data_reserved = NULL;
	loff_t offset = iocb->ki_pos;
	size_t count = 0;
	int flags = 0;
	/* Whether inode_dio_end() still has to be called at 'out'. */
	bool wakeup = true;
	/* Whether the inode lock was dropped here and must be re-taken. */
	bool relock = false;
	ssize_t ret;

	if (check_direct_IO(fs_info, iter, offset))
		return 0;

	inode_dio_begin(inode);

	/*
	 * If the inode is flagged as having async extents, start writeback
	 * of the target range before doing the direct I/O.
	 */
	count = iov_iter_count(iter);
	if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
		     &BTRFS_I(inode)->runtime_flags))
		filemap_fdatawrite_range(inode->i_mapping, offset,
					 offset + count - 1);

	if (iov_iter_rw(iter) == WRITE) {
		/*
		 * A write that ends at or before i_size is treated as an
		 * overwrite: the inode lock is dropped for the duration of
		 * the I/O and re-taken at 'out'.  Writes extending past
		 * i_size keep the lock held.
		 */
		if (offset + count <= inode->i_size) {
			dio_data.overwrite = 1;
			inode_unlock(inode);
			relock = true;
		}
		ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
						   offset, count);
		if (ret)
			goto out;

		/*
		 * Record the sector-rounded reservation and an (initially
		 * empty) unsubmitted ordered extent range, and publish the
		 * structure through current->journal_info.
		 * NOTE(review): presumably read back by the get_block/submit
		 * callbacks passed below - confirm.
		 */
		dio_data.reserve = round_up(count,
					    fs_info->sectorsize);
		dio_data.unsubmitted_oe_range_start = (u64)offset;
		dio_data.unsubmitted_oe_range_end = (u64)offset;
		current->journal_info = &dio_data;
		down_read(&BTRFS_I(inode)->dio_sem);
	} else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
				     &BTRFS_I(inode)->runtime_flags)) {
		/*
		 * Read with READDIO_NEED_LOCK set: undo the inode_dio_begin()
		 * above right away and ask the generic code for
		 * DIO_LOCKING | DIO_SKIP_HOLES.  'wakeup' is cleared so that
		 * inode_dio_end() is not called a second time at 'out'.
		 */
		inode_dio_end(inode);
		flags = DIO_LOCKING | DIO_SKIP_HOLES;
		wakeup = false;
	}

	ret = __blockdev_direct_IO(iocb, inode,
				   fs_info->fs_devices->latest_bdev,
				   iter, btrfs_get_blocks_direct, NULL,
				   btrfs_submit_direct, flags);
	if (iov_iter_rw(iter) == WRITE) {
		up_read(&BTRFS_I(inode)->dio_sem);
		current->journal_info = NULL;
		if (ret < 0 && ret != -EIOCBQUEUED) {
			/* Hard failure: give back what is still reserved. */
			if (dio_data.reserve)
				btrfs_delalloc_release_space(BTRFS_I(inode),
					data_reserved, offset, dio_data.reserve,
					true);
			/*
			 * A non-empty unsubmitted range means ordered extents
			 * were recorded in dio_data without being submitted;
			 * run the ordered-extent end-io update over that
			 * range (last argument false).
			 */
			if (dio_data.unsubmitted_oe_range_start <
			    dio_data.unsubmitted_oe_range_end)
				__endio_write_update_ordered(BTRFS_I(inode),
					dio_data.unsubmitted_oe_range_start,
					dio_data.unsubmitted_oe_range_end -
					dio_data.unsubmitted_oe_range_start,
					false);
		} else if (ret >= 0 && (size_t)ret < count)
			/* Short write: release the part that was not written. */
			btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
					offset, count - (size_t)ret, true);
		btrfs_delalloc_release_extents(BTRFS_I(inode), count);
	}
out:
	if (wakeup)
		inode_dio_end(inode);
	if (relock)
		inode_lock(inode);

	extent_changeset_free(data_reserved);
	return ret;
}
7995
7996static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
7997 u64 start, u64 len)
7998{
7999 int ret;
8000
8001 ret = fiemap_prep(inode, fieinfo, start, &len, 0);
8002 if (ret)
8003 return ret;
8004
8005 return extent_fiemap(inode, fieinfo, start, len);
8006}
8007
8008int btrfs_readpage(struct file *file, struct page *page)
8009{
8010 return extent_read_full_page(page, btrfs_get_extent, 0);
8011}
8012
8013static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
8014{
8015 struct inode *inode = page->mapping->host;
8016 int ret;
8017
8018 if (current->flags & PF_MEMALLOC) {
8019 redirty_page_for_writepage(wbc, page);
8020 unlock_page(page);
8021 return 0;
8022 }
8023
8024
8025
8026
8027
8028
8029 if (!igrab(inode)) {
8030 redirty_page_for_writepage(wbc, page);
8031 return AOP_WRITEPAGE_ACTIVATE;
8032 }
8033 ret = extent_write_full_page(page, wbc);
8034 btrfs_add_delayed_iput(inode);
8035 return ret;
8036}
8037
/* Multi-page writeback: forwarded unchanged to extent_writepages(). */
static int btrfs_writepages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	int status = extent_writepages(mapping, wbc);

	return status;
}
8043
/* Readahead: forwarded unchanged to extent_readahead(). */
static void btrfs_readahead(struct readahead_control *rac)
{
	extent_readahead(rac);
}
8048
8049static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
8050{
8051 int ret = try_release_extent_mapping(page, gfp_flags);
8052 if (ret == 1)
8053 detach_page_private(page);
8054 return ret;
8055}
8056
8057static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
8058{
8059 if (PageWriteback(page) || PageDirty(page))
8060 return 0;
8061 return __btrfs_releasepage(page, gfp_flags);
8062}
8063
#ifdef CONFIG_MIGRATION
/*
 * Migrate @page to @newpage: move the mapping, carry over the private data
 * and the Private2 flag, then copy either the full page or only its state
 * depending on @mode.
 *
 * Returns MIGRATEPAGE_SUCCESS, or the failure code from
 * migrate_page_move_mapping().
 */
static int btrfs_migratepage(struct address_space *mapping,
			     struct page *newpage, struct page *page,
			     enum migrate_mode mode)
{
	int rc = migrate_page_move_mapping(mapping, newpage, page, 0);

	if (rc != MIGRATEPAGE_SUCCESS)
		return rc;

	/* Hand the private data over to the new page. */
	if (page_has_private(page)) {
		void *priv = detach_page_private(page);

		attach_page_private(newpage, priv);
	}

	/* Private2 travels with the page as well. */
	if (PagePrivate2(page)) {
		ClearPagePrivate2(page);
		SetPagePrivate2(newpage);
	}

	if (mode == MIGRATE_SYNC_NO_COPY)
		migrate_page_states(newpage, page);
	else
		migrate_page_copy(newpage, page);

	return MIGRATEPAGE_SUCCESS;
}
#endif
8090
/*
 * Invalidate a data page.
 *
 * Waits for writeback on the page first.  A partial invalidation (non-zero
 * @offset) only attempts btrfs_releasepage() and returns.  For a full page
 * the function truncates any ordered extent overlapping the page, frees the
 * qgroup data reservation for the page, clears the delalloc related extent
 * bits (unless the inode is being evicted) and detaches the page's private
 * data.  @length is not used.
 */
static void btrfs_invalidatepage(struct page *page, unsigned int offset,
				 unsigned int length)
{
	struct inode *inode = page->mapping->host;
	struct extent_io_tree *tree;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	u64 page_start = page_offset(page);
	u64 page_end = page_start + PAGE_SIZE - 1;
	u64 start;
	u64 end;
	/* Non-zero when I_FREEING is set in the inode state. */
	int inode_evicting = inode->i_state & I_FREEING;

	wait_on_page_writeback(page);

	tree = &BTRFS_I(inode)->io_tree;
	if (offset) {
		btrfs_releasepage(page, GFP_NOFS);
		return;
	}

	/* The io_tree is left untouched while the inode is being evicted. */
	if (!inode_evicting)
		lock_extent_bits(tree, page_start, page_end, &cached_state);
again:
	/*
	 * NOTE(review): 'start' is reset to page_start on every pass, so the
	 * 'start = end + 1' advance at the bottom of the block below has no
	 * effect on the next lookup.  This looks unintended if an ordered
	 * extent can end before page_end - confirm.
	 */
	start = page_start;
	ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start,
					page_end - start + 1);
	if (ordered) {
		/* Clamp to the part of the ordered extent inside this page. */
		end = min(page_end,
			  ordered->file_offset + ordered->num_bytes - 1);
		/*
		 * Clear the delalloc/accounting bits and drop the extent lock
		 * (EXTENT_LOCKED is part of the mask) for the covered range.
		 */
		if (!inode_evicting)
			clear_extent_bit(tree, start, end,
					 EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
					 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
					 EXTENT_DEFRAG, 1, 0, &cached_state);
		/*
		 * Only when Private2 was still set on the page: mark the
		 * ordered extent truncated, shrink truncated_len to the
		 * offset of this page within it, and account the range as
		 * no longer pending.
		 */
		if (TestClearPagePrivate2(page)) {
			/* Shadows the outer extent_io_tree 'tree' in this scope. */
			struct btrfs_ordered_inode_tree *tree;
			u64 new_len;

			tree = &BTRFS_I(inode)->ordered_tree;

			spin_lock_irq(&tree->lock);
			set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
			new_len = start - ordered->file_offset;
			if (new_len < ordered->truncated_len)
				ordered->truncated_len = new_len;
			spin_unlock_irq(&tree->lock);

			if (btrfs_dec_test_ordered_pending(inode, &ordered,
							   start,
							   end - start + 1, 1))
				btrfs_finish_ordered_io(ordered);
		}
		btrfs_put_ordered_extent(ordered);
		/* Re-take the extent lock that was dropped above. */
		if (!inode_evicting) {
			cached_state = NULL;
			lock_extent_bits(tree, start, end,
					 &cached_state);
		}

		start = end + 1;
		if (start < page_end)
			goto again;
	}

	/*
	 * Free the qgroup data reservation for the whole page, then (unless
	 * evicting) clear the remaining extent bits including the lock and
	 * try to release the page.
	 */
	btrfs_qgroup_free_data(BTRFS_I(inode), NULL, page_start, PAGE_SIZE);
	if (!inode_evicting) {
		clear_extent_bit(tree, page_start, page_end, EXTENT_LOCKED |
				 EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
				 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
				 &cached_state);

		__btrfs_releasepage(page, GFP_NOFS);
	}

	ClearPageChecked(page);
	detach_page_private(page);
}
8197
8198
8199
8200
8201
8202
8203
8204
8205
8206
8207
8208
8209
8210
8211
8212
/*
 * Page fault handler for making a mapped page writable.
 *
 * Reserves delalloc space for the page, updates the file times, waits for
 * writeback and for any ordered extent covering the page, marks the range
 * delalloc, zeroes the part of the page beyond i_size and dirties the page.
 *
 * Returns VM_FAULT_LOCKED with the page still locked on success.  Returns
 * VM_FAULT_NOPAGE if the page no longer belongs to this mapping or lies at
 * or beyond i_size, VM_FAULT_SIGBUS if setting the delalloc bits fails, and
 * vmf_error() of the errno if the reservation or time update fails.
 */
vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
{
	struct page *page = vmf->page;
	struct inode *inode = file_inode(vmf->vma->vm_file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	struct extent_changeset *data_reserved = NULL;
	char *kaddr;
	unsigned long zero_start;
	loff_t size;
	vm_fault_t ret;
	int ret2;
	/* Set once the delalloc reservation succeeded and must be undone on error. */
	int reserved = 0;
	u64 reserved_space;
	u64 page_start;
	u64 page_end;
	/* Last byte that gets marked delalloc; shrunk for a partial last page. */
	u64 end;

	reserved_space = PAGE_SIZE;

	sb_start_pagefault(inode->i_sb);
	page_start = page_offset(page);
	page_end = page_start + PAGE_SIZE - 1;
	end = page_end;

	/*
	 * Reserve space for a full page before taking the page lock, and
	 * update the file times only if the reservation succeeded.
	 */
	ret2 = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
					    page_start, reserved_space);
	if (!ret2) {
		ret2 = file_update_time(vmf->vma->vm_file);
		reserved = 1;
	}
	if (ret2) {
		ret = vmf_error(ret2);
		/* Only undo the reservation if it was actually made. */
		if (reserved)
			goto out;
		goto out_noreserve;
	}

	ret = VM_FAULT_NOPAGE;
again:
	lock_page(page);
	size = i_size_read(inode);

	if ((page->mapping != inode->i_mapping) ||
	    (page_start >= size)) {
		/* Page is no longer in this mapping, or starts at/after i_size. */
		goto out_unlock;
	}
	wait_on_page_writeback(page);

	lock_extent_bits(io_tree, page_start, page_end, &cached_state);
	set_page_extent_mapped(page);

	/*
	 * If an ordered extent overlaps the page, drop both locks, start the
	 * ordered extent and wait for it (last argument 1), then retry from
	 * the top.
	 */
	ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start,
			PAGE_SIZE);
	if (ordered) {
		unlock_extent_cached(io_tree, page_start, page_end,
				     &cached_state);
		unlock_page(page);
		btrfs_start_ordered_extent(inode, ordered, 1);
		btrfs_put_ordered_extent(ordered);
		goto again;
	}

	/*
	 * For the page containing the last byte of the file only the
	 * sector-rounded part up to i_size needs a reservation; give the
	 * rest back.
	 */
	if (page->index == ((size - 1) >> PAGE_SHIFT)) {
		reserved_space = round_up(size - page_start,
					  fs_info->sectorsize);
		if (reserved_space < PAGE_SIZE) {
			end = page_start + reserved_space - 1;
			btrfs_delalloc_release_space(BTRFS_I(inode),
					data_reserved, page_start,
					PAGE_SIZE - reserved_space, true);
		}
	}

	/*
	 * Clear any existing delalloc/accounting/defrag bits on the range
	 * before setting delalloc again below.
	 */
	clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end,
			  EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
			  EXTENT_DEFRAG, 0, 0, &cached_state);

	ret2 = btrfs_set_extent_delalloc(BTRFS_I(inode), page_start, end, 0,
					&cached_state);
	if (ret2) {
		unlock_extent_cached(io_tree, page_start, page_end,
				     &cached_state);
		ret = VM_FAULT_SIGBUS;
		goto out_unlock;
	}

	/* Zero the tail of the page if it extends past i_size. */
	if (page_start + PAGE_SIZE > size)
		zero_start = offset_in_page(size);
	else
		zero_start = PAGE_SIZE;

	if (zero_start != PAGE_SIZE) {
		kaddr = kmap(page);
		memset(kaddr + zero_start, 0, PAGE_SIZE - zero_start);
		flush_dcache_page(page);
		kunmap(page);
	}
	ClearPageChecked(page);
	set_page_dirty(page);
	SetPageUptodate(page);

	/* Snapshot the current generation and log ids into the inode. */
	BTRFS_I(inode)->last_trans = fs_info->generation;
	BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
	BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit;

	unlock_extent_cached(io_tree, page_start, page_end, &cached_state);

	/* Success: the page stays locked, as VM_FAULT_LOCKED indicates. */
	btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
	sb_end_pagefault(inode->i_sb);
	extent_changeset_free(data_reserved);
	return VM_FAULT_LOCKED;

out_unlock:
	unlock_page(page);
out:
	/* Undo the (possibly already shrunk) reservation. */
	btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
	btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, page_start,
				     reserved_space, (ret != 0));
out_noreserve:
	sb_end_pagefault(inode->i_sb);
	extent_changeset_free(data_reserved);
	return ret;
}
8360
/*
 * Truncate the file extent items of @inode down to inode->i_size.
 *
 * @skip_writeback: when false, first wait for ordered extents from the
 *                  sector containing i_size to the end of the file.
 *
 * The work is done with a temporary block reserve that is refilled from the
 * transaction reserve; when btrfs_truncate_inode_items() returns -ENOSPC or
 * -EAGAIN the inode is updated, the transaction is ended and a new one is
 * started before retrying.
 *
 * Returns 0 on success or a negative errno.
 */
static int btrfs_truncate(struct inode *inode, bool skip_writeback)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_block_rsv *rsv;
	int ret;
	struct btrfs_trans_handle *trans;
	u64 mask = fs_info->sectorsize - 1;
	/* Size of the temporary reserve: metadata size for one item. */
	u64 min_size = btrfs_calc_metadata_size(fs_info, 1);

	if (!skip_writeback) {
		ret = btrfs_wait_ordered_range(inode, inode->i_size & (~mask),
					       (u64)-1);
		if (ret)
			return ret;
	}

	/* Temporary reserve used only while truncating items. */
	rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
	if (!rsv)
		return -ENOMEM;
	rsv->size = min_size;
	rsv->failfast = 1;

	/*
	 * NOTE(review): 2 units are requested here while min_size covers 1;
	 * presumably one is migrated into rsv and one stays for the inode
	 * update - confirm.
	 */
	trans = btrfs_start_transaction(root, 2);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out;
	}

	/* Move min_size bytes from the transaction reserve into rsv. */
	ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv,
				      min_size, false);
	BUG_ON(ret);

	/* Flag the inode as needing a full sync before items are removed. */
	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
	trans->block_rsv = rsv;

	while (1) {
		ret = btrfs_truncate_inode_items(trans, root, inode,
						 inode->i_size,
						 BTRFS_EXTENT_DATA_KEY);
		/* Anything after the truncate step uses the transaction reserve. */
		trans->block_rsv = &fs_info->trans_block_rsv;
		if (ret != -ENOSPC && ret != -EAGAIN)
			break;

		ret = btrfs_update_inode(trans, root, inode);
		if (ret)
			break;

		btrfs_end_transaction(trans);
		btrfs_btree_balance_dirty(fs_info);

		trans = btrfs_start_transaction(root, 2);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			/* Keeps the 'if (trans)' cleanup below from running. */
			trans = NULL;
			break;
		}

		/* Empty rsv and refill it from the new transaction. */
		btrfs_block_rsv_release(fs_info, rsv, -1, NULL);
		ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv,
					      rsv, min_size, false);
		BUG_ON(ret);
		trans->block_rsv = rsv;
	}

	/*
	 * NEED_TRUNCATE_BLOCK from the item truncation: end the transaction,
	 * truncate the block at i_size, then start a new transaction and set
	 * the safe on-disk i_size via
	 * btrfs_inode_safe_disk_i_size_write(inode, 0).
	 */
	if (ret == NEED_TRUNCATE_BLOCK) {
		btrfs_end_transaction(trans);
		btrfs_btree_balance_dirty(fs_info);

		ret = btrfs_truncate_block(inode, inode->i_size, 0, 0);
		if (ret)
			goto out;
		trans = btrfs_start_transaction(root, 1);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			goto out;
		}
		btrfs_inode_safe_disk_i_size_write(inode, 0);
	}

	if (trans) {
		int ret2;

		/* The first error wins; later failures do not overwrite it. */
		trans->block_rsv = &fs_info->trans_block_rsv;
		ret2 = btrfs_update_inode(trans, root, inode);
		if (ret2 && !ret)
			ret = ret2;

		ret2 = btrfs_end_transaction(trans);
		if (ret2 && !ret)
			ret = ret2;
		btrfs_btree_balance_dirty(fs_info);
	}
out:
	btrfs_free_block_rsv(fs_info, rsv);

	return ret;
}
8505
8506
8507
8508
8509int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
8510 struct btrfs_root *new_root,
8511 struct btrfs_root *parent_root,
8512 u64 new_dirid)
8513{
8514 struct inode *inode;
8515 int err;
8516 u64 index = 0;
8517
8518 inode = btrfs_new_inode(trans, new_root, NULL, "..", 2,
8519 new_dirid, new_dirid,
8520 S_IFDIR | (~current_umask() & S_IRWXUGO),
8521 &index);
8522 if (IS_ERR(inode))
8523 return PTR_ERR(inode);
8524 inode->i_op = &btrfs_dir_inode_operations;
8525 inode->i_fop = &btrfs_dir_file_operations;
8526
8527 set_nlink(inode, 1);
8528 btrfs_i_size_write(BTRFS_I(inode), 0);
8529 unlock_new_inode(inode);
8530
8531 err = btrfs_subvol_inherit_props(trans, new_root, parent_root);
8532 if (err)
8533 btrfs_err(new_root->fs_info,
8534 "error inheriting subvolume %llu properties: %d",
8535 new_root->root_key.objectid, err);
8536
8537 err = btrfs_update_inode(trans, new_root, inode);
8538
8539 iput(inode);
8540 return err;
8541}
8542
8543struct inode *btrfs_alloc_inode(struct super_block *sb)
8544{
8545 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
8546 struct btrfs_inode *ei;
8547 struct inode *inode;
8548
8549 ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_KERNEL);
8550 if (!ei)
8551 return NULL;
8552
8553 ei->root = NULL;
8554 ei->generation = 0;
8555 ei->last_trans = 0;
8556 ei->last_sub_trans = 0;
8557 ei->logged_trans = 0;
8558 ei->delalloc_bytes = 0;
8559 ei->new_delalloc_bytes = 0;
8560 ei->defrag_bytes = 0;
8561 ei->disk_i_size = 0;
8562 ei->flags = 0;
8563 ei->csum_bytes = 0;
8564 ei->index_cnt = (u64)-1;
8565 ei->dir_index = 0;
8566 ei->last_unlink_trans = 0;
8567 ei->last_reflink_trans = 0;
8568 ei->last_log_commit = 0;
8569
8570 spin_lock_init(&ei->lock);
8571 ei->outstanding_extents = 0;
8572 if (sb->s_magic != BTRFS_TEST_MAGIC)
8573 btrfs_init_metadata_block_rsv(fs_info, &ei->block_rsv,
8574 BTRFS_BLOCK_RSV_DELALLOC);
8575 ei->runtime_flags = 0;
8576 ei->prop_compress = BTRFS_COMPRESS_NONE;
8577 ei->defrag_compress = BTRFS_COMPRESS_NONE;
8578
8579 ei->delayed_node = NULL;
8580
8581 ei->i_otime.tv_sec = 0;
8582 ei->i_otime.tv_nsec = 0;
8583
8584 inode = &ei->vfs_inode;
8585 extent_map_tree_init(&ei->extent_tree);
8586 extent_io_tree_init(fs_info, &ei->io_tree, IO_TREE_INODE_IO, inode);
8587 extent_io_tree_init(fs_info, &ei->io_failure_tree,
8588 IO_TREE_INODE_IO_FAILURE, inode);
8589 extent_io_tree_init(fs_info, &ei->file_extent_tree,
8590 IO_TREE_INODE_FILE_EXTENT, inode);
8591 ei->io_tree.track_uptodate = true;
8592 ei->io_failure_tree.track_uptodate = true;
8593 atomic_set(&ei->sync_writers, 0);
8594 mutex_init(&ei->log_mutex);
8595 btrfs_ordered_inode_tree_init(&ei->ordered_tree);
8596 INIT_LIST_HEAD(&ei->delalloc_inodes);
8597 INIT_LIST_HEAD(&ei->delayed_iput);
8598 RB_CLEAR_NODE(&ei->rb_node);
8599 init_rwsem(&ei->dio_sem);
8600
8601 return inode;
8602}
8603
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/*
 * Sanity-test variant of inode teardown: drop the whole extent cache and
 * return the btrfs inode to its slab cache.
 */
void btrfs_test_destroy_inode(struct inode *inode)
{
	struct btrfs_inode *binode = BTRFS_I(inode);

	btrfs_drop_extent_cache(binode, 0, (u64)-1, 0);
	kmem_cache_free(btrfs_inode_cachep, binode);
}
#endif
8611
8612void btrfs_free_inode(struct inode *inode)
8613{
8614 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
8615}
8616
8617void btrfs_destroy_inode(struct inode *inode)
8618{
8619 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
8620 struct btrfs_ordered_extent *ordered;
8621 struct btrfs_root *root = BTRFS_I(inode)->root;
8622
8623 WARN_ON(!hlist_empty(&inode->i_dentry));
8624 WARN_ON(inode->i_data.nrpages);
8625 WARN_ON(BTRFS_I(inode)->block_rsv.reserved);
8626 WARN_ON(BTRFS_I(inode)->block_rsv.size);
8627 WARN_ON(BTRFS_I(inode)->outstanding_extents);
8628 WARN_ON(BTRFS_I(inode)->delalloc_bytes);
8629 WARN_ON(BTRFS_I(inode)->new_delalloc_bytes);
8630 WARN_ON(BTRFS_I(inode)->csum_bytes);
8631 WARN_ON(BTRFS_I(inode)->defrag_bytes);
8632
8633
8634
8635
8636
8637
8638 if (!root)
8639 return;
8640
8641 while (1) {
8642 ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
8643 if (!ordered)
8644 break;
8645 else {
8646 btrfs_err(fs_info,
8647 "found ordered extent %llu %llu on inode cleanup",
8648 ordered->file_offset, ordered->num_bytes);
8649 btrfs_remove_ordered_extent(inode, ordered);
8650 btrfs_put_ordered_extent(ordered);
8651 btrfs_put_ordered_extent(ordered);
8652 }
8653 }
8654 btrfs_qgroup_check_reserved_leak(BTRFS_I(inode));
8655 inode_tree_del(inode);
8656 btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0);
8657 btrfs_inode_clear_file_extent_range(BTRFS_I(inode), 0, (u64)-1);
8658 btrfs_put_root(BTRFS_I(inode)->root);
8659}
8660
8661int btrfs_drop_inode(struct inode *inode)
8662{
8663 struct btrfs_root *root = BTRFS_I(inode)->root;
8664
8665 if (root == NULL)
8666 return 1;
8667
8668
8669 if (btrfs_root_refs(&root->root_item) == 0)
8670 return 1;
8671 else
8672 return generic_drop_inode(inode);
8673}
8674
8675static void init_once(void *foo)
8676{
8677 struct btrfs_inode *ei = (struct btrfs_inode *) foo;
8678
8679 inode_init_once(&ei->vfs_inode);
8680}
8681
8682void __cold btrfs_destroy_cachep(void)
8683{
8684
8685
8686
8687
8688 rcu_barrier();
8689 kmem_cache_destroy(btrfs_inode_cachep);
8690 kmem_cache_destroy(btrfs_trans_handle_cachep);
8691 kmem_cache_destroy(btrfs_path_cachep);
8692 kmem_cache_destroy(btrfs_free_space_cachep);
8693 kmem_cache_destroy(btrfs_free_space_bitmap_cachep);
8694}
8695
8696int __init btrfs_init_cachep(void)
8697{
8698 btrfs_inode_cachep = kmem_cache_create("btrfs_inode",
8699 sizeof(struct btrfs_inode), 0,
8700 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT,
8701 init_once);
8702 if (!btrfs_inode_cachep)
8703 goto fail;
8704
8705 btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle",
8706 sizeof(struct btrfs_trans_handle), 0,
8707 SLAB_TEMPORARY | SLAB_MEM_SPREAD, NULL);
8708 if (!btrfs_trans_handle_cachep)
8709 goto fail;
8710
8711 btrfs_path_cachep = kmem_cache_create("btrfs_path",
8712 sizeof(struct btrfs_path), 0,
8713 SLAB_MEM_SPREAD, NULL);
8714 if (!btrfs_path_cachep)
8715 goto fail;
8716
8717 btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space",
8718 sizeof(struct btrfs_free_space), 0,
8719 SLAB_MEM_SPREAD, NULL);
8720 if (!btrfs_free_space_cachep)
8721 goto fail;
8722
8723 btrfs_free_space_bitmap_cachep = kmem_cache_create("btrfs_free_space_bitmap",
8724 PAGE_SIZE, PAGE_SIZE,
8725 SLAB_RED_ZONE, NULL);
8726 if (!btrfs_free_space_bitmap_cachep)
8727 goto fail;
8728
8729 return 0;
8730fail:
8731 btrfs_destroy_cachep();
8732 return -ENOMEM;
8733}
8734
8735static int btrfs_getattr(const struct path *path, struct kstat *stat,
8736 u32 request_mask, unsigned int flags)
8737{
8738 u64 delalloc_bytes;
8739 struct inode *inode = d_inode(path->dentry);
8740 u32 blocksize = inode->i_sb->s_blocksize;
8741 u32 bi_flags = BTRFS_I(inode)->flags;
8742
8743 stat->result_mask |= STATX_BTIME;
8744 stat->btime.tv_sec = BTRFS_I(inode)->i_otime.tv_sec;
8745 stat->btime.tv_nsec = BTRFS_I(inode)->i_otime.tv_nsec;
8746 if (bi_flags & BTRFS_INODE_APPEND)
8747 stat->attributes |= STATX_ATTR_APPEND;
8748 if (bi_flags & BTRFS_INODE_COMPRESS)
8749 stat->attributes |= STATX_ATTR_COMPRESSED;
8750 if (bi_flags & BTRFS_INODE_IMMUTABLE)
8751 stat->attributes |= STATX_ATTR_IMMUTABLE;
8752 if (bi_flags & BTRFS_INODE_NODUMP)
8753 stat->attributes |= STATX_ATTR_NODUMP;
8754
8755 stat->attributes_mask |= (STATX_ATTR_APPEND |
8756 STATX_ATTR_COMPRESSED |
8757 STATX_ATTR_IMMUTABLE |
8758 STATX_ATTR_NODUMP);
8759
8760 generic_fillattr(inode, stat);
8761 stat->dev = BTRFS_I(inode)->root->anon_dev;
8762
8763 spin_lock(&BTRFS_I(inode)->lock);
8764 delalloc_bytes = BTRFS_I(inode)->new_delalloc_bytes;
8765 spin_unlock(&BTRFS_I(inode)->lock);
8766 stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) +
8767 ALIGN(delalloc_bytes, blocksize)) >> 9;
8768 return 0;
8769}
8770
/*
 * Exchange two directory entries: afterwards @new_dentry's name in @new_dir
 * refers to the old inode and @old_dentry's name in @old_dir refers to the
 * new inode.
 *
 * An inode whose number is BTRFS_FIRST_FREE_OBJECTID is handled through
 * btrfs_unlink_subvol() and forces a full log commit; any other inode gets
 * an inode ref inserted and its new name logged while the tree log of the
 * relevant root is pinned.
 *
 * Returns 0 on success, -EXDEV when the roots differ and the old inode is
 * not BTRFS_FIRST_FREE_OBJECTID, or another negative errno.
 */
static int btrfs_rename_exchange(struct inode *old_dir,
			      struct dentry *old_dentry,
			      struct inode *new_dir,
			      struct dentry *new_dentry)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(old_dir->i_sb);
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(old_dir)->root;
	struct btrfs_root *dest = BTRFS_I(new_dir)->root;
	struct inode *new_inode = new_dentry->d_inode;
	struct inode *old_inode = old_dentry->d_inode;
	struct timespec64 ctime = current_time(old_inode);
	struct dentry *parent;
	u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
	u64 new_ino = btrfs_ino(BTRFS_I(new_inode));
	/* Index of the old inode's new entry in new_dir, and vice versa. */
	u64 old_idx = 0;
	u64 new_idx = 0;
	int ret;
	/* Track whether each root's log is currently pinned by this function. */
	bool root_log_pinned = false;
	bool dest_log_pinned = false;
	struct btrfs_log_ctx ctx_root;
	struct btrfs_log_ctx ctx_dest;
	bool sync_log_root = false;
	bool sync_log_dest = false;
	bool commit_transaction = false;

	/* Crossing roots is only allowed when the old inode is a subvolume root. */
	if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
		return -EXDEV;

	btrfs_init_log_ctx(&ctx_root, old_inode);
	btrfs_init_log_ctx(&ctx_dest, new_inode);

	/* Hold subvol_sem for read when either side is a subvolume root. */
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID ||
	    new_ino == BTRFS_FIRST_FREE_OBJECTID)
		down_read(&fs_info->subvol_sem);

	/*
	 * NOTE(review): 12 is the number of items reserved for the whole
	 * exchange; the derivation is not visible here - confirm before
	 * changing it.
	 */
	trans = btrfs_start_transaction(root, 12);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_notrans;
	}

	if (dest != root)
		btrfs_record_root_in_trans(trans, dest);

	/*
	 * Allocate a directory index in each directory for the entry that
	 * is about to be added to it.
	 */
	ret = btrfs_set_inode_index(BTRFS_I(new_dir), &old_idx);
	if (ret)
		goto out_fail;
	ret = btrfs_set_inode_index(BTRFS_I(old_dir), &new_idx);
	if (ret)
		goto out_fail;

	BTRFS_I(old_inode)->dir_index = 0ULL;
	BTRFS_I(new_inode)->dir_index = 0ULL;

	/* Reference for the old inode under its new name/parent. */
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
		/* Subvolume involved: force a full log commit. */
		btrfs_set_log_full_commit(trans);
	} else {
		btrfs_pin_log_trans(root);
		root_log_pinned = true;
		ret = btrfs_insert_inode_ref(trans, dest,
					     new_dentry->d_name.name,
					     new_dentry->d_name.len,
					     old_ino,
					     btrfs_ino(BTRFS_I(new_dir)),
					     old_idx);
		if (ret)
			goto out_fail;
	}

	/* Same for the new inode under the old name/parent. */
	if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
		/* Subvolume involved: force a full log commit. */
		btrfs_set_log_full_commit(trans);
	} else {
		btrfs_pin_log_trans(dest);
		dest_log_pinned = true;
		ret = btrfs_insert_inode_ref(trans, root,
					     old_dentry->d_name.name,
					     old_dentry->d_name.len,
					     new_ino,
					     btrfs_ino(BTRFS_I(old_dir)),
					     new_idx);
		if (ret)
			goto out_fail;
	}

	/* Bump inode versions and set ctime/mtime on everything involved. */
	inode_inc_iversion(old_dir);
	inode_inc_iversion(new_dir);
	inode_inc_iversion(old_inode);
	inode_inc_iversion(new_inode);
	old_dir->i_ctime = old_dir->i_mtime = ctime;
	new_dir->i_ctime = new_dir->i_mtime = ctime;
	old_inode->i_ctime = ctime;
	new_inode->i_ctime = ctime;

	if (old_dentry->d_parent != new_dentry->d_parent) {
		btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
				BTRFS_I(old_inode), 1);
		btrfs_record_unlink_dir(trans, BTRFS_I(new_dir),
				BTRFS_I(new_inode), 1);
	}

	/* Remove the old entry; from here on failures abort the transaction. */
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
		ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
	} else {
		ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),
					   BTRFS_I(old_dentry->d_inode),
					   old_dentry->d_name.name,
					   old_dentry->d_name.len);
		if (!ret)
			ret = btrfs_update_inode(trans, root, old_inode);
	}
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	/* Remove the new entry. */
	if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
		ret = btrfs_unlink_subvol(trans, new_dir, new_dentry);
	} else {
		ret = __btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir),
					   BTRFS_I(new_dentry->d_inode),
					   new_dentry->d_name.name,
					   new_dentry->d_name.len);
		if (!ret)
			ret = btrfs_update_inode(trans, dest, new_inode);
	}
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	/* Add the two swapped links using the indexes allocated above. */
	ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode),
			     new_dentry->d_name.name,
			     new_dentry->d_name.len, 0, old_idx);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	ret = btrfs_add_link(trans, BTRFS_I(old_dir), BTRFS_I(new_inode),
			     old_dentry->d_name.name,
			     old_dentry->d_name.len, 0, new_idx);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	/* The cached dir_index is only set for singly linked inodes. */
	if (old_inode->i_nlink == 1)
		BTRFS_I(old_inode)->dir_index = old_idx;
	if (new_inode->i_nlink == 1)
		BTRFS_I(new_inode)->dir_index = new_idx;

	/*
	 * Log the new names.  The return value of btrfs_log_new_name() only
	 * selects between syncing the log and committing the transaction;
	 * it is not treated as an error, hence 'ret = 0' afterwards.
	 */
	if (root_log_pinned) {
		parent = new_dentry->d_parent;
		ret = btrfs_log_new_name(trans, BTRFS_I(old_inode),
					 BTRFS_I(old_dir), parent,
					 false, &ctx_root);
		if (ret == BTRFS_NEED_LOG_SYNC)
			sync_log_root = true;
		else if (ret == BTRFS_NEED_TRANS_COMMIT)
			commit_transaction = true;
		ret = 0;
		btrfs_end_log_trans(root);
		root_log_pinned = false;
	}
	if (dest_log_pinned) {
		/* Skipped when a transaction commit is already required. */
		if (!commit_transaction) {
			parent = old_dentry->d_parent;
			ret = btrfs_log_new_name(trans, BTRFS_I(new_inode),
						 BTRFS_I(new_dir), parent,
						 false, &ctx_dest);
			if (ret == BTRFS_NEED_LOG_SYNC)
				sync_log_dest = true;
			else if (ret == BTRFS_NEED_TRANS_COMMIT)
				commit_transaction = true;
			ret = 0;
		}
		btrfs_end_log_trans(dest);
		dest_log_pinned = false;
	}
out_fail:
	/*
	 * Error with a log still pinned: if any of the involved inodes is in
	 * the log for the current generation, force a full commit, then
	 * unpin whichever logs are still pinned.
	 */
	if (ret && (root_log_pinned || dest_log_pinned)) {
		if (btrfs_inode_in_log(BTRFS_I(old_dir), fs_info->generation) ||
		    btrfs_inode_in_log(BTRFS_I(new_dir), fs_info->generation) ||
		    btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
		    (new_inode &&
		     btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation)))
			btrfs_set_log_full_commit(trans);

		if (root_log_pinned) {
			btrfs_end_log_trans(root);
			root_log_pinned = false;
		}
		if (dest_log_pinned) {
			btrfs_end_log_trans(dest);
			dest_log_pinned = false;
		}
	}
	/* A failed log sync falls back to committing the transaction. */
	if (!ret && sync_log_root && !commit_transaction) {
		ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root,
				     &ctx_root);
		if (ret)
			commit_transaction = true;
	}
	if (!ret && sync_log_dest && !commit_transaction) {
		ret = btrfs_sync_log(trans, BTRFS_I(new_inode)->root,
				     &ctx_dest);
		if (ret)
			commit_transaction = true;
	}
	if (commit_transaction) {
		/*
		 * ctx_root lives on this stack frame.  If a log sync was
		 * requested for it but never ran, take it off whatever list
		 * it is on (under root->log_mutex) before committing, so
		 * nothing keeps a pointer to it after return.
		 */
		if (sync_log_root) {
			mutex_lock(&root->log_mutex);
			list_del_init(&ctx_root.list);
			mutex_unlock(&root->log_mutex);
		}
		ret = btrfs_commit_transaction(trans);
	} else {
		int ret2;

		/* Keep the first error; otherwise report the end result. */
		ret2 = btrfs_end_transaction(trans);
		ret = ret ? ret : ret2;
	}
out_notrans:
	if (new_ino == BTRFS_FIRST_FREE_OBJECTID ||
	    old_ino == BTRFS_FIRST_FREE_OBJECTID)
		up_read(&fs_info->subvol_sem);

	/* Neither on-stack log context may still be linked anywhere. */
	ASSERT(list_empty(&ctx_root.list));
	ASSERT(list_empty(&ctx_dest.list));

	return ret;
}
9043
9044static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans,
9045 struct btrfs_root *root,
9046 struct inode *dir,
9047 struct dentry *dentry)
9048{
9049 int ret;
9050 struct inode *inode;
9051 u64 objectid;
9052 u64 index;
9053
9054 ret = btrfs_find_free_ino(root, &objectid);
9055 if (ret)
9056 return ret;
9057
9058 inode = btrfs_new_inode(trans, root, dir,
9059 dentry->d_name.name,
9060 dentry->d_name.len,
9061 btrfs_ino(BTRFS_I(dir)),
9062 objectid,
9063 S_IFCHR | WHITEOUT_MODE,
9064 &index);
9065
9066 if (IS_ERR(inode)) {
9067 ret = PTR_ERR(inode);
9068 return ret;
9069 }
9070
9071 inode->i_op = &btrfs_special_inode_operations;
9072 init_special_inode(inode, inode->i_mode,
9073 WHITEOUT_DEV);
9074
9075 ret = btrfs_init_inode_security(trans, inode, dir,
9076 &dentry->d_name);
9077 if (ret)
9078 goto out;
9079
9080 ret = btrfs_add_nondir(trans, BTRFS_I(dir), dentry,
9081 BTRFS_I(inode), 0, index);
9082 if (ret)
9083 goto out;
9084
9085 ret = btrfs_update_inode(trans, root, inode);
9086out:
9087 unlock_new_inode(inode);
9088 if (ret)
9089 inode_dec_link_count(inode);
9090 iput(inode);
9091
9092 return ret;
9093}
9094
9095static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
9096 struct inode *new_dir, struct dentry *new_dentry,
9097 unsigned int flags)
9098{
9099 struct btrfs_fs_info *fs_info = btrfs_sb(old_dir->i_sb);
9100 struct btrfs_trans_handle *trans;
9101 unsigned int trans_num_items;
9102 struct btrfs_root *root = BTRFS_I(old_dir)->root;
9103 struct btrfs_root *dest = BTRFS_I(new_dir)->root;
9104 struct inode *new_inode = d_inode(new_dentry);
9105 struct inode *old_inode = d_inode(old_dentry);
9106 u64 index = 0;
9107 int ret;
9108 u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
9109 bool log_pinned = false;
9110 struct btrfs_log_ctx ctx;
9111 bool sync_log = false;
9112 bool commit_transaction = false;
9113
9114 if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
9115 return -EPERM;
9116
9117
9118 if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
9119 return -EXDEV;
9120
9121 if (old_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID ||
9122 (new_inode && btrfs_ino(BTRFS_I(new_inode)) == BTRFS_FIRST_FREE_OBJECTID))
9123 return -ENOTEMPTY;
9124
9125 if (S_ISDIR(old_inode->i_mode) && new_inode &&
9126 new_inode->i_size > BTRFS_EMPTY_DIR_SIZE)
9127 return -ENOTEMPTY;
9128
9129
9130
9131 ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino,
9132 new_dentry->d_name.name,
9133 new_dentry->d_name.len);
9134
9135 if (ret) {
9136 if (ret == -EEXIST) {
9137
9138
9139 if (WARN_ON(!new_inode)) {
9140 return ret;
9141 }
9142 } else {
9143
9144 return ret;
9145 }
9146 }
9147 ret = 0;
9148
9149
9150
9151
9152
9153 if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size)
9154 filemap_flush(old_inode->i_mapping);
9155
9156
9157 if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
9158 down_read(&fs_info->subvol_sem);
9159
9160
9161
9162
9163
9164
9165
9166
9167
9168
9169
9170 trans_num_items = 11;
9171 if (flags & RENAME_WHITEOUT)
9172 trans_num_items += 5;
9173 trans = btrfs_start_transaction(root, trans_num_items);
9174 if (IS_ERR(trans)) {
9175 ret = PTR_ERR(trans);
9176 goto out_notrans;
9177 }
9178
9179 if (dest != root)
9180 btrfs_record_root_in_trans(trans, dest);
9181
9182 ret = btrfs_set_inode_index(BTRFS_I(new_dir), &index);
9183 if (ret)
9184 goto out_fail;
9185
9186 BTRFS_I(old_inode)->dir_index = 0ULL;
9187 if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
9188
9189 btrfs_set_log_full_commit(trans);
9190 } else {
9191 btrfs_pin_log_trans(root);
9192 log_pinned = true;
9193 ret = btrfs_insert_inode_ref(trans, dest,
9194 new_dentry->d_name.name,
9195 new_dentry->d_name.len,
9196 old_ino,
9197 btrfs_ino(BTRFS_I(new_dir)), index);
9198 if (ret)
9199 goto out_fail;
9200 }
9201
9202 inode_inc_iversion(old_dir);
9203 inode_inc_iversion(new_dir);
9204 inode_inc_iversion(old_inode);
9205 old_dir->i_ctime = old_dir->i_mtime =
9206 new_dir->i_ctime = new_dir->i_mtime =
9207 old_inode->i_ctime = current_time(old_dir);
9208
9209 if (old_dentry->d_parent != new_dentry->d_parent)
9210 btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
9211 BTRFS_I(old_inode), 1);
9212
9213 if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
9214 ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
9215 } else {
9216 ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),
9217 BTRFS_I(d_inode(old_dentry)),
9218 old_dentry->d_name.name,
9219 old_dentry->d_name.len);
9220 if (!ret)
9221 ret = btrfs_update_inode(trans, root, old_inode);
9222 }
9223 if (ret) {
9224 btrfs_abort_transaction(trans, ret);
9225 goto out_fail;
9226 }
9227
9228 if (new_inode) {
9229 inode_inc_iversion(new_inode);
9230 new_inode->i_ctime = current_time(new_inode);
9231 if (unlikely(btrfs_ino(BTRFS_I(new_inode)) ==
9232 BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
9233 ret = btrfs_unlink_subvol(trans, new_dir, new_dentry);
9234 BUG_ON(new_inode->i_nlink == 0);
9235 } else {
9236 ret = btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir),
9237 BTRFS_I(d_inode(new_dentry)),
9238 new_dentry->d_name.name,
9239 new_dentry->d_name.len);
9240 }
9241 if (!ret && new_inode->i_nlink == 0)
9242 ret = btrfs_orphan_add(trans,
9243 BTRFS_I(d_inode(new_dentry)));
9244 if (ret) {
9245 btrfs_abort_transaction(trans, ret);
9246 goto out_fail;
9247 }
9248 }
9249
9250 ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode),
9251 new_dentry->d_name.name,
9252 new_dentry->d_name.len, 0, index);
9253 if (ret) {
9254 btrfs_abort_transaction(trans, ret);
9255 goto out_fail;
9256 }
9257
9258 if (old_inode->i_nlink == 1)
9259 BTRFS_I(old_inode)->dir_index = index;
9260
9261 if (log_pinned) {
9262 struct dentry *parent = new_dentry->d_parent;
9263
9264 btrfs_init_log_ctx(&ctx, old_inode);
9265 ret = btrfs_log_new_name(trans, BTRFS_I(old_inode),
9266 BTRFS_I(old_dir), parent,
9267 false, &ctx);
9268 if (ret == BTRFS_NEED_LOG_SYNC)
9269 sync_log = true;
9270 else if (ret == BTRFS_NEED_TRANS_COMMIT)
9271 commit_transaction = true;
9272 ret = 0;
9273 btrfs_end_log_trans(root);
9274 log_pinned = false;
9275 }
9276
9277 if (flags & RENAME_WHITEOUT) {
9278 ret = btrfs_whiteout_for_rename(trans, root, old_dir,
9279 old_dentry);
9280
9281 if (ret) {
9282 btrfs_abort_transaction(trans, ret);
9283 goto out_fail;
9284 }
9285 }
9286out_fail:
9287
9288
9289
9290
9291
9292
9293
9294
9295
9296
9297
9298 if (ret && log_pinned) {
9299 if (btrfs_inode_in_log(BTRFS_I(old_dir), fs_info->generation) ||
9300 btrfs_inode_in_log(BTRFS_I(new_dir), fs_info->generation) ||
9301 btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
9302 (new_inode &&
9303 btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation)))
9304 btrfs_set_log_full_commit(trans);
9305
9306 btrfs_end_log_trans(root);
9307 log_pinned = false;
9308 }
9309 if (!ret && sync_log) {
9310 ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root, &ctx);
9311 if (ret)
9312 commit_transaction = true;
9313 } else if (sync_log) {
9314 mutex_lock(&root->log_mutex);
9315 list_del(&ctx.list);
9316 mutex_unlock(&root->log_mutex);
9317 }
9318 if (commit_transaction) {
9319 ret = btrfs_commit_transaction(trans);
9320 } else {
9321 int ret2;
9322
9323 ret2 = btrfs_end_transaction(trans);
9324 ret = ret ? ret : ret2;
9325 }
9326out_notrans:
9327 if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
9328 up_read(&fs_info->subvol_sem);
9329
9330 return ret;
9331}
9332
9333static int btrfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
9334 struct inode *new_dir, struct dentry *new_dentry,
9335 unsigned int flags)
9336{
9337 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
9338 return -EINVAL;
9339
9340 if (flags & RENAME_EXCHANGE)
9341 return btrfs_rename_exchange(old_dir, old_dentry, new_dir,
9342 new_dentry);
9343
9344 return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
9345}
9346
/*
 * One queued "flush this inode" request.  Allocated by
 * btrfs_alloc_delalloc_work() and executed by btrfs_run_delalloc_work().
 */
struct btrfs_delalloc_work {
	/* Inode to flush; the worker releases it with iput() when done. */
	struct inode *inode;
	/* Signalled by the worker after the flush calls have been issued. */
	struct completion completion;
	/* Links the request into the submitter's local list of works. */
	struct list_head list;
	/* Work item handed to btrfs_queue_work(). */
	struct btrfs_work work;
};
9353
9354static void btrfs_run_delalloc_work(struct btrfs_work *work)
9355{
9356 struct btrfs_delalloc_work *delalloc_work;
9357 struct inode *inode;
9358
9359 delalloc_work = container_of(work, struct btrfs_delalloc_work,
9360 work);
9361 inode = delalloc_work->inode;
9362 filemap_flush(inode->i_mapping);
9363 if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
9364 &BTRFS_I(inode)->runtime_flags))
9365 filemap_flush(inode->i_mapping);
9366
9367 iput(inode);
9368 complete(&delalloc_work->completion);
9369}
9370
9371static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode)
9372{
9373 struct btrfs_delalloc_work *work;
9374
9375 work = kmalloc(sizeof(*work), GFP_NOFS);
9376 if (!work)
9377 return NULL;
9378
9379 init_completion(&work->completion);
9380 INIT_LIST_HEAD(&work->list);
9381 work->inode = inode;
9382 btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL);
9383
9384 return work;
9385}
9386
9387
9388
9389
9390
/*
 * Queue flush work for inodes on @root's delalloc list and wait for all of the
 * queued work to finish.
 *
 * @root:     root whose delalloc_inodes list is walked
 * @nr:       maximum number of inodes to queue, or -1 for no limit
 * @snapshot: when true, BTRFS_INODE_SNAPSHOT_FLUSH is set on every inode that
 *            gets queued
 *
 * Returns the number of work items queued, or -ENOMEM if a work item could not
 * be allocated.  Work queued before such a failure is still waited for.
 */
static int start_delalloc_inodes(struct btrfs_root *root, int nr, bool snapshot)
{
	struct btrfs_inode *binode;
	struct inode *inode;
	struct btrfs_delalloc_work *work, *next;
	struct list_head works;
	struct list_head splice;
	int ret = 0;

	INIT_LIST_HEAD(&works);
	INIT_LIST_HEAD(&splice);

	mutex_lock(&root->delalloc_mutex);
	spin_lock(&root->delalloc_lock);
	/* Move the whole list to a private one so each entry is visited once. */
	list_splice_init(&root->delalloc_inodes, &splice);
	while (!list_empty(&splice)) {
		binode = list_entry(splice.next, struct btrfs_inode,
				    delalloc_inodes);

		/* Put the inode back on the root's list before dropping the lock. */
		list_move_tail(&binode->delalloc_inodes,
			       &root->delalloc_inodes);
		inode = igrab(&binode->vfs_inode);
		if (!inode) {
			/* No reference could be taken; skip this inode. */
			cond_resched_lock(&root->delalloc_lock);
			continue;
		}
		spin_unlock(&root->delalloc_lock);

		if (snapshot)
			set_bit(BTRFS_INODE_SNAPSHOT_FLUSH,
				&binode->runtime_flags);
		work = btrfs_alloc_delalloc_work(inode);
		if (!work) {
			/* Drop the reference taken by igrab() above. */
			iput(inode);
			ret = -ENOMEM;
			goto out;
		}
		list_add_tail(&work->list, &works);
		btrfs_queue_work(root->fs_info->flush_workers,
				 &work->work);
		ret++;
		/* The spinlock is not held on either "goto out" path. */
		if (nr != -1 && ret >= nr)
			goto out;
		cond_resched();
		spin_lock(&root->delalloc_lock);
	}
	spin_unlock(&root->delalloc_lock);

out:
	/* Wait for every queued work item, then free it. */
	list_for_each_entry_safe(work, next, &works, list) {
		list_del_init(&work->list);
		wait_for_completion(&work->completion);
		kfree(work);
	}

	/* Return any inodes we did not get to back to the root's list. */
	if (!list_empty(&splice)) {
		spin_lock(&root->delalloc_lock);
		list_splice_tail(&splice, &root->delalloc_inodes);
		spin_unlock(&root->delalloc_lock);
	}
	mutex_unlock(&root->delalloc_mutex);
	return ret;
}
9454
9455int btrfs_start_delalloc_snapshot(struct btrfs_root *root)
9456{
9457 struct btrfs_fs_info *fs_info = root->fs_info;
9458 int ret;
9459
9460 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
9461 return -EROFS;
9462
9463 ret = start_delalloc_inodes(root, -1, true);
9464 if (ret > 0)
9465 ret = 0;
9466 return ret;
9467}
9468
9469int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int nr)
9470{
9471 struct btrfs_root *root;
9472 struct list_head splice;
9473 int ret;
9474
9475 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
9476 return -EROFS;
9477
9478 INIT_LIST_HEAD(&splice);
9479
9480 mutex_lock(&fs_info->delalloc_root_mutex);
9481 spin_lock(&fs_info->delalloc_root_lock);
9482 list_splice_init(&fs_info->delalloc_roots, &splice);
9483 while (!list_empty(&splice) && nr) {
9484 root = list_first_entry(&splice, struct btrfs_root,
9485 delalloc_root);
9486 root = btrfs_grab_root(root);
9487 BUG_ON(!root);
9488 list_move_tail(&root->delalloc_root,
9489 &fs_info->delalloc_roots);
9490 spin_unlock(&fs_info->delalloc_root_lock);
9491
9492 ret = start_delalloc_inodes(root, nr, false);
9493 btrfs_put_root(root);
9494 if (ret < 0)
9495 goto out;
9496
9497 if (nr != -1) {
9498 nr -= ret;
9499 WARN_ON(nr < 0);
9500 }
9501 spin_lock(&fs_info->delalloc_root_lock);
9502 }
9503 spin_unlock(&fs_info->delalloc_root_lock);
9504
9505 ret = 0;
9506out:
9507 if (!list_empty(&splice)) {
9508 spin_lock(&fs_info->delalloc_root_lock);
9509 list_splice_tail(&splice, &fs_info->delalloc_roots);
9510 spin_unlock(&fs_info->delalloc_root_lock);
9511 }
9512 mutex_unlock(&fs_info->delalloc_root_mutex);
9513 return ret;
9514}
9515
/*
 * Create a symbolic link named by @dentry in directory @dir whose target is
 * @symname.
 *
 * The target string is stored as an inline file extent of the new inode, so
 * targets longer than BTRFS_MAX_INLINE_DATA_SIZE() are rejected with
 * -ENAMETOOLONG.
 *
 * Returns 0 on success or a negative errno.  On failure after the inode was
 * created, its link count is dropped and the inode is discarded.
 */
static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
			 const char *symname)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct inode *inode = NULL;
	int err;
	u64 objectid;
	u64 index = 0;
	int name_len;
	int datasize;
	unsigned long ptr;
	struct btrfs_file_extent_item *ei;
	struct extent_buffer *leaf;

	name_len = strlen(symname);
	if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(fs_info))
		return -ENAMETOOLONG;

	/*
	 * Reserve 7 items for the transaction.
	 * NOTE(review): presumably inode item + inode ref, two dir items, the
	 * parent inode update, the inline extent item and one xattr - confirm
	 * against the reservation accounting.
	 */
	trans = btrfs_start_transaction(root, 7);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	err = btrfs_find_free_ino(root, &objectid);
	if (err)
		goto out_unlock;

	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
				dentry->d_name.len, btrfs_ino(BTRFS_I(dir)),
				objectid, S_IFLNK|S_IRWXUGO, &index);
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
		/* Make sure the cleanup path does not touch an error pointer. */
		inode = NULL;
		goto out_unlock;
	}

	/*
	 * Install the regular-file operation vectors for now; i_op is switched
	 * to the symlink operations further down, once the extent is written.
	 * NOTE(review): presumably needed so the security initialisation below
	 * sees a fully set up inode - confirm.
	 */
	inode->i_fop = &btrfs_file_operations;
	inode->i_op = &btrfs_file_inode_operations;
	inode->i_mapping->a_ops = &btrfs_aops;
	BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;

	err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
	if (err)
		goto out_unlock;

	path = btrfs_alloc_path();
	if (!path) {
		err = -ENOMEM;
		goto out_unlock;
	}
	/* Insert an empty extent-data item at file offset 0 for the target. */
	key.objectid = btrfs_ino(BTRFS_I(inode));
	key.offset = 0;
	key.type = BTRFS_EXTENT_DATA_KEY;
	datasize = btrfs_file_extent_calc_inline_size(name_len);
	err = btrfs_insert_empty_item(trans, root, path, &key,
				      datasize);
	if (err) {
		btrfs_free_path(path);
		goto out_unlock;
	}
	/* Fill in the item header: inline, unencrypted, uncompressed. */
	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);
	btrfs_set_file_extent_generation(leaf, ei, trans->transid);
	btrfs_set_file_extent_type(leaf, ei,
				   BTRFS_FILE_EXTENT_INLINE);
	btrfs_set_file_extent_encryption(leaf, ei, 0);
	btrfs_set_file_extent_compression(leaf, ei, 0);
	btrfs_set_file_extent_other_encoding(leaf, ei, 0);
	btrfs_set_file_extent_ram_bytes(leaf, ei, name_len);

	/* Copy the target string (without a terminating NUL) into the leaf. */
	ptr = btrfs_file_extent_inline_start(ei);
	write_extent_buffer(leaf, symname, ptr, name_len);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_free_path(path);

	inode->i_op = &btrfs_symlink_inode_operations;
	inode_nohighmem(inode);
	inode_set_bytes(inode, name_len);
	btrfs_i_size_write(BTRFS_I(inode), name_len);
	err = btrfs_update_inode(trans, root, inode);
	/*
	 * The directory entry is added last, only after the inode itself has
	 * been written successfully.
	 */
	if (!err)
		err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry,
				       BTRFS_I(inode), 0, index);
	if (err)
		goto out_unlock;

	d_instantiate_new(dentry, inode);

out_unlock:
	btrfs_end_transaction(trans);
	if (err && inode) {
		inode_dec_link_count(inode);
		discard_new_inode(inode);
	}
	btrfs_btree_balance_dirty(fs_info);
	return err;
}
9635
9636static int insert_prealloc_file_extent(struct btrfs_trans_handle *trans,
9637 struct inode *inode, struct btrfs_key *ins,
9638 u64 file_offset)
9639{
9640 struct btrfs_file_extent_item stack_fi;
9641 u64 start = ins->objectid;
9642 u64 len = ins->offset;
9643 int ret;
9644
9645 memset(&stack_fi, 0, sizeof(stack_fi));
9646
9647 btrfs_set_stack_file_extent_type(&stack_fi, BTRFS_FILE_EXTENT_PREALLOC);
9648 btrfs_set_stack_file_extent_disk_bytenr(&stack_fi, start);
9649 btrfs_set_stack_file_extent_disk_num_bytes(&stack_fi, len);
9650 btrfs_set_stack_file_extent_num_bytes(&stack_fi, len);
9651 btrfs_set_stack_file_extent_ram_bytes(&stack_fi, len);
9652 btrfs_set_stack_file_extent_compression(&stack_fi, BTRFS_COMPRESS_NONE);
9653
9654
9655 ret = btrfs_qgroup_release_data(BTRFS_I(inode), file_offset, len);
9656 if (ret < 0)
9657 return ret;
9658 return insert_reserved_file_extent(trans, BTRFS_I(inode), file_offset,
9659 &stack_fi, ret);
9660}
/*
 * Preallocate extents covering [@start, @start + @num_bytes) of @inode.
 *
 * @mode:       fallocate mode; only FALLOC_FL_KEEP_SIZE is examined here
 * @min_size:   minimum size of each extent reservation
 * @actual_len: upper bound for i_size when the file is extended
 * @alloc_hint: in/out allocation hint, advanced past each reserved extent
 * @trans:      caller's transaction handle, or NULL to start and end one
 *              transaction per allocated extent
 *
 * The range is allocated in chunks of at most 256M.  Whatever part of the
 * range was not converted into a reserved extent has its data space
 * reservation released before returning.
 *
 * Returns 0 on success or a negative errno.
 */
static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
				       u64 start, u64 num_bytes, u64 min_size,
				       loff_t actual_len, u64 *alloc_hint,
				       struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	struct extent_map *em;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_key ins;
	u64 cur_offset = start;
	u64 clear_offset = start;
	u64 i_size;
	u64 cur_bytes;
	u64 last_alloc = (u64)-1;
	int ret = 0;
	bool own_trans = true;
	u64 end = start + num_bytes - 1;

	if (trans)
		own_trans = false;
	while (num_bytes > 0) {
		if (own_trans) {
			trans = btrfs_start_transaction(root, 3);
			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
				break;
			}
		}

		cur_bytes = min_t(u64, num_bytes, SZ_256M);
		cur_bytes = max(cur_bytes, min_size);
		/*
		 * Never ask for more than the previous reservation returned.
		 * last_alloc starts at (u64)-1, so the first pass is not
		 * restricted by this.
		 */
		cur_bytes = min(cur_bytes, last_alloc);
		ret = btrfs_reserve_extent(root, cur_bytes, cur_bytes,
				min_size, 0, *alloc_hint, &ins, 1, 0);
		if (ret) {
			if (own_trans)
				btrfs_end_transaction(trans);
			break;
		}

		/*
		 * From here on this chunk counts as reserved: clear_offset is
		 * advanced so that the cleanup at the end of the function only
		 * releases the data space reservation for the part of the range
		 * that never got an extent, even if a later step fails.
		 */
		clear_offset += ins.offset;
		btrfs_dec_block_group_reservations(fs_info, ins.objectid);

		last_alloc = ins.offset;
		ret = insert_prealloc_file_extent(trans, inode, &ins, cur_offset);
		if (ret) {
			btrfs_free_reserved_extent(fs_info, ins.objectid,
						   ins.offset, 0);
			btrfs_abort_transaction(trans, ret);
			if (own_trans)
				btrfs_end_transaction(trans);
			break;
		}

		btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset,
					cur_offset + ins.offset -1, 0);

		em = alloc_extent_map();
		if (!em) {
			/*
			 * No cached mapping could be created for the new extent;
			 * flag the inode as needing a full sync and carry on.
			 */
			set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
				&BTRFS_I(inode)->runtime_flags);
			goto next;
		}

		em->start = cur_offset;
		em->orig_start = cur_offset;
		em->len = ins.offset;
		em->block_start = ins.objectid;
		em->block_len = ins.offset;
		em->orig_block_len = ins.offset;
		em->ram_bytes = ins.offset;
		set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
		em->generation = trans->transid;

		/* Retry the insert for as long as an overlapping mapping exists. */
		while (1) {
			write_lock(&em_tree->lock);
			ret = add_extent_mapping(em_tree, em, 1);
			write_unlock(&em_tree->lock);
			if (ret != -EEXIST)
				break;
			btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset,
						cur_offset + ins.offset - 1,
						0);
		}
		free_extent_map(em);
next:
		num_bytes -= ins.offset;
		cur_offset += ins.offset;
		*alloc_hint = ins.objectid + ins.offset;

		inode_inc_iversion(inode);
		inode->i_ctime = current_time(inode);
		BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
		/*
		 * Grow i_size unless the caller asked to keep it, capping it at
		 * actual_len.
		 */
		if (!(mode & FALLOC_FL_KEEP_SIZE) &&
		    (actual_len > inode->i_size) &&
		    (cur_offset > inode->i_size)) {
			if (cur_offset > actual_len)
				i_size = actual_len;
			else
				i_size = cur_offset;
			i_size_write(inode, i_size);
			btrfs_inode_safe_disk_i_size_write(inode, 0);
		}

		ret = btrfs_update_inode(trans, root, inode);

		if (ret) {
			btrfs_abort_transaction(trans, ret);
			if (own_trans)
				btrfs_end_transaction(trans);
			break;
		}

		if (own_trans)
			btrfs_end_transaction(trans);
	}
	/* Release the reservation for whatever tail was never allocated. */
	if (clear_offset < end)
		btrfs_free_reserved_data_space(BTRFS_I(inode), NULL, clear_offset,
			end - clear_offset + 1);
	return ret;
}
9796
9797int btrfs_prealloc_file_range(struct inode *inode, int mode,
9798 u64 start, u64 num_bytes, u64 min_size,
9799 loff_t actual_len, u64 *alloc_hint)
9800{
9801 return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
9802 min_size, actual_len, alloc_hint,
9803 NULL);
9804}
9805
9806int btrfs_prealloc_file_range_trans(struct inode *inode,
9807 struct btrfs_trans_handle *trans, int mode,
9808 u64 start, u64 num_bytes, u64 min_size,
9809 loff_t actual_len, u64 *alloc_hint)
9810{
9811 return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
9812 min_size, actual_len, alloc_hint, trans);
9813}
9814
/*
 * ->set_page_dirty hook of btrfs_aops: defers entirely to
 * __set_page_dirty_nobuffers() and returns its result.
 */
static int btrfs_set_page_dirty(struct page *page)
{
	int dirtied = __set_page_dirty_nobuffers(page);

	return dirtied;
}
9819
9820static int btrfs_permission(struct inode *inode, int mask)
9821{
9822 struct btrfs_root *root = BTRFS_I(inode)->root;
9823 umode_t mode = inode->i_mode;
9824
9825 if (mask & MAY_WRITE &&
9826 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
9827 if (btrfs_root_readonly(root))
9828 return -EROFS;
9829 if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY)
9830 return -EACCES;
9831 }
9832 return generic_permission(inode, mask);
9833}
9834
/*
 * ->tmpfile hook: create an unnamed regular inode in @dir's root and attach it
 * to @dentry.
 *
 * The inode is created without a name, gets the regular-file operation
 * vectors, is written to the tree and registered as an orphan before being
 * handed to d_tmpfile().
 *
 * Returns 0 on success or a negative errno; on failure a created inode is
 * discarded.
 */
static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct inode *inode = NULL;
	u64 objectid;
	u64 index;
	int ret = 0;

	/*
	 * Reserve 5 items for the transaction.
	 * NOTE(review): presumably sized for the inode plus the orphan item
	 * added below - confirm against the reservation accounting.
	 */
	trans = btrfs_start_transaction(root, 5);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	ret = btrfs_find_free_ino(root, &objectid);
	if (ret)
		goto out;

	inode = btrfs_new_inode(trans, root, dir, NULL, 0,
			btrfs_ino(BTRFS_I(dir)), objectid, mode, &index);
	if (IS_ERR(inode)) {
		ret = PTR_ERR(inode);
		/* Keep the cleanup path away from the error pointer. */
		inode = NULL;
		goto out;
	}

	inode->i_fop = &btrfs_file_operations;
	inode->i_op = &btrfs_file_inode_operations;

	inode->i_mapping->a_ops = &btrfs_aops;
	BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;

	ret = btrfs_init_inode_security(trans, inode, dir, NULL);
	if (ret)
		goto out;

	ret = btrfs_update_inode(trans, root, inode);
	if (ret)
		goto out;
	ret = btrfs_orphan_add(trans, BTRFS_I(inode));
	if (ret)
		goto out;

	/*
	 * The link count is forced to 1 right before d_tmpfile().
	 * NOTE(review): presumably because d_tmpfile() decrements the link
	 * count and would complain if it were already 0 - confirm.
	 */
	set_nlink(inode, 1);
	d_tmpfile(dentry, inode);
	unlock_new_inode(inode);
	mark_inode_dirty(inode);
out:
	btrfs_end_transaction(trans);
	if (ret && inode)
		discard_new_inode(inode);
	btrfs_btree_balance_dirty(fs_info);
	return ret;
}
9899
9900void btrfs_set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
9901{
9902 struct inode *inode = tree->private_data;
9903 unsigned long index = start >> PAGE_SHIFT;
9904 unsigned long end_index = end >> PAGE_SHIFT;
9905 struct page *page;
9906
9907 while (index <= end_index) {
9908 page = find_get_page(inode->i_mapping, index);
9909 ASSERT(page);
9910 set_page_writeback(page);
9911 put_page(page);
9912 index++;
9913 }
9914}
9915
9916#ifdef CONFIG_SWAP
9917
9918
9919
9920
9921
9922static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr,
9923 bool is_block_group)
9924{
9925 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
9926 struct btrfs_swapfile_pin *sp, *entry;
9927 struct rb_node **p;
9928 struct rb_node *parent = NULL;
9929
9930 sp = kmalloc(sizeof(*sp), GFP_NOFS);
9931 if (!sp)
9932 return -ENOMEM;
9933 sp->ptr = ptr;
9934 sp->inode = inode;
9935 sp->is_block_group = is_block_group;
9936
9937 spin_lock(&fs_info->swapfile_pins_lock);
9938 p = &fs_info->swapfile_pins.rb_node;
9939 while (*p) {
9940 parent = *p;
9941 entry = rb_entry(parent, struct btrfs_swapfile_pin, node);
9942 if (sp->ptr < entry->ptr ||
9943 (sp->ptr == entry->ptr && sp->inode < entry->inode)) {
9944 p = &(*p)->rb_left;
9945 } else if (sp->ptr > entry->ptr ||
9946 (sp->ptr == entry->ptr && sp->inode > entry->inode)) {
9947 p = &(*p)->rb_right;
9948 } else {
9949 spin_unlock(&fs_info->swapfile_pins_lock);
9950 kfree(sp);
9951 return 1;
9952 }
9953 }
9954 rb_link_node(&sp->node, parent, p);
9955 rb_insert_color(&sp->node, &fs_info->swapfile_pins);
9956 spin_unlock(&fs_info->swapfile_pins_lock);
9957 return 0;
9958}
9959
9960
9961static void btrfs_free_swapfile_pins(struct inode *inode)
9962{
9963 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
9964 struct btrfs_swapfile_pin *sp;
9965 struct rb_node *node, *next;
9966
9967 spin_lock(&fs_info->swapfile_pins_lock);
9968 node = rb_first(&fs_info->swapfile_pins);
9969 while (node) {
9970 next = rb_next(node);
9971 sp = rb_entry(node, struct btrfs_swapfile_pin, node);
9972 if (sp->inode == inode) {
9973 rb_erase(&sp->node, &fs_info->swapfile_pins);
9974 if (sp->is_block_group)
9975 btrfs_put_block_group(sp->ptr);
9976 kfree(sp);
9977 }
9978 node = next;
9979 }
9980 spin_unlock(&fs_info->swapfile_pins_lock);
9981}
9982
/*
 * Accumulator used by btrfs_swap_activate() while it walks a swap file and
 * merges physically contiguous ranges before reporting them through
 * btrfs_add_swap_extent().
 */
struct btrfs_swap_info {
	/* File offset at which the range currently being built starts. */
	u64 start;
	/* Physical start (bytes) of the range currently being built. */
	u64 block_start;
	/* Length (bytes) of the range currently being built; 0 if none. */
	u64 block_len;
	/* Lowest physical page number reported so far. */
	u64 lowest_ppage;
	/* Highest physical page number reported so far. */
	u64 highest_ppage;
	/* Total number of pages handed to add_swap_extent() so far. */
	unsigned long nr_pages;
	/* Sum of the values returned by add_swap_extent(). */
	int nr_extents;
};
9992
9993static int btrfs_add_swap_extent(struct swap_info_struct *sis,
9994 struct btrfs_swap_info *bsi)
9995{
9996 unsigned long nr_pages;
9997 u64 first_ppage, first_ppage_reported, next_ppage;
9998 int ret;
9999
10000 first_ppage = ALIGN(bsi->block_start, PAGE_SIZE) >> PAGE_SHIFT;
10001 next_ppage = ALIGN_DOWN(bsi->block_start + bsi->block_len,
10002 PAGE_SIZE) >> PAGE_SHIFT;
10003
10004 if (first_ppage >= next_ppage)
10005 return 0;
10006 nr_pages = next_ppage - first_ppage;
10007
10008 first_ppage_reported = first_ppage;
10009 if (bsi->start == 0)
10010 first_ppage_reported++;
10011 if (bsi->lowest_ppage > first_ppage_reported)
10012 bsi->lowest_ppage = first_ppage_reported;
10013 if (bsi->highest_ppage < (next_ppage - 1))
10014 bsi->highest_ppage = next_ppage - 1;
10015
10016 ret = add_swap_extent(sis, bsi->nr_pages, nr_pages, first_ppage);
10017 if (ret < 0)
10018 return ret;
10019 bsi->nr_extents += ret;
10020 bsi->nr_pages += nr_pages;
10021 return 0;
10022}
10023
10024static void btrfs_swap_deactivate(struct file *file)
10025{
10026 struct inode *inode = file_inode(file);
10027
10028 btrfs_free_swapfile_pins(inode);
10029 atomic_dec(&BTRFS_I(inode)->root->nr_swapfiles);
10030}
10031
/*
 * ->swap_activate hook (CONFIG_SWAP build): validate that @file can be used as
 * a swap file and report its physical extents to the swap code.
 *
 * The file is rejected (-EINVAL) if it is compressed, copy-on-write,
 * checksummed, has holes or inline extents, uses a non-single data profile or
 * spans more than one device.  -EBUSY is returned if another exclusive
 * operation holds BTRFS_FS_EXCL_OP.
 *
 * On success @sis->bdev/max/pages/highest_bit and *@span are filled in and the
 * number of swap extents added is returned.  On failure all pins taken so far
 * are dropped via btrfs_swap_deactivate() and a negative errno is returned.
 */
static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
			       sector_t *span)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_state *cached_state = NULL;
	struct extent_map *em = NULL;
	struct btrfs_device *device = NULL;
	struct btrfs_swap_info bsi = {
		.lowest_ppage = (sector_t)-1ULL,
	};
	int ret = 0;
	u64 isize;
	u64 start;

	/*
	 * Wait for all ordered extents of the file to complete before looking
	 * at its extent layout.
	 */
	ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
	if (ret)
		return ret;

	/* Inode-level checks: no compression, NODATACOW and NODATASUM set. */
	if (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS) {
		btrfs_warn(fs_info, "swapfile must not be compressed");
		return -EINVAL;
	}
	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)) {
		btrfs_warn(fs_info, "swapfile must not be copy-on-write");
		return -EINVAL;
	}
	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
		btrfs_warn(fs_info, "swapfile must not be checksummed");
		return -EINVAL;
	}

	/*
	 * Hold the exclusive-operation bit while the extents are being mapped;
	 * it is cleared again before returning.
	 * NOTE(review): presumably this keeps operations that relocate extents
	 * (balance, device remove/replace/resize) from running concurrently,
	 * while the swapfile pins cover the time the swap file is active -
	 * confirm.
	 */
	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
		btrfs_warn(fs_info,
	   "cannot activate swapfile while exclusive operation is running");
		return -EBUSY;
	}

	/*
	 * Count this swap file on its root before walking the extents; the
	 * counter is dropped again by btrfs_swap_deactivate().
	 * NOTE(review): presumably checked by snapshot creation so that no
	 * snapshot can make the extents shared - confirm.
	 */
	atomic_inc(&BTRFS_I(inode)->root->nr_swapfiles);

	isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);

	lock_extent_bits(io_tree, 0, isize - 1, &cached_state);
	start = 0;
	while (start < isize) {
		u64 logical_block_start, physical_block_start;
		struct btrfs_block_group *bg;
		u64 len = isize - start;

		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len);
		if (IS_ERR(em)) {
			ret = PTR_ERR(em);
			goto out;
		}

		if (em->block_start == EXTENT_MAP_HOLE) {
			btrfs_warn(fs_info, "swapfile must not have holes");
			ret = -EINVAL;
			goto out;
		}
		if (em->block_start == EXTENT_MAP_INLINE) {
			/*
			 * Inline extents have no block address of their own
			 * that could be handed to the swap code, so refuse
			 * them explicitly.
			 */
			btrfs_warn(fs_info, "swapfile must not be inline");
			ret = -EINVAL;
			goto out;
		}
		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
			btrfs_warn(fs_info, "swapfile must not be compressed");
			ret = -EINVAL;
			goto out;
		}

		/* Clamp to this file extent and remember its logical address. */
		logical_block_start = em->block_start + (start - em->start);
		len = min(len, em->len - (start - em->start));
		free_extent_map(em);
		em = NULL;

		/* > 0: range can be written in place, 0: it would need COW. */
		ret = can_nocow_extent(inode, start, &len, NULL, NULL, NULL, true);
		if (ret < 0) {
			goto out;
		} else if (ret) {
			ret = 0;
		} else {
			btrfs_warn(fs_info,
				   "swapfile must not be copy-on-write");
			ret = -EINVAL;
			goto out;
		}

		/* From here on "em" refers to the chunk map, not a file extent. */
		em = btrfs_get_chunk_map(fs_info, logical_block_start, len);
		if (IS_ERR(em)) {
			ret = PTR_ERR(em);
			goto out;
		}

		if (em->map_lookup->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
			btrfs_warn(fs_info,
				   "swapfile must have single data profile");
			ret = -EINVAL;
			goto out;
		}

		if (device == NULL) {
			/* First extent: remember and pin the device. */
			device = em->map_lookup->stripes[0].dev;
			ret = btrfs_add_swapfile_pin(inode, device, false);
			if (ret == 1)
				ret = 0;
			else if (ret)
				goto out;
		} else if (device != em->map_lookup->stripes[0].dev) {
			btrfs_warn(fs_info, "swapfile must be on one device");
			ret = -EINVAL;
			goto out;
		}

		/* Translate to a physical address and clamp to the chunk. */
		physical_block_start = (em->map_lookup->stripes[0].physical +
					(logical_block_start - em->start));
		len = min(len, em->len - (logical_block_start - em->start));
		free_extent_map(em);
		em = NULL;

		bg = btrfs_lookup_block_group(fs_info, logical_block_start);
		if (!bg) {
			btrfs_warn(fs_info,
			   "could not find block group containing swapfile");
			ret = -EINVAL;
			goto out;
		}

		/*
		 * A new pin keeps the block group reference obtained by the
		 * lookup above; on a duplicate pin (1) or an error the
		 * reference is dropped here.
		 */
		ret = btrfs_add_swapfile_pin(inode, bg, true);
		if (ret) {
			btrfs_put_block_group(bg);
			if (ret == 1)
				ret = 0;
			else
				goto out;
		}

		/* Merge with the pending range if physically contiguous. */
		if (bsi.block_len &&
		    bsi.block_start + bsi.block_len == physical_block_start) {
			bsi.block_len += len;
		} else {
			if (bsi.block_len) {
				ret = btrfs_add_swap_extent(sis, &bsi);
				if (ret)
					goto out;
			}
			bsi.start = start;
			bsi.block_start = physical_block_start;
			bsi.block_len = len;
		}

		start += len;
	}

	/* Flush the last pending range. */
	if (bsi.block_len)
		ret = btrfs_add_swap_extent(sis, &bsi);

out:
	if (!IS_ERR_OR_NULL(em))
		free_extent_map(em);

	unlock_extent_cached(io_tree, 0, isize - 1, &cached_state);

	/* Undo the pins and the nr_swapfiles increment on failure. */
	if (ret)
		btrfs_swap_deactivate(file);

	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);

	if (ret)
		return ret;

	if (device)
		sis->bdev = device->bdev;
	*span = bsi.highest_ppage - bsi.lowest_ppage + 1;
	sis->max = bsi.nr_pages;
	sis->pages = bsi.nr_pages - 1;
	sis->highest_bit = bsi.nr_pages - 1;
	return bsi.nr_extents;
}
10241#else
/* !CONFIG_SWAP stub: nothing to undo because activation always fails. */
static void btrfs_swap_deactivate(struct file *file)
{
}
10245
/* !CONFIG_SWAP stub: swap files are not supported, always -EOPNOTSUPP. */
static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
			       sector_t *span)
{
	return -EOPNOTSUPP;
}
10251#endif
10252
/*
 * Inode operations table for directories (judging by its name and the
 * directory-only hooks it carries; the place where it is installed is not
 * part of this chunk).
 */
static const struct inode_operations btrfs_dir_inode_operations = {
	.getattr	= btrfs_getattr,
	.lookup		= btrfs_lookup,
	.create		= btrfs_create,
	.unlink		= btrfs_unlink,
	.link		= btrfs_link,
	.mkdir		= btrfs_mkdir,
	.rmdir		= btrfs_rmdir,
	/* Handles plain, NOREPLACE, EXCHANGE and WHITEOUT renames. */
	.rename		= btrfs_rename2,
	.symlink	= btrfs_symlink,
	.setattr	= btrfs_setattr,
	.mknod		= btrfs_mknod,
	.listxattr	= btrfs_listxattr,
	.permission	= btrfs_permission,
	.get_acl	= btrfs_get_acl,
	.set_acl	= btrfs_set_acl,
	.update_time	= btrfs_update_time,
	.tmpfile	= btrfs_tmpfile,
};
10272
/*
 * File operations table for open directories (judging by its name; the place
 * where it is installed is not part of this chunk).
 */
static const struct file_operations btrfs_dir_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= generic_read_dir,
	.iterate_shared	= btrfs_real_readdir,
	.open		= btrfs_opendir,
	.unlocked_ioctl	= btrfs_ioctl,
#ifdef CONFIG_COMPAT
	/* Only built when compat ioctl support is configured. */
	.compat_ioctl	= btrfs_compat_ioctl,
#endif
	.release        = btrfs_release_file,
	.fsync		= btrfs_sync_file,
};
10285
/*
 * Extent I/O callbacks; assigned to the io_tree of new inodes in
 * btrfs_symlink() and btrfs_tmpfile().
 */
static const struct extent_io_ops btrfs_extent_io_ops = {
	/* Both hooks are implemented elsewhere in this file. */
	.submit_bio_hook = btrfs_submit_bio_hook,
	.readpage_end_io_hook = btrfs_readpage_end_io_hook,
};
10291
10292
10293
10294
10295
10296
10297
10298
10299
10300
10301
10302
10303
/*
 * Address space operations for btrfs data mappings; assigned to
 * inode->i_mapping->a_ops in btrfs_symlink() and btrfs_tmpfile().
 */
static const struct address_space_operations btrfs_aops = {
	.readpage	= btrfs_readpage,
	.writepage	= btrfs_writepage,
	.writepages	= btrfs_writepages,
	.readahead	= btrfs_readahead,
	.direct_IO	= btrfs_direct_IO,
	.invalidatepage = btrfs_invalidatepage,
	.releasepage	= btrfs_releasepage,
#ifdef CONFIG_MIGRATION
	/* Only built when page migration is configured. */
	.migratepage	= btrfs_migratepage,
#endif
	.set_page_dirty	= btrfs_set_page_dirty,
	.error_remove_page = generic_error_remove_page,
	/* Stubs are used when CONFIG_SWAP is not set. */
	.swap_activate	= btrfs_swap_activate,
	.swap_deactivate = btrfs_swap_deactivate,
};
10320
/*
 * Inode operations for regular files; also installed on new inodes by
 * btrfs_tmpfile() and, temporarily, by btrfs_symlink().
 */
static const struct inode_operations btrfs_file_inode_operations = {
	.getattr	= btrfs_getattr,
	.setattr	= btrfs_setattr,
	.listxattr      = btrfs_listxattr,
	.permission	= btrfs_permission,
	.fiemap		= btrfs_fiemap,
	.get_acl	= btrfs_get_acl,
	.set_acl	= btrfs_set_acl,
	.update_time	= btrfs_update_time,
};
/*
 * Inode operations table for "special" inodes.
 * NOTE(review): presumably device nodes, FIFOs and sockets - the place where
 * it is installed is not part of this chunk.
 */
static const struct inode_operations btrfs_special_inode_operations = {
	.getattr	= btrfs_getattr,
	.setattr	= btrfs_setattr,
	.permission	= btrfs_permission,
	.listxattr	= btrfs_listxattr,
	.get_acl	= btrfs_get_acl,
	.set_acl	= btrfs_set_acl,
	.update_time	= btrfs_update_time,
};
/*
 * Inode operations for symbolic links; installed by btrfs_symlink() once the
 * link target has been written.  Unlike the file table it has no ACL hooks.
 */
static const struct inode_operations btrfs_symlink_inode_operations = {
	/* Link target is read through the page cache. */
	.get_link	= page_get_link,
	.getattr	= btrfs_getattr,
	.setattr	= btrfs_setattr,
	.permission	= btrfs_permission,
	.listxattr	= btrfs_listxattr,
	.update_time	= btrfs_update_time,
};
10348
/* Dentry operations exported by this file; only ->d_delete is overridden. */
const struct dentry_operations btrfs_dentry_operations = {
	.d_delete	= btrfs_dentry_delete,
};
10352