1
2
3
4
5
6#include <linux/kernel.h>
7#include <linux/bio.h>
8#include <linux/buffer_head.h>
9#include <linux/file.h>
10#include <linux/fs.h>
11#include <linux/pagemap.h>
12#include <linux/highmem.h>
13#include <linux/time.h>
14#include <linux/init.h>
15#include <linux/string.h>
16#include <linux/backing-dev.h>
17#include <linux/writeback.h>
18#include <linux/compat.h>
19#include <linux/xattr.h>
20#include <linux/posix_acl.h>
21#include <linux/falloc.h>
22#include <linux/slab.h>
23#include <linux/ratelimit.h>
24#include <linux/btrfs.h>
25#include <linux/blkdev.h>
26#include <linux/posix_acl_xattr.h>
27#include <linux/uio.h>
28#include <linux/magic.h>
29#include <linux/iversion.h>
30#include <linux/swap.h>
31#include <linux/sched/mm.h>
32#include <asm/unaligned.h>
33#include "ctree.h"
34#include "disk-io.h"
35#include "transaction.h"
36#include "btrfs_inode.h"
37#include "print-tree.h"
38#include "ordered-data.h"
39#include "xattr.h"
40#include "tree-log.h"
41#include "volumes.h"
42#include "compression.h"
43#include "locking.h"
44#include "free-space-cache.h"
45#include "inode-map.h"
46#include "backref.h"
47#include "props.h"
48#include "qgroup.h"
49#include "dedupe.h"
50
/* Arguments threaded through iget5_locked() to match/initialize an inode */
struct btrfs_iget_args {
	struct btrfs_key *location;	/* key of the inode item to look up */
	struct btrfs_root *root;	/* subvolume root the inode belongs to */
};
55
/*
 * Per-call state for the direct IO path.  NOTE(review): field semantics
 * inferred from names only — the users of this struct are outside this
 * chunk; confirm against the DIO code before relying on these notes.
 */
struct btrfs_dio_data {
	u64 reserve;			/* presumably bytes of space still reserved */
	u64 unsubmitted_oe_range_start;	/* ordered-extent range created but */
	u64 unsubmitted_oe_range_end;	/*   not yet submitted */
	int overwrite;			/* presumably set for writes over existing extents */
};
62
63static const struct inode_operations btrfs_dir_inode_operations;
64static const struct inode_operations btrfs_symlink_inode_operations;
65static const struct inode_operations btrfs_dir_ro_inode_operations;
66static const struct inode_operations btrfs_special_inode_operations;
67static const struct inode_operations btrfs_file_inode_operations;
68static const struct address_space_operations btrfs_aops;
69static const struct file_operations btrfs_dir_file_operations;
70static const struct extent_io_ops btrfs_extent_io_ops;
71
72static struct kmem_cache *btrfs_inode_cachep;
73struct kmem_cache *btrfs_trans_handle_cachep;
74struct kmem_cache *btrfs_path_cachep;
75struct kmem_cache *btrfs_free_space_cachep;
76
77static int btrfs_setsize(struct inode *inode, struct iattr *attr);
78static int btrfs_truncate(struct inode *inode, bool skip_writeback);
79static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
80static noinline int cow_file_range(struct inode *inode,
81 struct page *locked_page,
82 u64 start, u64 end, u64 delalloc_end,
83 int *page_started, unsigned long *nr_written,
84 int unlock, struct btrfs_dedupe_hash *hash);
85static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
86 u64 orig_start, u64 block_start,
87 u64 block_len, u64 orig_block_len,
88 u64 ram_bytes, int compress_type,
89 int type);
90
91static void __endio_write_update_ordered(struct inode *inode,
92 const u64 offset, const u64 bytes,
93 const bool uptodate);
94
95
96
97
98
99
100
101
102
103
104
/*
 * Clean up the ordered extents submitted for the failed delalloc range
 * [@offset, @offset + @bytes).
 *
 * PagePrivate2 is cleared on every page in the range first: that bit tells
 * the writepage endio path that an ordered extent still accounts for the
 * page, and we are about to finish those ordered extents here (with
 * uptodate == false) instead.
 */
static inline void btrfs_cleanup_ordered_extents(struct inode *inode,
						 struct page *locked_page,
						 u64 offset, u64 bytes)
{
	unsigned long index = offset >> PAGE_SHIFT;
	unsigned long end_index = (offset + bytes - 1) >> PAGE_SHIFT;
	u64 page_start = page_offset(locked_page);
	u64 page_end = page_start + PAGE_SIZE - 1;

	struct page *page;

	while (index <= end_index) {
		page = find_get_page(inode->i_mapping, index);
		index++;
		if (!page)
			continue;
		ClearPagePrivate2(page);
		put_page(page);
	}

	/*
	 * In case locked_page belongs to the range being cleaned up, skip
	 * its portion: the first page of a delalloc range is cleaned up by
	 * the caller of run_delalloc_range().
	 */
	if (page_start >= offset && page_end <= (offset + bytes - 1)) {
		offset += PAGE_SIZE;
		bytes -= PAGE_SIZE;
	}

	return __endio_write_update_ordered(inode, offset, bytes, false);
}
137
138static int btrfs_dirty_inode(struct inode *inode);
139
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/* Self-test helper: point a test inode's io_tree at the real extent ops */
void btrfs_test_inode_set_ops(struct inode *inode)
{
	BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
}
#endif
146
/*
 * Initialize security attributes on a freshly created inode: inherit the
 * ACL from @dir, then set up the security xattrs for @qstr.  Returns 0 on
 * success or the first error encountered.
 */
static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
				     struct inode *inode, struct inode *dir,
				     const struct qstr *qstr)
{
	int ret;

	ret = btrfs_init_acl(trans, inode, dir);
	if (ret)
		return ret;

	return btrfs_xattr_security_init(trans, inode, dir, qstr);
}
158
159
160
161
162
163
/*
 * Insert an inline file extent item covering [start, start + size) into
 * the fs tree.  When @compressed_pages is supplied, the payload stored in
 * the item is the compressed data (@compressed_size bytes, encoded with
 * @compress_type); otherwise the plain page-cache contents are copied in.
 *
 * If @extent_inserted is set, the caller has already inserted the empty
 * item and @path points at it; otherwise the item is inserted here.
 */
static int insert_inline_extent(struct btrfs_trans_handle *trans,
				struct btrfs_path *path, int extent_inserted,
				struct btrfs_root *root, struct inode *inode,
				u64 start, size_t size, size_t compressed_size,
				int compress_type,
				struct page **compressed_pages)
{
	struct extent_buffer *leaf;
	struct page *page = NULL;
	char *kaddr;
	unsigned long ptr;
	struct btrfs_file_extent_item *ei;
	int ret;
	size_t cur_size = size;
	unsigned long offset;

	/* the on-disk item holds the compressed payload when one is given */
	if (compressed_size && compressed_pages)
		cur_size = compressed_size;

	inode_add_bytes(inode, size);

	if (!extent_inserted) {
		struct btrfs_key key;
		size_t datasize;

		key.objectid = btrfs_ino(BTRFS_I(inode));
		key.offset = start;
		key.type = BTRFS_EXTENT_DATA_KEY;

		datasize = btrfs_file_extent_calc_inline_size(cur_size);
		path->leave_spinning = 1;
		ret = btrfs_insert_empty_item(trans, root, path, &key,
					      datasize);
		if (ret)
			goto fail;
	}
	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);
	btrfs_set_file_extent_generation(leaf, ei, trans->transid);
	btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE);
	btrfs_set_file_extent_encryption(leaf, ei, 0);
	btrfs_set_file_extent_other_encoding(leaf, ei, 0);
	btrfs_set_file_extent_ram_bytes(leaf, ei, size);
	ptr = btrfs_file_extent_inline_start(ei);

	if (compress_type != BTRFS_COMPRESS_NONE) {
		struct page *cpage;
		int i = 0;
		/* copy the compressed payload into the item, page by page */
		while (compressed_size > 0) {
			cpage = compressed_pages[i];
			cur_size = min_t(unsigned long, compressed_size,
					 PAGE_SIZE);

			kaddr = kmap_atomic(cpage);
			write_extent_buffer(leaf, kaddr, ptr, cur_size);
			kunmap_atomic(kaddr);

			i++;
			ptr += cur_size;
			compressed_size -= cur_size;
		}
		btrfs_set_file_extent_compression(leaf, ei,
						  compress_type);
	} else {
		/* uncompressed: copy the data straight from the page cache */
		page = find_get_page(inode->i_mapping,
				     start >> PAGE_SHIFT);
		btrfs_set_file_extent_compression(leaf, ei, 0);
		kaddr = kmap_atomic(page);
		offset = offset_in_page(start);
		write_extent_buffer(leaf, kaddr + offset, ptr, size);
		kunmap_atomic(kaddr);
		put_page(page);
	}
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	/*
	 * We're an inline extent, so nobody can extend the file past i_size
	 * without locking a page we already have locked.
	 *
	 * We must do any i_size and inode updates before we unlock the
	 * pages.  Otherwise we could end up racing with unlink.
	 */
	BTRFS_I(inode)->disk_i_size = inode->i_size;
	ret = btrfs_update_inode(trans, root, inode);

fail:
	return ret;
}
256
257
258
259
260
261
262
/*
 * Conditionally insert an inline extent into the file.  This does the
 * checks required to make sure the data is small enough to fit as an
 * inline extent.
 *
 * Returns 0 on success (inline extent created), 1 when the range does not
 * qualify for inlining (caller must fall back to regular COW), or a
 * negative errno.
 */
static noinline int cow_file_range_inline(struct inode *inode, u64 start,
					  u64 end, size_t compressed_size,
					  int compress_type,
					  struct page **compressed_pages)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_trans_handle *trans;
	u64 isize = i_size_read(inode);
	u64 actual_end = min(end + 1, isize);
	u64 inline_len = actual_end - start;
	u64 aligned_end = ALIGN(end, fs_info->sectorsize);
	u64 data_len = inline_len;
	int ret;
	struct btrfs_path *path;
	int extent_inserted = 0;
	u32 extent_item_size;

	if (compressed_size)
		data_len = compressed_size;

	/*
	 * Inline extents must start at offset 0, end at/after i_size, and be
	 * small enough for both the item size limit and the mount option.
	 * A full-sector uncompressed tail also gains nothing from inlining.
	 */
	if (start > 0 ||
	    actual_end > fs_info->sectorsize ||
	    data_len > BTRFS_MAX_INLINE_DATA_SIZE(fs_info) ||
	    (!compressed_size &&
	    (actual_end & (fs_info->sectorsize - 1)) == 0) ||
	    end + 1 < isize ||
	    data_len > fs_info->max_inline) {
		return 1;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);
	}
	trans->block_rsv = &BTRFS_I(inode)->block_rsv;

	if (compressed_size && compressed_pages)
		extent_item_size = btrfs_file_extent_calc_inline_size(
		   compressed_size);
	else
		extent_item_size = btrfs_file_extent_calc_inline_size(
		    inline_len);

	/* drop any old extents in the range, reserving room for the item */
	ret = __btrfs_drop_extents(trans, root, inode, path,
				   start, aligned_end, NULL,
				   1, 1, extent_item_size, &extent_inserted);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	if (isize > actual_end)
		inline_len = min_t(u64, isize, actual_end);
	ret = insert_inline_extent(trans, path, extent_inserted,
				   root, inode, start,
				   inline_len, compressed_size,
				   compress_type, compressed_pages);
	if (ret && ret != -ENOSPC) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	} else if (ret == -ENOSPC) {
		/* no room for the item: fall back to regular COW */
		ret = 1;
		goto out;
	}

	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
	btrfs_drop_extent_cache(BTRFS_I(inode), start, aligned_end - 1, 0);
out:
	/*
	 * An inline extent never reaches finish_ordered_io(), so the qgroup
	 * reservation for the range must be released here.  An inline extent
	 * can cover at most one page, hence PAGE_SIZE.
	 */
	btrfs_qgroup_free_data(inode, NULL, 0, PAGE_SIZE);
	btrfs_free_path(path);
	btrfs_end_transaction(trans);
	return ret;
}
348
/* One (possibly compressed) delalloc range queued for later submission */
struct async_extent {
	u64 start;		/* file offset of the range */
	u64 ram_size;		/* uncompressed length of the range */
	u64 compressed_size;	/* bytes used in @pages; 0 if uncompressed */
	struct page **pages;	/* compressed payload; NULL for the fallback path */
	unsigned long nr_pages;	/* number of entries in @pages */
	int compress_type;	/* BTRFS_COMPRESS_* used for @pages */
	struct list_head list;	/* linked on async_chunk->extents */
};
358
/* One unit of async COW work, covering the range [start, end] of @inode */
struct async_chunk {
	struct inode *inode;	/* set to NULL once the inode ref is dropped */
	struct page *locked_page;	/* page the caller already holds locked */
	u64 start;
	u64 end;
	unsigned int write_flags;	/* passed through to bio submission */
	struct list_head extents;	/* async_extents built by compression */
	struct btrfs_work work;	/* workqueue handle */
	atomic_t *pending;	/* points at the owning async_cow's num_chunks */
};
369
/*
 * Allocation wrapper for a batch of async_chunk work items.  The last
 * chunk to finish kvfree()s the whole struct through its @pending pointer
 * (see async_cow_free()), which points at @num_chunks — so num_chunks
 * must remain the first member.
 */
struct async_cow {
	/* number of chunks still in flight; address doubles as free handle */
	atomic_t num_chunks;
	struct async_chunk chunks[];
};
375
376static noinline int add_async_extent(struct async_chunk *cow,
377 u64 start, u64 ram_size,
378 u64 compressed_size,
379 struct page **pages,
380 unsigned long nr_pages,
381 int compress_type)
382{
383 struct async_extent *async_extent;
384
385 async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS);
386 BUG_ON(!async_extent);
387 async_extent->start = start;
388 async_extent->ram_size = ram_size;
389 async_extent->compressed_size = compressed_size;
390 async_extent->pages = pages;
391 async_extent->nr_pages = nr_pages;
392 async_extent->compress_type = compress_type;
393 list_add_tail(&async_extent->list, &cow->extents);
394 return 0;
395}
396
397static inline int inode_need_compress(struct inode *inode, u64 start, u64 end)
398{
399 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
400
401
402 if (btrfs_test_opt(fs_info, FORCE_COMPRESS))
403 return 1;
404
405 if (BTRFS_I(inode)->defrag_compress)
406 return 1;
407
408 if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS)
409 return 0;
410 if (btrfs_test_opt(fs_info, COMPRESS) ||
411 BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS ||
412 BTRFS_I(inode)->prop_compress)
413 return btrfs_compress_heuristic(inode, start, end);
414 return 0;
415}
416
417static inline void inode_should_defrag(struct btrfs_inode *inode,
418 u64 start, u64 end, u64 num_bytes, u64 small_write)
419{
420
421 if (num_bytes < small_write &&
422 (start > 0 || end + 1 < inode->disk_i_size))
423 btrfs_add_inode_defrag(NULL, inode);
424}
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
/*
 * We create compressed extents in two phases.  The first phase compresses
 * a range of pages that have already been locked (both pages and state
 * bits are locked).
 *
 * This is done inside an ordered work queue, and the compression is
 * spread across many cpus.  The actual IO submission is step two, and
 * the ordered work queue takes care of making sure that happens in the
 * same order things were put onto the queue by writepages and friends.
 *
 * If this code finds it can't get good compression, it puts an entry
 * onto the work queue to write the uncompressed bytes.  This makes sure
 * that both compressed inodes and uncompressed inodes are written in the
 * same order that the flusher thread sent them down.
 */
static noinline void compress_file_range(struct async_chunk *async_chunk,
					 int *num_added)
{
	struct inode *inode = async_chunk->inode;
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	u64 blocksize = fs_info->sectorsize;
	u64 start = async_chunk->start;
	u64 end = async_chunk->end;
	u64 actual_end;
	int ret = 0;
	struct page **pages = NULL;
	unsigned long nr_pages;
	unsigned long total_compressed = 0;
	unsigned long total_in = 0;
	int i;
	int will_compress;
	int compress_type = fs_info->compress_type;
	int redirty = 0;

	inode_should_defrag(BTRFS_I(inode), start, end, end - start + 1,
			SZ_16K);

	actual_end = min_t(u64, i_size_read(inode), end + 1);
again:
	will_compress = 0;
	nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1;
	BUILD_BUG_ON((BTRFS_MAX_COMPRESSED % PAGE_SIZE) != 0);
	nr_pages = min_t(unsigned long, nr_pages,
			BTRFS_MAX_COMPRESSED / PAGE_SIZE);

	/*
	 * We don't want to send crud past the end of i_size through
	 * compress, it mostly just wastes the cpu time anyway.
	 */
	if (actual_end <= start)
		goto cleanup_and_bail_uncompressed;

	total_compressed = actual_end - start;

	/*
	 * Skip compression for a small file range (<= blocksize) that isn't
	 * an inline extent, since it doesn't save disk space at all.
	 */
	if (total_compressed <= blocksize &&
	   (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
		goto cleanup_and_bail_uncompressed;

	total_compressed = min_t(unsigned long, total_compressed,
			BTRFS_MAX_UNCOMPRESSED);
	total_in = 0;
	ret = 0;

	/*
	 * We do compression for mount -o compress and when the inode has
	 * not been flagged as NOCOMPRESS.  This flag can change at any time
	 * if we discover bad compression ratios.
	 */
	if (inode_need_compress(inode, start, end)) {
		WARN_ON(pages);
		pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
		if (!pages) {
			/* just bail out to the uncompressed code */
			nr_pages = 0;
			goto cont;
		}

		if (BTRFS_I(inode)->defrag_compress)
			compress_type = BTRFS_I(inode)->defrag_compress;
		else if (BTRFS_I(inode)->prop_compress)
			compress_type = BTRFS_I(inode)->prop_compress;

		/*
		 * We need to call clear_page_dirty_for_io on each page in
		 * the range.  Otherwise applications with the file mmap'd
		 * can wander in and change the page contents while we are
		 * compressing them.
		 *
		 * If the compression fails for any reason, we set the pages
		 * dirty again later on.
		 *
		 * Note that the remaining part is redirtied, the start
		 * pointer has moved, the end is the original one.
		 */
		if (!redirty) {
			extent_range_clear_dirty_for_io(inode, start, end);
			redirty = 1;
		}

		/* Compression level is applied here and only here */
		ret = btrfs_compress_pages(
			compress_type | (fs_info->compress_level << 4),
					   inode->i_mapping, start,
					   pages,
					   &nr_pages,
					   &total_in,
					   &total_compressed);

		if (!ret) {
			unsigned long offset = offset_in_page(total_compressed);
			struct page *page = pages[nr_pages - 1];
			char *kaddr;

			/*
			 * Zero the tail end of the last page, as we might be
			 * sending it down to disk.
			 */
			if (offset) {
				kaddr = kmap_atomic(page);
				memset(kaddr + offset, 0,
				       PAGE_SIZE - offset);
				kunmap_atomic(kaddr);
			}
			will_compress = 1;
		}
	}
cont:
	if (start == 0) {
		/* lets try to make an inline extent */
		if (ret || total_in < actual_end) {
			/*
			 * Check cow_file_range_inline() for why we don't even
			 * try to create an inline extent for the whole range
			 * when compression failed or was partial.
			 */
			ret = cow_file_range_inline(inode, start, end, 0,
						    BTRFS_COMPRESS_NONE, NULL);
		} else {
			/* try making a compressed inline extent */
			ret = cow_file_range_inline(inode, start, end,
						    total_compressed,
						    compress_type, pages);
		}
		if (ret <= 0) {
			unsigned long clear_flags = EXTENT_DELALLOC |
				EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
				EXTENT_DO_ACCOUNTING;
			unsigned long page_error_op;

			page_error_op = ret < 0 ? PAGE_SET_ERROR : 0;

			/*
			 * Inline extent creation worked or returned error,
			 * we don't need to create any more async work items.
			 * Unlock and free up our temp pages.
			 *
			 * We use DO_ACCOUNTING here because we need the
			 * delalloc_release_metadata to be done _after_ we
			 * drop our outstanding extent for clearing delalloc
			 * for this range.
			 */
			extent_clear_unlock_delalloc(inode, start, end, end,
						     NULL, clear_flags,
						     PAGE_UNLOCK |
						     PAGE_CLEAR_DIRTY |
						     PAGE_SET_WRITEBACK |
						     page_error_op |
						     PAGE_END_WRITEBACK);
			goto free_pages_out;
		}
	}

	if (will_compress) {
		/*
		 * We aren't doing an inline extent; round the compressed
		 * size up to a block size boundary so the allocator does
		 * sane things.
		 */
		total_compressed = ALIGN(total_compressed, blocksize);

		/*
		 * One last check to make sure the compression is really a
		 * win: compare the page count read with the blocks on disk,
		 * compression must free at least one sector size.
		 */
		total_in = ALIGN(total_in, PAGE_SIZE);
		if (total_compressed + blocksize <= total_in) {
			*num_added += 1;

			/*
			 * The async work queues will take care of doing the
			 * actual allocation on disk for these compressed
			 * pages, and will submit them to the elevator.
			 */
			add_async_extent(async_chunk, start, total_in,
					total_compressed, pages, nr_pages,
					compress_type);

			if (start + total_in < end) {
				start += total_in;
				pages = NULL;
				cond_resched();
				goto again;
			}
			return;
		}
	}
	if (pages) {
		/*
		 * The compression code ran but failed to make things
		 * smaller; free any pages it allocated and our page pointer
		 * array.
		 */
		for (i = 0; i < nr_pages; i++) {
			WARN_ON(pages[i]->mapping);
			put_page(pages[i]);
		}
		kfree(pages);
		pages = NULL;
		total_compressed = 0;
		nr_pages = 0;

		/* flag the file so we don't compress in the future either */
		if (!btrfs_test_opt(fs_info, FORCE_COMPRESS) &&
		    !(BTRFS_I(inode)->prop_compress)) {
			BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
		}
	}
cleanup_and_bail_uncompressed:
	/*
	 * No compression, but we still need to write the pages in the file
	 * we've been given so far.  Redirty the locked page if it
	 * corresponds to our extent and set things up for the async work
	 * queue to run cow_file_range() to do the normal delalloc dance.
	 */
	if (page_offset(async_chunk->locked_page) >= start &&
	    page_offset(async_chunk->locked_page) <= end)
		__set_page_dirty_nobuffers(async_chunk->locked_page);
		/* unlocked later on in the async handlers */

	if (redirty)
		extent_range_redirty_for_io(inode, start, end);
	add_async_extent(async_chunk, start, end - start + 1, 0, NULL, 0,
			 BTRFS_COMPRESS_NONE);
	*num_added += 1;

	return;

free_pages_out:
	for (i = 0; i < nr_pages; i++) {
		WARN_ON(pages[i]->mapping);
		put_page(pages[i]);
	}
	kfree(pages);
}
689
690static void free_async_extent_pages(struct async_extent *async_extent)
691{
692 int i;
693
694 if (!async_extent->pages)
695 return;
696
697 for (i = 0; i < async_extent->nr_pages; i++) {
698 WARN_ON(async_extent->pages[i]->mapping);
699 put_page(async_extent->pages[i]);
700 }
701 kfree(async_extent->pages);
702 async_extent->nr_pages = 0;
703 async_extent->pages = NULL;
704}
705
706
707
708
709
710
711
/*
 * Phase two of compressed writeback.  This is the ordered portion of the
 * code, which only gets called in the order the work was queued.  Walk
 * all the async extents created by compress_file_range() and send them
 * down to the disk.
 */
static noinline void submit_compressed_extents(struct async_chunk *async_chunk)
{
	struct inode *inode = async_chunk->inode;
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct async_extent *async_extent;
	u64 alloc_hint = 0;
	struct btrfs_key ins;
	struct extent_map *em;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	int ret = 0;

again:
	while (!list_empty(&async_chunk->extents)) {
		async_extent = list_entry(async_chunk->extents.next,
					  struct async_extent, list);
		list_del(&async_extent->list);

retry:
		lock_extent(io_tree, async_extent->start,
			    async_extent->start + async_extent->ram_size - 1);
		/* did the compression code fall back to uncompressed IO? */
		if (!async_extent->pages) {
			int page_started = 0;
			unsigned long nr_written = 0;

			/* allocate blocks */
			ret = cow_file_range(inode, async_chunk->locked_page,
					     async_extent->start,
					     async_extent->start +
					     async_extent->ram_size - 1,
					     async_extent->start +
					     async_extent->ram_size - 1,
					     &page_started, &nr_written, 0,
					     NULL);

			/*
			 * If page_started, cow_file_range() inserted an
			 * inline extent and took care of all the unlocking
			 * and IO for us.  Otherwise, we need to submit all
			 * those pages down to the drive.
			 */
			if (!page_started && !ret)
				extent_write_locked_range(inode,
						  async_extent->start,
						  async_extent->start +
						  async_extent->ram_size - 1,
						  WB_SYNC_ALL);
			else if (ret)
				unlock_page(async_chunk->locked_page);
			kfree(async_extent);
			cond_resched();
			continue;
		}

		ret = btrfs_reserve_extent(root, async_extent->ram_size,
					   async_extent->compressed_size,
					   async_extent->compressed_size,
					   0, alloc_hint, &ins, 1, 1);
		if (ret) {
			free_async_extent_pages(async_extent);

			if (ret == -ENOSPC) {
				unlock_extent(io_tree, async_extent->start,
					      async_extent->start +
					      async_extent->ram_size - 1);

				/*
				 * We need to redirty the pages if we decide
				 * to fallback to uncompressed IO, otherwise
				 * we will not submit these pages down to
				 * lower layers.
				 */
				extent_range_redirty_for_io(inode,
						async_extent->start,
						async_extent->start +
						async_extent->ram_size - 1);

				goto retry;
			}
			goto out_free;
		}

		/*
		 * Here we're doing allocation and writeback of the
		 * compressed pages.
		 */
		em = create_io_em(inode, async_extent->start,
				  async_extent->ram_size,
				  async_extent->start,
				  ins.objectid,
				  ins.offset,
				  ins.offset,
				  async_extent->ram_size,
				  async_extent->compress_type,
				  BTRFS_ORDERED_COMPRESSED);
		if (IS_ERR(em))
			/* ret value is not necessary due to void function */
			goto out_free_reserve;
		free_extent_map(em);

		ret = btrfs_add_ordered_extent_compress(inode,
						async_extent->start,
						ins.objectid,
						async_extent->ram_size,
						ins.offset,
						BTRFS_ORDERED_COMPRESSED,
						async_extent->compress_type);
		if (ret) {
			btrfs_drop_extent_cache(BTRFS_I(inode),
						async_extent->start,
						async_extent->start +
						async_extent->ram_size - 1, 0);
			goto out_free_reserve;
		}
		btrfs_dec_block_group_reservations(fs_info, ins.objectid);

		/*
		 * Clear dirty, set writeback and unlock the pages.
		 */
		extent_clear_unlock_delalloc(inode, async_extent->start,
				async_extent->start +
				async_extent->ram_size - 1,
				async_extent->start +
				async_extent->ram_size - 1,
				NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
				PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
				PAGE_SET_WRITEBACK);
		if (btrfs_submit_compressed_write(inode,
				    async_extent->start,
				    async_extent->ram_size,
				    ins.objectid,
				    ins.offset, async_extent->pages,
				    async_extent->nr_pages,
				    async_chunk->write_flags)) {
			struct page *p = async_extent->pages[0];
			const u64 start = async_extent->start;
			const u64 end = start + async_extent->ram_size - 1;

			/*
			 * Submission failed: fake an endio on the first page
			 * so the ordered extent gets finished with an error,
			 * then end writeback on the whole range.
			 */
			p->mapping = inode->i_mapping;
			btrfs_writepage_endio_finish_ordered(p, start, end, 0);

			p->mapping = NULL;
			extent_clear_unlock_delalloc(inode, start, end, end,
						     NULL, 0,
						     PAGE_END_WRITEBACK |
						     PAGE_SET_ERROR);
			free_async_extent_pages(async_extent);
		}
		alloc_hint = ins.objectid + ins.offset;
		kfree(async_extent);
		cond_resched();
	}
	return;
out_free_reserve:
	btrfs_dec_block_group_reservations(fs_info, ins.objectid);
	btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
out_free:
	extent_clear_unlock_delalloc(inode, async_extent->start,
				     async_extent->start +
				     async_extent->ram_size - 1,
				     async_extent->start +
				     async_extent->ram_size - 1,
				     NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
				     EXTENT_DELALLOC_NEW |
				     EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
				     PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
				     PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK |
				     PAGE_SET_ERROR);
	free_async_extent_pages(async_extent);
	kfree(async_extent);
	goto again;
}
886
/*
 * Pick a disk byte number to use as the allocator hint for a new extent
 * at file offset @start: the block start of a neighbouring mapped extent
 * when one exists, otherwise 0.
 */
static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
				      u64 num_bytes)
{
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	struct extent_map *em;
	u64 alloc_hint = 0;

	read_lock(&em_tree->lock);
	em = search_extent_mapping(em_tree, start, num_bytes);
	if (em) {
		/*
		 * If block start isn't an actual block start (a hole or
		 * inline mapping), it's useless as a hint — fall back to
		 * the extent closest to the start of the file instead.
		 */
		if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
			free_extent_map(em);
			em = search_extent_mapping(em_tree, 0, 0);
			if (em && em->block_start < EXTENT_MAP_LAST_BYTE)
				alloc_hint = em->block_start;
			if (em)
				free_extent_map(em);
		} else {
			alloc_hint = em->block_start;
			free_extent_map(em);
		}
	}
	read_unlock(&em_tree->lock);

	return alloc_hint;
}
918
919
920
921
922
923
924
925
926
927
928
929
930
931
/*
 * When extent_io.c finds a delayed allocation range in the file, the call
 * backs end up in this code.  The basic idea is to allocate extents on
 * disk for the range and create ordered data structs in ram to track
 * those extents.
 *
 * locked_page is the page that writepage had locked already.  We use it
 * to make sure we don't do extra locks or unlocks.
 *
 * *page_started is set to one if we unlock locked_page and do everything
 * required to start IO on it.  It may be clean and already done with IO
 * when we return.
 */
static noinline int cow_file_range(struct inode *inode,
				   struct page *locked_page,
				   u64 start, u64 end, u64 delalloc_end,
				   int *page_started, unsigned long *nr_written,
				   int unlock, struct btrfs_dedupe_hash *hash)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 alloc_hint = 0;
	u64 num_bytes;
	unsigned long ram_size;
	u64 cur_alloc_size = 0;
	u64 blocksize = fs_info->sectorsize;
	struct btrfs_key ins;
	struct extent_map *em;
	unsigned clear_bits;
	unsigned long page_ops;
	bool extent_reserved = false;
	int ret = 0;

	/* free space inodes must go through their own COW path */
	if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
		WARN_ON_ONCE(1);
		ret = -EINVAL;
		goto out_unlock;
	}

	num_bytes = ALIGN(end - start + 1, blocksize);
	num_bytes = max(blocksize, num_bytes);
	ASSERT(num_bytes <= btrfs_super_total_bytes(fs_info->super_copy));

	inode_should_defrag(BTRFS_I(inode), start, end, num_bytes, SZ_64K);

	if (start == 0) {
		/* lets try to make an inline extent */
		ret = cow_file_range_inline(inode, start, end, 0,
					    BTRFS_COMPRESS_NONE, NULL);
		if (ret == 0) {
			/*
			 * We use DO_ACCOUNTING here because we need the
			 * delalloc_release_metadata to be run _after_ we
			 * drop our outstanding extent for clearing delalloc
			 * for this range.
			 */
			extent_clear_unlock_delalloc(inode, start, end,
				     delalloc_end, NULL,
				     EXTENT_LOCKED | EXTENT_DELALLOC |
				     EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
				     EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
				     PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
				     PAGE_END_WRITEBACK);
			*nr_written = *nr_written +
			     (end - start + PAGE_SIZE) / PAGE_SIZE;
			*page_started = 1;
			goto out;
		} else if (ret < 0) {
			goto out_unlock;
		}
	}

	alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
	btrfs_drop_extent_cache(BTRFS_I(inode), start,
			start + num_bytes - 1, 0);

	while (num_bytes > 0) {
		cur_alloc_size = num_bytes;
		ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
					   fs_info->sectorsize, 0, alloc_hint,
					   &ins, 1, 1);
		if (ret < 0)
			goto out_unlock;
		cur_alloc_size = ins.offset;
		extent_reserved = true;

		ram_size = ins.offset;
		em = create_io_em(inode, start, ins.offset, /* len */
				  start, /* orig_start */
				  ins.objectid, /* block_start */
				  ins.offset, /* block_len */
				  ins.offset, /* orig_block_len */
				  ram_size, /* ram_bytes */
				  BTRFS_COMPRESS_NONE, /* compress_type */
				  BTRFS_ORDERED_REGULAR /* type */);
		if (IS_ERR(em)) {
			ret = PTR_ERR(em);
			goto out_reserve;
		}
		free_extent_map(em);

		ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
					       ram_size, cur_alloc_size, 0);
		if (ret)
			goto out_drop_extent_cache;

		if (root->root_key.objectid ==
		    BTRFS_DATA_RELOC_TREE_OBJECTID) {
			ret = btrfs_reloc_clone_csums(inode, start,
						      cur_alloc_size);
			/*
			 * Only drop cache here, and process as normal.
			 *
			 * We must not allow extent_clear_unlock_delalloc()
			 * at out_unlock label to free meta of this ordered
			 * extent, as its meta should be freed by
			 * btrfs_finish_ordered_io().
			 *
			 * So we must continue until @start is increased to
			 * skip current ordered extent.
			 */
			if (ret)
				btrfs_drop_extent_cache(BTRFS_I(inode), start,
						start + ram_size - 1, 0);
		}

		btrfs_dec_block_group_reservations(fs_info, ins.objectid);

		/*
		 * We're not doing compressed IO: don't unlock the first
		 * page (which the caller expects to stay locked), don't
		 * clear any dirty bits and don't set any writeback bits.
		 *
		 * Do set the Private2 bit so we know this page was properly
		 * setup for writepage.
		 */
		page_ops = unlock ? PAGE_UNLOCK : 0;
		page_ops |= PAGE_SET_PRIVATE2;

		extent_clear_unlock_delalloc(inode, start,
					     start + ram_size - 1,
					     delalloc_end, locked_page,
					     EXTENT_LOCKED | EXTENT_DELALLOC,
					     page_ops);
		if (num_bytes < cur_alloc_size)
			num_bytes = 0;
		else
			num_bytes -= cur_alloc_size;
		alloc_hint = ins.objectid + ins.offset;
		start += cur_alloc_size;
		extent_reserved = false;

		/*
		 * btrfs_reloc_clone_csums() error, since start is increased
		 * extent_clear_unlock_delalloc() at out_unlock label won't
		 * free metadata of current ordered extent, we're OK to exit.
		 */
		if (ret)
			goto out_unlock;
	}
out:
	return ret;

out_drop_extent_cache:
	btrfs_drop_extent_cache(BTRFS_I(inode), start, start + ram_size - 1, 0);
out_reserve:
	btrfs_dec_block_group_reservations(fs_info, ins.objectid);
	btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
out_unlock:
	clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
		EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV;
	page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
		PAGE_END_WRITEBACK;
	/*
	 * If we reserved an extent for our delalloc range (or a subrange)
	 * and failed to create the respective ordered extent, then it means
	 * that when we reserved the extent we decremented the extent's size
	 * from the data space_info's bytes_may_use counter and incremented
	 * the space_info's bytes_reserved counter by the same amount.  We
	 * must make sure extent_clear_unlock_delalloc() does not try to
	 * decrement again the data space_info's bytes_may_use counter,
	 * therefore we do not pass it the flag EXTENT_CLEAR_DATA_RESV.
	 */
	if (extent_reserved) {
		extent_clear_unlock_delalloc(inode, start,
					     start + cur_alloc_size,
					     start + cur_alloc_size,
					     locked_page,
					     clear_bits,
					     page_ops);
		start += cur_alloc_size;
		if (start >= end)
			goto out;
	}
	extent_clear_unlock_delalloc(inode, start, end, delalloc_end,
				     locked_page,
				     clear_bits | EXTENT_CLEAR_DATA_RESV,
				     page_ops);
	goto out;
}
1118
1119
1120
1121
1122static noinline void async_cow_start(struct btrfs_work *work)
1123{
1124 struct async_chunk *async_chunk;
1125 int num_added = 0;
1126
1127 async_chunk = container_of(work, struct async_chunk, work);
1128
1129 compress_file_range(async_chunk, &num_added);
1130 if (num_added == 0) {
1131 btrfs_add_delayed_iput(async_chunk->inode);
1132 async_chunk->inode = NULL;
1133 }
1134}
1135
1136
1137
1138
/*
 * Work queue callback to submit previously compressed pages.
 */
static noinline void async_cow_submit(struct btrfs_work *work)
{
	struct async_chunk *async_chunk = container_of(work, struct async_chunk,
						     work);
	struct btrfs_fs_info *fs_info = btrfs_work_owner(work);
	unsigned long nr_pages;

	nr_pages = (async_chunk->end - async_chunk->start + PAGE_SIZE) >>
		PAGE_SHIFT;

	/* atomic_sub_return implies a barrier */
	if (atomic_sub_return(nr_pages, &fs_info->async_delalloc_pages) <
	    5 * SZ_1M)
		cond_wake_up_nomb(&fs_info->async_submit_wait);

	/*
	 * ->inode could be NULL if async_cow_start has failed to compress,
	 * in which case we don't have anything to submit, yet we need to
	 * always adjust ->async_delalloc_pages as its paired with the init
	 * happening in cow_file_range_async
	 */
	if (async_chunk->inode)
		submit_compressed_extents(async_chunk);
}
1163
/* Work queue teardown callback: drop the chunk's inode ref and, for the
 * last chunk in the batch, free the whole async_cow allocation. */
static noinline void async_cow_free(struct btrfs_work *work)
{
	struct async_chunk *async_chunk;

	async_chunk = container_of(work, struct async_chunk, work);
	if (async_chunk->inode)
		btrfs_add_delayed_iput(async_chunk->inode);
	/*
	 * Since the pointer to 'pending' is at the beginning of the array
	 * of async_chunk's, freeing it ensures the whole array has been
	 * freed.
	 */
	if (atomic_dec_and_test(async_chunk->pending))
		kvfree(async_chunk->pending);
}
1178
/*
 * Split the delalloc range [start, end] into 512K chunks and queue each
 * as an async_chunk work item (compress -> submit -> free).  On success
 * *page_started is set and the pages are handed over to the workers.
 */
static int cow_file_range_async(struct inode *inode, struct page *locked_page,
				u64 start, u64 end, int *page_started,
				unsigned long *nr_written,
				unsigned int write_flags)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct async_cow *ctx;
	struct async_chunk *async_chunk;
	unsigned long nr_pages;
	u64 cur_end;
	/*
	 * NOTE(review): [start, end] is inclusive, so the range length is
	 * end - start + 1; using end - start here looks like it could
	 * undercount by one chunk when the length is exactly N*512K + 1.
	 * Delalloc ranges are sector-aligned so this may be unreachable —
	 * confirm against upstream before changing.
	 */
	u64 num_chunks = DIV_ROUND_UP(end - start, SZ_512K);
	int i;
	bool should_compress;
	unsigned nofs_flag;

	unlock_extent(&BTRFS_I(inode)->io_tree, start, end);

	if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
	    !btrfs_test_opt(fs_info, FORCE_COMPRESS)) {
		num_chunks = 1;
		should_compress = false;
	} else {
		should_compress = true;
	}

	nofs_flag = memalloc_nofs_save();
	ctx = kvmalloc(struct_size(ctx, chunks, num_chunks), GFP_KERNEL);
	memalloc_nofs_restore(nofs_flag);

	if (!ctx) {
		unsigned clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC |
			EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
			EXTENT_DO_ACCOUNTING;
		unsigned long page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
			PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK |
			PAGE_SET_ERROR;

		extent_clear_unlock_delalloc(inode, start, end, 0, locked_page,
					     clear_bits, page_ops);
		return -ENOMEM;
	}

	async_chunk = ctx->chunks;
	atomic_set(&ctx->num_chunks, num_chunks);

	for (i = 0; i < num_chunks; i++) {
		if (should_compress)
			cur_end = min(end, start + SZ_512K - 1);
		else
			cur_end = end;

		/*
		 * igrab is called higher up in the call chain, take only
		 * the lightweight reference for the callback lifetime
		 */
		ihold(inode);
		async_chunk[i].pending = &ctx->num_chunks;
		async_chunk[i].inode = inode;
		async_chunk[i].start = start;
		async_chunk[i].end = cur_end;
		async_chunk[i].locked_page = locked_page;
		async_chunk[i].write_flags = write_flags;
		INIT_LIST_HEAD(&async_chunk[i].extents);

		btrfs_init_work(&async_chunk[i].work,
				btrfs_delalloc_helper,
				async_cow_start, async_cow_submit,
				async_cow_free);

		nr_pages = DIV_ROUND_UP(cur_end - start, PAGE_SIZE);
		atomic_add(nr_pages, &fs_info->async_delalloc_pages);

		btrfs_queue_work(fs_info->delalloc_workers, &async_chunk[i].work);

		*nr_written += nr_pages;
		start = cur_end + 1;
	}
	*page_started = 1;
	return 0;
}
1259
1260static noinline int csum_exist_in_range(struct btrfs_fs_info *fs_info,
1261 u64 bytenr, u64 num_bytes)
1262{
1263 int ret;
1264 struct btrfs_ordered_sum *sums;
1265 LIST_HEAD(list);
1266
1267 ret = btrfs_lookup_csums_range(fs_info->csum_root, bytenr,
1268 bytenr + num_bytes - 1, &list, 0);
1269 if (ret == 0 && list_empty(&list))
1270 return 0;
1271
1272 while (!list_empty(&list)) {
1273 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
1274 list_del(&sums->list);
1275 kfree(sums);
1276 }
1277 if (ret < 0)
1278 return ret;
1279 return 1;
1280}
1281
1282
1283
1284
1285
1286
1287
1288
/*
 * The nocow writeback path.
 *
 * Walk the file extent items covering [start, end] and decide, extent by
 * extent, whether the dirty data can be written in place over the existing
 * disk extent (nocow) or must be COWed.  Consecutive ranges that need COW
 * are accumulated starting at cow_start and flushed through
 * cow_file_range(); nocow ranges reuse the existing disk bytenr and only
 * get an ordered extent.
 *
 * @force: nonzero when the inode has NODATACOW set, in which case regular
 *         extents may also be overwritten; otherwise only preallocated
 *         extents qualify for nocow.
 */
static noinline int run_delalloc_nocow(struct inode *inode,
				struct page *locked_page,
			      u64 start, u64 end, int *page_started, int force,
			      unsigned long *nr_written)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_buffer *leaf;
	struct btrfs_path *path;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key found_key;
	struct extent_map *em;
	u64 cow_start;		/* start of the pending to-be-COWed range */
	u64 cur_offset;
	u64 extent_end;
	u64 extent_offset;
	u64 disk_bytenr;
	u64 num_bytes;
	u64 disk_num_bytes;
	u64 ram_bytes;
	int extent_type;
	int ret;
	int type;
	int nocow;
	int check_prev = 1;
	bool nolock;		/* true for the free space cache inode */
	u64 ino = btrfs_ino(BTRFS_I(inode));

	path = btrfs_alloc_path();
	if (!path) {
		/* Nothing was written; unlock and clean the whole range. */
		extent_clear_unlock_delalloc(inode, start, end, end,
					     locked_page,
					     EXTENT_LOCKED | EXTENT_DELALLOC |
					     EXTENT_DO_ACCOUNTING |
					     EXTENT_DEFRAG, PAGE_UNLOCK |
					     PAGE_CLEAR_DIRTY |
					     PAGE_SET_WRITEBACK |
					     PAGE_END_WRITEBACK);
		return -ENOMEM;
	}

	nolock = btrfs_is_free_space_inode(BTRFS_I(inode));

	cow_start = (u64)-1;
	cur_offset = start;
	while (1) {
		ret = btrfs_lookup_file_extent(NULL, root, path, ino,
					       cur_offset, 0);
		if (ret < 0)
			goto error;
		/*
		 * On an inexact match the previous item may be the file
		 * extent that covers cur_offset; step back to it once.
		 */
		if (ret > 0 && path->slots[0] > 0 && check_prev) {
			leaf = path->nodes[0];
			btrfs_item_key_to_cpu(leaf, &found_key,
					      path->slots[0] - 1);
			if (found_key.objectid == ino &&
			    found_key.type == BTRFS_EXTENT_DATA_KEY)
				path->slots[0]--;
		}
		check_prev = 0;
next_slot:
		leaf = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0) {
				if (cow_start != (u64)-1)
					cur_offset = cow_start;
				goto error;
			}
			if (ret > 0)
				break;
			leaf = path->nodes[0];
		}

		nocow = 0;
		disk_bytenr = 0;
		num_bytes = 0;
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		/* Past this inode's items: COW the remainder of the range. */
		if (found_key.objectid > ino)
			break;
		if (WARN_ON_ONCE(found_key.objectid < ino) ||
		    found_key.type < BTRFS_EXTENT_DATA_KEY) {
			path->slots[0]++;
			goto next_slot;
		}
		if (found_key.type > BTRFS_EXTENT_DATA_KEY ||
		    found_key.offset > end)
			break;

		/*
		 * A gap (hole) before this extent: the gap itself must be
		 * COWed.
		 */
		if (found_key.offset > cur_offset) {
			extent_end = found_key.offset;
			extent_type = 0;
			goto out_check;
		}

		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		extent_type = btrfs_file_extent_type(leaf, fi);

		ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
		if (extent_type == BTRFS_FILE_EXTENT_REG ||
		    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
			disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
			extent_offset = btrfs_file_extent_offset(leaf, fi);
			extent_end = found_key.offset +
				btrfs_file_extent_num_bytes(leaf, fi);
			disk_num_bytes =
				btrfs_file_extent_disk_num_bytes(leaf, fi);
			if (extent_end <= start) {
				path->slots[0]++;
				goto next_slot;
			}
			/* A hole extent (bytenr 0) always needs COW. */
			if (disk_bytenr == 0)
				goto out_check;
			/* Transformed data cannot be overwritten in place. */
			if (btrfs_file_extent_compression(leaf, fi) ||
			    btrfs_file_extent_encryption(leaf, fi) ||
			    btrfs_file_extent_other_encoding(leaf, fi))
				goto out_check;
			/*
			 * An extent created before the subvolume's last
			 * snapshot may be referenced by that snapshot, so
			 * it is not safe to overwrite.  The free space
			 * inode is exempt from this check.
			 */
			if (!nolock &&
			    btrfs_file_extent_generation(leaf, fi) <=
			    btrfs_root_last_snapshot(&root->root_item))
				goto out_check;
			/* Without NODATACOW only prealloc extents may nocow. */
			if (extent_type == BTRFS_FILE_EXTENT_REG && !force)
				goto out_check;
			if (btrfs_extent_readonly(fs_info, disk_bytenr))
				goto out_check;
			ret = btrfs_cross_ref_exist(root, ino,
						    found_key.offset -
						    extent_offset, disk_bytenr);
			if (ret) {
				/*
				 * ret > 0: the extent is referenced from
				 * elsewhere, so we must COW.
				 * ret < 0: lookup error, bail out.
				 */
				if (ret < 0) {
					if (cow_start != (u64)-1)
						cur_offset = cow_start;
					goto error;
				}

				WARN_ON_ONCE(nolock);
				goto out_check;
			}
			disk_bytenr += extent_offset;
			disk_bytenr += cur_offset - found_key.offset;
			num_bytes = min(end + 1, extent_end) - cur_offset;
			/*
			 * A snapshot of this subvolume is being created;
			 * force COW so the new write does not land in
			 * extents the snapshot will reference.
			 */
			if (!nolock && atomic_read(&root->snapshot_force_cow))
				goto out_check;
			/*
			 * If checksums already exist for this disk range,
			 * overwriting it in place would make them stale,
			 * so COW instead.
			 */
			ret = csum_exist_in_range(fs_info, disk_bytenr,
						  num_bytes);
			if (ret) {
				/*
				 * ret == 1: csums exist, must COW.
				 * ret < 0: csum lookup error, bail out.
				 */
				if (ret < 0) {
					if (cow_start != (u64)-1)
						cur_offset = cow_start;
					goto error;
				}
				WARN_ON_ONCE(nolock);
				goto out_check;
			}
			/* Keep the block group from being removed under us. */
			if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr))
				goto out_check;
			nocow = 1;
		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
			/* Inline extents are always COWed. */
			extent_end = found_key.offset +
				btrfs_file_extent_ram_bytes(leaf, fi);
			extent_end = ALIGN(extent_end,
					   fs_info->sectorsize);
		} else {
			BUG();
		}
out_check:
		if (extent_end <= start) {
			path->slots[0]++;
			if (nocow)
				btrfs_dec_nocow_writers(fs_info, disk_bytenr);
			goto next_slot;
		}
		if (!nocow) {
			/* Begin or extend the pending COW range. */
			if (cow_start == (u64)-1)
				cow_start = cur_offset;
			cur_offset = extent_end;
			if (cur_offset > end)
				break;
			path->slots[0]++;
			goto next_slot;
		}

		btrfs_release_path(path);
		/* Flush any COW range accumulated before this nocow extent. */
		if (cow_start != (u64)-1) {
			ret = cow_file_range(inode, locked_page,
					     cow_start, found_key.offset - 1,
					     end, page_started, nr_written, 1,
					     NULL);
			if (ret) {
				if (nocow)
					btrfs_dec_nocow_writers(fs_info,
								disk_bytenr);
				goto error;
			}
			cow_start = (u64)-1;
		}

		if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
			u64 orig_start = found_key.offset - extent_offset;

			/* Pin an extent map for the write into prealloc. */
			em = create_io_em(inode, cur_offset, num_bytes,
					  orig_start,
					  disk_bytenr, /* block_start */
					  num_bytes, /* block_len */
					  disk_num_bytes, /* orig_block_len */
					  ram_bytes, BTRFS_COMPRESS_NONE,
					  BTRFS_ORDERED_PREALLOC);
			if (IS_ERR(em)) {
				if (nocow)
					btrfs_dec_nocow_writers(fs_info,
								disk_bytenr);
				ret = PTR_ERR(em);
				goto error;
			}
			free_extent_map(em);
		}

		if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
			type = BTRFS_ORDERED_PREALLOC;
		} else {
			type = BTRFS_ORDERED_NOCOW;
		}

		ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr,
					       num_bytes, num_bytes, type);
		if (nocow)
			btrfs_dec_nocow_writers(fs_info, disk_bytenr);
		BUG_ON(ret); /* -ENOMEM */

		if (root->root_key.objectid ==
		    BTRFS_DATA_RELOC_TREE_OBJECTID)
			/*
			 * Error handled later: running the error handler
			 * here would free metadata of the ordered extent
			 * created just above.
			 */
			ret = btrfs_reloc_clone_csums(inode, cur_offset,
						      num_bytes);

		extent_clear_unlock_delalloc(inode, cur_offset,
					     cur_offset + num_bytes - 1, end,
					     locked_page, EXTENT_LOCKED |
					     EXTENT_DELALLOC |
					     EXTENT_CLEAR_DATA_RESV,
					     PAGE_UNLOCK | PAGE_SET_PRIVATE2);

		cur_offset = extent_end;

		/*
		 * A btrfs_reloc_clone_csums() error is only handled now:
		 * the metadata of the ordered extent created above will be
		 * freed by btrfs_finish_ordered_io().
		 */
		if (ret)
			goto error;
		if (cur_offset > end)
			break;
	}
	btrfs_release_path(path);

	/* COW whatever remains at the tail of the requested range. */
	if (cur_offset <= end && cow_start == (u64)-1)
		cow_start = cur_offset;

	if (cow_start != (u64)-1) {
		cur_offset = end;
		ret = cow_file_range(inode, locked_page, cow_start, end, end,
				     page_started, nr_written, 1, NULL);
		if (ret)
			goto error;
	}

error:
	/* On failure, unlock and clean up the part we did not process. */
	if (ret && cur_offset < end)
		extent_clear_unlock_delalloc(inode, cur_offset, end, end,
					     locked_page, EXTENT_LOCKED |
					     EXTENT_DELALLOC | EXTENT_DEFRAG |
					     EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
					     PAGE_CLEAR_DIRTY |
					     PAGE_SET_WRITEBACK |
					     PAGE_END_WRITEBACK);
	btrfs_free_path(path);
	return ret;
}
1594
1595static inline int need_force_cow(struct inode *inode, u64 start, u64 end)
1596{
1597
1598 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
1599 !(BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC))
1600 return 0;
1601
1602
1603
1604
1605
1606
1607 if (BTRFS_I(inode)->defrag_bytes &&
1608 test_range_bit(&BTRFS_I(inode)->io_tree, start, end,
1609 EXTENT_DEFRAG, 0, NULL))
1610 return 1;
1611
1612 return 0;
1613}
1614
1615
1616
1617
1618
1619int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page,
1620 u64 start, u64 end, int *page_started, unsigned long *nr_written,
1621 struct writeback_control *wbc)
1622{
1623 int ret;
1624 int force_cow = need_force_cow(inode, start, end);
1625 unsigned int write_flags = wbc_to_write_flags(wbc);
1626
1627 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW && !force_cow) {
1628 ret = run_delalloc_nocow(inode, locked_page, start, end,
1629 page_started, 1, nr_written);
1630 } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) {
1631 ret = run_delalloc_nocow(inode, locked_page, start, end,
1632 page_started, 0, nr_written);
1633 } else if (!inode_need_compress(inode, start, end)) {
1634 ret = cow_file_range(inode, locked_page, start, end, end,
1635 page_started, nr_written, 1, NULL);
1636 } else {
1637 set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
1638 &BTRFS_I(inode)->runtime_flags);
1639 ret = cow_file_range_async(inode, locked_page, start, end,
1640 page_started, nr_written,
1641 write_flags);
1642 }
1643 if (ret)
1644 btrfs_cleanup_ordered_extents(inode, locked_page, start,
1645 end - start + 1);
1646 return ret;
1647}
1648
1649void btrfs_split_delalloc_extent(struct inode *inode,
1650 struct extent_state *orig, u64 split)
1651{
1652 u64 size;
1653
1654
1655 if (!(orig->state & EXTENT_DELALLOC))
1656 return;
1657
1658 size = orig->end - orig->start + 1;
1659 if (size > BTRFS_MAX_EXTENT_SIZE) {
1660 u32 num_extents;
1661 u64 new_size;
1662
1663
1664
1665
1666
1667 new_size = orig->end - split + 1;
1668 num_extents = count_max_extents(new_size);
1669 new_size = split - orig->start;
1670 num_extents += count_max_extents(new_size);
1671 if (count_max_extents(size) >= num_extents)
1672 return;
1673 }
1674
1675 spin_lock(&BTRFS_I(inode)->lock);
1676 btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
1677 spin_unlock(&BTRFS_I(inode)->lock);
1678}
1679
1680
1681
1682
1683
1684
1685void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new,
1686 struct extent_state *other)
1687{
1688 u64 new_size, old_size;
1689 u32 num_extents;
1690
1691
1692 if (!(other->state & EXTENT_DELALLOC))
1693 return;
1694
1695 if (new->start > other->start)
1696 new_size = new->end - other->start + 1;
1697 else
1698 new_size = other->end - new->start + 1;
1699
1700
1701 if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
1702 spin_lock(&BTRFS_I(inode)->lock);
1703 btrfs_mod_outstanding_extents(BTRFS_I(inode), -1);
1704 spin_unlock(&BTRFS_I(inode)->lock);
1705 return;
1706 }
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726 old_size = other->end - other->start + 1;
1727 num_extents = count_max_extents(old_size);
1728 old_size = new->end - new->start + 1;
1729 num_extents += count_max_extents(old_size);
1730 if (count_max_extents(new_size) >= num_extents)
1731 return;
1732
1733 spin_lock(&BTRFS_I(inode)->lock);
1734 btrfs_mod_outstanding_extents(BTRFS_I(inode), -1);
1735 spin_unlock(&BTRFS_I(inode)->lock);
1736}
1737
1738static void btrfs_add_delalloc_inodes(struct btrfs_root *root,
1739 struct inode *inode)
1740{
1741 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
1742
1743 spin_lock(&root->delalloc_lock);
1744 if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1745 list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
1746 &root->delalloc_inodes);
1747 set_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1748 &BTRFS_I(inode)->runtime_flags);
1749 root->nr_delalloc_inodes++;
1750 if (root->nr_delalloc_inodes == 1) {
1751 spin_lock(&fs_info->delalloc_root_lock);
1752 BUG_ON(!list_empty(&root->delalloc_root));
1753 list_add_tail(&root->delalloc_root,
1754 &fs_info->delalloc_roots);
1755 spin_unlock(&fs_info->delalloc_root_lock);
1756 }
1757 }
1758 spin_unlock(&root->delalloc_lock);
1759}
1760
1761
1762void __btrfs_del_delalloc_inode(struct btrfs_root *root,
1763 struct btrfs_inode *inode)
1764{
1765 struct btrfs_fs_info *fs_info = root->fs_info;
1766
1767 if (!list_empty(&inode->delalloc_inodes)) {
1768 list_del_init(&inode->delalloc_inodes);
1769 clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1770 &inode->runtime_flags);
1771 root->nr_delalloc_inodes--;
1772 if (!root->nr_delalloc_inodes) {
1773 ASSERT(list_empty(&root->delalloc_inodes));
1774 spin_lock(&fs_info->delalloc_root_lock);
1775 BUG_ON(list_empty(&root->delalloc_root));
1776 list_del_init(&root->delalloc_root);
1777 spin_unlock(&fs_info->delalloc_root_lock);
1778 }
1779 }
1780}
1781
/*
 * Locked wrapper around __btrfs_del_delalloc_inode() for callers that do
 * not already hold root->delalloc_lock.
 */
static void btrfs_del_delalloc_inode(struct btrfs_root *root,
				     struct btrfs_inode *inode)
{
	spin_lock(&root->delalloc_lock);
	__btrfs_del_delalloc_inode(root, inode);
	spin_unlock(&root->delalloc_lock);
}
1789
1790
1791
1792
1793
/*
 * io tree hook called when bits are set on a range.  Handles delalloc
 * accounting: outstanding extents, the fs-wide and per-inode delalloc
 * byte counters, defrag bytes, and placing the inode on its root's
 * delalloc list when it gains its first delalloc bytes.
 */
void btrfs_set_delalloc_extent(struct inode *inode, struct extent_state *state,
			       unsigned *bits)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);

	/* EXTENT_DEFRAG is only meaningful together with EXTENT_DELALLOC. */
	if ((*bits & EXTENT_DEFRAG) && !(*bits & EXTENT_DELALLOC))
		WARN_ON(1);

	/*
	 * Account only on the 0 -> 1 transition of EXTENT_DELALLOC for
	 * this range so a redundant set does not double count.
	 */
	if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
		struct btrfs_root *root = BTRFS_I(inode)->root;
		u64 len = state->end + 1 - state->start;
		u32 num_extents = count_max_extents(len);
		/* The free space inode stays off the delalloc inode list. */
		bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode));

		spin_lock(&BTRFS_I(inode)->lock);
		btrfs_mod_outstanding_extents(BTRFS_I(inode), num_extents);
		spin_unlock(&BTRFS_I(inode)->lock);

		/* The sanity-test fs_info has no delalloc counters. */
		if (btrfs_is_testing(fs_info))
			return;

		percpu_counter_add_batch(&fs_info->delalloc_bytes, len,
					 fs_info->delalloc_batch);
		spin_lock(&BTRFS_I(inode)->lock);
		BTRFS_I(inode)->delalloc_bytes += len;
		if (*bits & EXTENT_DEFRAG)
			BTRFS_I(inode)->defrag_bytes += len;
		if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
					 &BTRFS_I(inode)->runtime_flags))
			btrfs_add_delalloc_inodes(root, inode);
		spin_unlock(&BTRFS_I(inode)->lock);
	}

	/* Track bytes that were never written before (for i_size logic). */
	if (!(state->state & EXTENT_DELALLOC_NEW) &&
	    (*bits & EXTENT_DELALLOC_NEW)) {
		spin_lock(&BTRFS_I(inode)->lock);
		BTRFS_I(inode)->new_delalloc_bytes += state->end + 1 -
			state->start;
		spin_unlock(&BTRFS_I(inode)->lock);
	}
}
1840
1841
1842
1843
1844
/*
 * io tree hook called when bits are cleared from a range.  Mirrors
 * btrfs_set_delalloc_extent(): drops outstanding extents, releases
 * metadata/data reservations and updates the delalloc counters and the
 * root's delalloc inode list.
 */
void btrfs_clear_delalloc_extent(struct inode *vfs_inode,
				 struct extent_state *state, unsigned *bits)
{
	struct btrfs_inode *inode = BTRFS_I(vfs_inode);
	struct btrfs_fs_info *fs_info = btrfs_sb(vfs_inode->i_sb);
	u64 len = state->end + 1 - state->start;
	u32 num_extents = count_max_extents(len);

	if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG)) {
		spin_lock(&inode->lock);
		inode->defrag_bytes -= len;
		spin_unlock(&inode->lock);
	}

	/*
	 * Account only on the 1 -> 0 transition of EXTENT_DELALLOC for
	 * this range so a redundant clear does not double free.
	 */
	if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
		struct btrfs_root *root = inode->root;
		bool do_list = !btrfs_is_free_space_inode(inode);

		spin_lock(&inode->lock);
		btrfs_mod_outstanding_extents(inode, -num_extents);
		spin_unlock(&inode->lock);

		/*
		 * NOTE(review): the tree root is excluded here, presumably
		 * because its delalloc (free space cache writeout) does not
		 * hold this kind of metadata reservation — confirm.
		 */
		if (*bits & EXTENT_CLEAR_META_RESV &&
		    root != fs_info->tree_root)
			btrfs_delalloc_release_metadata(inode, len, false);

		/* The sanity-test fs_info has no delalloc counters. */
		if (btrfs_is_testing(fs_info))
			return;

		/*
		 * Free the reserved data space, except for ranges that
		 * never reserved any (NORESERVE, data reloc tree).
		 */
		if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID &&
		    do_list && !(state->state & EXTENT_NORESERVE) &&
		    (*bits & EXTENT_CLEAR_DATA_RESV))
			btrfs_free_reserved_data_space_noquota(
					&inode->vfs_inode,
					state->start, len);

		percpu_counter_add_batch(&fs_info->delalloc_bytes, -len,
					 fs_info->delalloc_batch);
		spin_lock(&inode->lock);
		inode->delalloc_bytes -= len;
		if (do_list && inode->delalloc_bytes == 0 &&
		    test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
			     &inode->runtime_flags))
			btrfs_del_delalloc_inode(root, inode);
		spin_unlock(&inode->lock);
	}

	if ((state->state & EXTENT_DELALLOC_NEW) &&
	    (*bits & EXTENT_DELALLOC_NEW)) {
		spin_lock(&inode->lock);
		ASSERT(inode->new_delalloc_bytes >= len);
		inode->new_delalloc_bytes -= len;
		spin_unlock(&inode->lock);
	}
}
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
1927 unsigned long bio_flags)
1928{
1929 struct inode *inode = page->mapping->host;
1930 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
1931 u64 logical = (u64)bio->bi_iter.bi_sector << 9;
1932 u64 length = 0;
1933 u64 map_length;
1934 int ret;
1935
1936 if (bio_flags & EXTENT_BIO_COMPRESSED)
1937 return 0;
1938
1939 length = bio->bi_iter.bi_size;
1940 map_length = length;
1941 ret = btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
1942 NULL, 0);
1943 if (ret < 0)
1944 return ret;
1945 if (map_length < length + size)
1946 return 1;
1947 return 0;
1948}
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958static blk_status_t btrfs_submit_bio_start(void *private_data, struct bio *bio,
1959 u64 bio_offset)
1960{
1961 struct inode *inode = private_data;
1962 blk_status_t ret = 0;
1963
1964 ret = btrfs_csum_one_bio(inode, bio, 0, 0);
1965 BUG_ON(ret);
1966 return 0;
1967}
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
/*
 * Bio submission hook for data inodes.
 *
 * Reads: register the bio with the end-io workqueue, hand compressed
 * bios to the compressed read path, otherwise look up checksums (unless
 * the inode is NODATASUM) and map the bio to the device.
 *
 * Writes: compute data checksums — asynchronously via the workqueues
 * when nobody is doing synchronous writes on the inode — then map the
 * bio.  On any failure the bio is completed with the error status.
 */
static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
					  int mirror_num,
					  unsigned long bio_flags)

{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
	blk_status_t ret = 0;
	int skip_sum;
	/* Checksum asynchronously only when no sync writers are active. */
	int async = !atomic_read(&BTRFS_I(inode)->sync_writers);

	skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;

	if (btrfs_is_free_space_inode(BTRFS_I(inode)))
		metadata = BTRFS_WQ_ENDIO_FREE_SPACE;

	if (bio_op(bio) != REQ_OP_WRITE) {
		ret = btrfs_bio_wq_end_io(fs_info, bio, metadata);
		if (ret)
			goto out;

		if (bio_flags & EXTENT_BIO_COMPRESSED) {
			ret = btrfs_submit_compressed_read(inode, bio,
							   mirror_num,
							   bio_flags);
			goto out;
		} else if (!skip_sum) {
			/* Fetch csums so end-io can verify the data. */
			ret = btrfs_lookup_bio_sums(inode, bio, NULL);
			if (ret)
				goto out;
		}
		goto mapit;
	} else if (async && !skip_sum) {
		/*
		 * The data reloc tree gets its csums cloned from the
		 * source extents (see btrfs_reloc_clone_csums() in the
		 * nocow path), so don't checksum here.
		 */
		if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
			goto mapit;

		/* Defer checksumming to btrfs_submit_bio_start(). */
		ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, bio_flags,
					  0, inode, btrfs_submit_bio_start);
		goto out;
	} else if (!skip_sum) {
		/* Synchronous write: checksum inline before mapping. */
		ret = btrfs_csum_one_bio(inode, bio, 0, 0);
		if (ret)
			goto out;
	}

mapit:
	ret = btrfs_map_bio(fs_info, bio, mirror_num, 0);

out:
	if (ret) {
		/* Report the error through the bio's completion. */
		bio->bi_status = ret;
		bio_endio(bio);
	}
	return ret;
}
2044
2045
2046
2047
2048
2049static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
2050 struct inode *inode, struct list_head *list)
2051{
2052 struct btrfs_ordered_sum *sum;
2053 int ret;
2054
2055 list_for_each_entry(sum, list, list) {
2056 trans->adding_csums = true;
2057 ret = btrfs_csum_file_blocks(trans,
2058 BTRFS_I(inode)->root->fs_info->csum_root, sum);
2059 trans->adding_csums = false;
2060 if (ret)
2061 return ret;
2062 }
2063 return 0;
2064}
2065
/*
 * Mark the inclusive byte range [start, end] as delalloc in the inode's
 * io tree, plus any @extra_bits the caller requests.  @end is the last
 * byte of the range and must therefore never be page aligned (hence the
 * WARN).  @dedupe is unused here.
 */
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
			      unsigned int extra_bits,
			      struct extent_state **cached_state, int dedupe)
{
	WARN_ON(PAGE_ALIGNED(end));
	return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
				   extra_bits, cached_state);
}
2074
2075
/*
 * Deferred work item queued by btrfs_writepage_cow_fixup() for a page
 * that was dirtied without going through the normal delalloc
 * reservation path; processed by btrfs_writepage_fixup_worker().
 */
struct btrfs_writepage_fixup {
	struct page *page;	/* holds a reference while the work is queued */
	struct btrfs_work work;
};
2080
/*
 * Worker for btrfs_writepage_cow_fixup(): the page was dirtied without
 * the normal delalloc reservation (no Private2 bit, no ordered extent).
 * Reserve space and mark the page's range delalloc so a later writeback
 * pass handles it properly, re-dirtying the page on success.
 */
static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
{
	struct btrfs_writepage_fixup *fixup;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	struct extent_changeset *data_reserved = NULL;
	struct page *page;
	struct inode *inode;
	u64 page_start;
	u64 page_end;
	int ret;

	fixup = container_of(work, struct btrfs_writepage_fixup, work);
	page = fixup->page;
again:
	lock_page(page);
	/* Page truncated, cleaned, or fixup already handled: nothing to do. */
	if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
		ClearPageChecked(page);
		goto out_page;
	}

	inode = page->mapping->host;
	page_start = page_offset(page);
	page_end = page_offset(page) + PAGE_SIZE - 1;

	lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end,
			 &cached_state);

	/* An ordered extent now covers the page; writeback will handle it. */
	if (PagePrivate2(page))
		goto out;

	ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start,
					PAGE_SIZE);
	if (ordered) {
		/* Wait for the in-flight ordered extent and start over. */
		unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start,
				     page_end, &cached_state);
		unlock_page(page);
		btrfs_start_ordered_extent(inode, ordered, 1);
		btrfs_put_ordered_extent(ordered);
		goto again;
	}

	ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
					   PAGE_SIZE);
	if (ret) {
		mapping_set_error(page->mapping, ret);
		end_extent_writepage(page, ret, page_start, page_end);
		ClearPageChecked(page);
		goto out;
	}

	ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0,
					&cached_state, 0);
	if (ret) {
		/*
		 * NOTE(review): unlike the success path below, this error
		 * path never calls btrfs_delalloc_release_extents() for the
		 * reservation made just above — looks like an outstanding
		 * extent leak; confirm against the reservation semantics.
		 */
		mapping_set_error(page->mapping, ret);
		end_extent_writepage(page, ret, page_start, page_end);
		ClearPageChecked(page);
		goto out;
	}

	ClearPageChecked(page);
	set_page_dirty(page);
	btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, false);
out:
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
			     &cached_state);
out_page:
	unlock_page(page);
	put_page(page);		/* drop the ref taken when the work was queued */
	kfree(fixup);
	extent_changeset_free(data_reserved);
}
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end)
2167{
2168 struct inode *inode = page->mapping->host;
2169 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2170 struct btrfs_writepage_fixup *fixup;
2171
2172
2173 if (TestClearPagePrivate2(page))
2174 return 0;
2175
2176 if (PageChecked(page))
2177 return -EAGAIN;
2178
2179 fixup = kzalloc(sizeof(*fixup), GFP_NOFS);
2180 if (!fixup)
2181 return -EAGAIN;
2182
2183 SetPageChecked(page);
2184 get_page(page);
2185 btrfs_init_work(&fixup->work, btrfs_fixup_helper,
2186 btrfs_writepage_fixup_worker, NULL, NULL);
2187 fixup->page = page;
2188 btrfs_queue_work(fs_info->fixup_workers, &fixup->work);
2189 return -EBUSY;
2190}
2191
/*
 * Insert the file extent item for a finished write into the fs tree:
 * drop any old extents overlapping [file_pos, file_pos + num_bytes),
 * write the new item, update inode byte accounting, and add the delayed
 * ref for the new extent (converting the qgroup reservation on the way).
 *
 * Returns 0 on success or a negative errno.
 */
static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
				       struct inode *inode, u64 file_pos,
				       u64 disk_bytenr, u64 disk_num_bytes,
				       u64 num_bytes, u64 ram_bytes,
				       u8 compression, u8 encryption,
				       u16 other_encoding, int extent_type)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_file_extent_item *fi;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key ins;
	u64 qg_released;
	int extent_inserted = 0;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/*
	 * We may be replacing one extent in the tree with another.  The
	 * new extent is pinned in the extent map, and we don't want to
	 * drop it from the cache until it is completely in the btree, so
	 * tell __btrfs_drop_extents to leave the extent in the cache.
	 * The caller is expected to unpin it and roll back the cache
	 * when finished inserting the new extent.
	 *
	 * __btrfs_drop_extents may also create the replacement item slot
	 * itself, reported back through @extent_inserted.
	 */
	ret = __btrfs_drop_extents(trans, root, inode, path, file_pos,
				   file_pos + num_bytes, NULL, 0,
				   1, sizeof(*fi), &extent_inserted);
	if (ret)
		goto out;

	if (!extent_inserted) {
		ins.objectid = btrfs_ino(BTRFS_I(inode));
		ins.offset = file_pos;
		ins.type = BTRFS_EXTENT_DATA_KEY;

		path->leave_spinning = 1;
		ret = btrfs_insert_empty_item(trans, root, path, &ins,
					      sizeof(*fi));
		if (ret)
			goto out;
	}
	/* Fill in the new file extent item. */
	leaf = path->nodes[0];
	fi = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);
	btrfs_set_file_extent_generation(leaf, fi, trans->transid);
	btrfs_set_file_extent_type(leaf, fi, extent_type);
	btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr);
	btrfs_set_file_extent_disk_num_bytes(leaf, fi, disk_num_bytes);
	btrfs_set_file_extent_offset(leaf, fi, 0);
	btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
	btrfs_set_file_extent_ram_bytes(leaf, fi, ram_bytes);
	btrfs_set_file_extent_compression(leaf, fi, compression);
	btrfs_set_file_extent_encryption(leaf, fi, encryption);
	btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding);

	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	inode_add_bytes(inode, num_bytes);

	/* Key for the extent item backing this file extent. */
	ins.objectid = disk_bytenr;
	ins.offset = disk_num_bytes;
	ins.type = BTRFS_EXTENT_ITEM_KEY;

	/*
	 * Release the qgroup-reserved range for this write; the space is
	 * accounted through the delayed ref from here on.
	 */
	ret = btrfs_qgroup_release_data(inode, file_pos, ram_bytes);
	if (ret < 0)
		goto out;
	qg_released = ret;	/* bytes actually released from the reserve */
	ret = btrfs_alloc_reserved_file_extent(trans, root,
					       btrfs_ino(BTRFS_I(inode)),
					       file_pos, qg_released, &ins);
out:
	btrfs_free_path(path);

	return ret;
}
2277
2278
/*
 * One file-extent reference that still points at an old (pre-defrag)
 * disk extent; collected by record_one_backref() and kept sorted in
 * new_sa_defrag_extent::root for relinking.
 */
struct sa_defrag_extent_backref {
	struct rb_node node;
	struct old_sa_defrag_extent *old;	/* old extent it references */
	u64 root_id;		/* subvolume root holding the reference */
	u64 inum;		/* inode number of the referencing file */
	u64 file_pos;		/* file offset of the referencing extent item */
	u64 extent_offset;
	u64 num_bytes;
	u64 generation;
};

/*
 * One of the old disk extents that the defragged file range used to
 * point at.
 */
struct old_sa_defrag_extent {
	struct list_head list;		/* linked on new_sa_defrag_extent::head */
	struct new_sa_defrag_extent *new;

	/* NOTE(review): offsets below describe where this old extent sat
	 * relative to the defragged range — confirm exact meaning against
	 * record_old_file_extents() (not visible in this chunk). */
	u64 extent_offset;
	u64 bytenr;
	u64 offset;
	u64 len;
	int count;		/* number of backrefs recorded for this extent */
};

/* The new extent written by snapshot-aware defrag plus relink state. */
struct new_sa_defrag_extent {
	struct rb_root root;		/* tree of sa_defrag_extent_backref */
	struct list_head head;		/* list of old_sa_defrag_extent */
	struct btrfs_path *path;	/* shared search path for callbacks */
	struct inode *inode;		/* the inode being defragged */
	u64 file_pos;
	u64 len;
	u64 bytenr;
	u64 disk_len;
	u8 compress_type;
};
2312
2313static int backref_comp(struct sa_defrag_extent_backref *b1,
2314 struct sa_defrag_extent_backref *b2)
2315{
2316 if (b1->root_id < b2->root_id)
2317 return -1;
2318 else if (b1->root_id > b2->root_id)
2319 return 1;
2320
2321 if (b1->inum < b2->inum)
2322 return -1;
2323 else if (b1->inum > b2->inum)
2324 return 1;
2325
2326 if (b1->file_pos < b2->file_pos)
2327 return -1;
2328 else if (b1->file_pos > b2->file_pos)
2329 return 1;
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343 return 0;
2344}
2345
2346static void backref_insert(struct rb_root *root,
2347 struct sa_defrag_extent_backref *backref)
2348{
2349 struct rb_node **p = &root->rb_node;
2350 struct rb_node *parent = NULL;
2351 struct sa_defrag_extent_backref *entry;
2352 int ret;
2353
2354 while (*p) {
2355 parent = *p;
2356 entry = rb_entry(parent, struct sa_defrag_extent_backref, node);
2357
2358 ret = backref_comp(backref, entry);
2359 if (ret < 0)
2360 p = &(*p)->rb_left;
2361 else
2362 p = &(*p)->rb_right;
2363 }
2364
2365 rb_link_node(&backref->node, parent, p);
2366 rb_insert_color(&backref->node, root);
2367}
2368
2369
2370
2371
/*
 * iterate_inodes_from_logical() callback for snapshot-aware defrag: for
 * one (root, inode, offset) reference to old->bytenr, locate the
 * matching file extent item and record it as a backref on new->root so
 * it can be relinked to the defragged extent later.
 *
 * Returns 0 on success (including benign "reference is gone" cases) or
 * a negative errno.
 */
static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
				       void *ctx)
{
	struct btrfs_file_extent_item *extent;
	struct old_sa_defrag_extent *old = ctx;
	struct new_sa_defrag_extent *new = old->new;
	struct btrfs_path *path = new->path;
	struct btrfs_key key;
	struct btrfs_root *root;
	struct sa_defrag_extent_backref *backref;
	struct extent_buffer *leaf;
	struct inode *inode = new->inode;
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	int slot;
	int ret;
	u64 extent_offset;
	u64 num_bytes;

	/* Skip references from the inode being defragged itself. */
	if (BTRFS_I(inode)->root->root_key.objectid == root_id &&
	    inum == btrfs_ino(BTRFS_I(inode)))
		return 0;

	key.objectid = root_id;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	root = btrfs_read_fs_root_no_name(fs_info, &key);
	if (IS_ERR(root)) {
		/* The subvolume was deleted meanwhile; nothing to record. */
		if (PTR_ERR(root) == -ENOENT)
			return 0;
		WARN_ON(1);
		btrfs_debug(fs_info, "inum=%llu, offset=%llu, root_id=%llu",
			    inum, offset, root_id);
		return PTR_ERR(root);
	}

	key.objectid = inum;
	key.type = BTRFS_EXTENT_DATA_KEY;
	/*
	 * NOTE(review): offsets with bits set in the top 32 bits restart
	 * the search from 0 — the rationale is not visible here; confirm.
	 */
	if (offset > (u64)-1 << 32)
		key.offset = 0;
	else
		key.offset = offset;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (WARN_ON(ret < 0))
		return ret;
	ret = 0;

	/* Scan forward for the extent item matching old->bytenr at 'offset'. */
	while (1) {
		cond_resched();

		leaf = path->nodes[0];
		slot = path->slots[0];

		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0) {
				goto out;
			} else if (ret > 0) {
				/* End of tree: reference no longer exists. */
				ret = 0;
				goto out;
			}
			continue;
		}

		path->slots[0]++;

		btrfs_item_key_to_cpu(leaf, &key, slot);

		if (key.objectid > inum)
			goto out;

		if (key.objectid < inum || key.type != BTRFS_EXTENT_DATA_KEY)
			continue;

		extent = btrfs_item_ptr(leaf, slot,
					struct btrfs_file_extent_item);

		if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr)
			continue;

		/*
		 * 'offset' refers to the exact key.offset, NOT the offset
		 * field inside btrfs_extent_data_ref; the match must be
		 * exact.
		 */
		if (key.offset != offset)
			continue;

		extent_offset = btrfs_file_extent_offset(leaf, extent);
		num_bytes = btrfs_file_extent_num_bytes(leaf, extent);

		/* The candidate must overlap the old extent's range. */
		if (extent_offset >= old->extent_offset + old->offset +
		    old->len || extent_offset + num_bytes <=
		    old->extent_offset + old->offset)
			continue;
		break;
	}

	backref = kmalloc(sizeof(*backref), GFP_NOFS);
	if (!backref) {
		/*
		 * NOTE(review): -ENOENT on an allocation failure looks odd
		 * (-ENOMEM expected); record_extent_backrefs() treats
		 * -ENOENT as benign, so an OOM is silently skipped here —
		 * confirm this is intentional.
		 */
		ret = -ENOENT;
		goto out;
	}

	backref->root_id = root_id;
	backref->inum = inum;
	backref->file_pos = offset;
	backref->num_bytes = num_bytes;
	backref->extent_offset = extent_offset;
	backref->generation = btrfs_file_extent_generation(leaf, extent);
	backref->old = old;
	backref_insert(&new->root, backref);
	old->count++;
out:
	btrfs_release_path(path);
	WARN_ON(ret);
	return ret;
}
2491
2492static noinline bool record_extent_backrefs(struct btrfs_path *path,
2493 struct new_sa_defrag_extent *new)
2494{
2495 struct btrfs_fs_info *fs_info = btrfs_sb(new->inode->i_sb);
2496 struct old_sa_defrag_extent *old, *tmp;
2497 int ret;
2498
2499 new->path = path;
2500
2501 list_for_each_entry_safe(old, tmp, &new->head, list) {
2502 ret = iterate_inodes_from_logical(old->bytenr +
2503 old->extent_offset, fs_info,
2504 path, record_one_backref,
2505 old, false);
2506 if (ret < 0 && ret != -ENOENT)
2507 return false;
2508
2509
2510 if (!old->count) {
2511 list_del(&old->list);
2512 kfree(old);
2513 }
2514 }
2515
2516 if (list_empty(&new->head))
2517 return false;
2518
2519 return true;
2520}
2521
2522static int relink_is_mergable(struct extent_buffer *leaf,
2523 struct btrfs_file_extent_item *fi,
2524 struct new_sa_defrag_extent *new)
2525{
2526 if (btrfs_file_extent_disk_bytenr(leaf, fi) != new->bytenr)
2527 return 0;
2528
2529 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
2530 return 0;
2531
2532 if (btrfs_file_extent_compression(leaf, fi) != new->compress_type)
2533 return 0;
2534
2535 if (btrfs_file_extent_encryption(leaf, fi) ||
2536 btrfs_file_extent_other_encoding(leaf, fi))
2537 return 0;
2538
2539 return 1;
2540}
2541
2542
2543
2544
2545static noinline int relink_extent_backref(struct btrfs_path *path,
2546 struct sa_defrag_extent_backref *prev,
2547 struct sa_defrag_extent_backref *backref)
2548{
2549 struct btrfs_file_extent_item *extent;
2550 struct btrfs_file_extent_item *item;
2551 struct btrfs_ordered_extent *ordered;
2552 struct btrfs_trans_handle *trans;
2553 struct btrfs_ref ref = { 0 };
2554 struct btrfs_root *root;
2555 struct btrfs_key key;
2556 struct extent_buffer *leaf;
2557 struct old_sa_defrag_extent *old = backref->old;
2558 struct new_sa_defrag_extent *new = old->new;
2559 struct btrfs_fs_info *fs_info = btrfs_sb(new->inode->i_sb);
2560 struct inode *inode;
2561 struct extent_state *cached = NULL;
2562 int ret = 0;
2563 u64 start;
2564 u64 len;
2565 u64 lock_start;
2566 u64 lock_end;
2567 bool merge = false;
2568 int index;
2569
2570 if (prev && prev->root_id == backref->root_id &&
2571 prev->inum == backref->inum &&
2572 prev->file_pos + prev->num_bytes == backref->file_pos)
2573 merge = true;
2574
2575
2576 key.objectid = backref->root_id;
2577 key.type = BTRFS_ROOT_ITEM_KEY;
2578 key.offset = (u64)-1;
2579
2580 index = srcu_read_lock(&fs_info->subvol_srcu);
2581
2582 root = btrfs_read_fs_root_no_name(fs_info, &key);
2583 if (IS_ERR(root)) {
2584 srcu_read_unlock(&fs_info->subvol_srcu, index);
2585 if (PTR_ERR(root) == -ENOENT)
2586 return 0;
2587 return PTR_ERR(root);
2588 }
2589
2590 if (btrfs_root_readonly(root)) {
2591 srcu_read_unlock(&fs_info->subvol_srcu, index);
2592 return 0;
2593 }
2594
2595
2596 key.objectid = backref->inum;
2597 key.type = BTRFS_INODE_ITEM_KEY;
2598 key.offset = 0;
2599
2600 inode = btrfs_iget(fs_info->sb, &key, root, NULL);
2601 if (IS_ERR(inode)) {
2602 srcu_read_unlock(&fs_info->subvol_srcu, index);
2603 return 0;
2604 }
2605
2606 srcu_read_unlock(&fs_info->subvol_srcu, index);
2607
2608
2609 lock_start = backref->file_pos;
2610 lock_end = backref->file_pos + backref->num_bytes - 1;
2611 lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
2612 &cached);
2613
2614 ordered = btrfs_lookup_first_ordered_extent(inode, lock_end);
2615 if (ordered) {
2616 btrfs_put_ordered_extent(ordered);
2617 goto out_unlock;
2618 }
2619
2620 trans = btrfs_join_transaction(root);
2621 if (IS_ERR(trans)) {
2622 ret = PTR_ERR(trans);
2623 goto out_unlock;
2624 }
2625
2626 key.objectid = backref->inum;
2627 key.type = BTRFS_EXTENT_DATA_KEY;
2628 key.offset = backref->file_pos;
2629
2630 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2631 if (ret < 0) {
2632 goto out_free_path;
2633 } else if (ret > 0) {
2634 ret = 0;
2635 goto out_free_path;
2636 }
2637
2638 extent = btrfs_item_ptr(path->nodes[0], path->slots[0],
2639 struct btrfs_file_extent_item);
2640
2641 if (btrfs_file_extent_generation(path->nodes[0], extent) !=
2642 backref->generation)
2643 goto out_free_path;
2644
2645 btrfs_release_path(path);
2646
2647 start = backref->file_pos;
2648 if (backref->extent_offset < old->extent_offset + old->offset)
2649 start += old->extent_offset + old->offset -
2650 backref->extent_offset;
2651
2652 len = min(backref->extent_offset + backref->num_bytes,
2653 old->extent_offset + old->offset + old->len);
2654 len -= max(backref->extent_offset, old->extent_offset + old->offset);
2655
2656 ret = btrfs_drop_extents(trans, root, inode, start,
2657 start + len, 1);
2658 if (ret)
2659 goto out_free_path;
2660again:
2661 key.objectid = btrfs_ino(BTRFS_I(inode));
2662 key.type = BTRFS_EXTENT_DATA_KEY;
2663 key.offset = start;
2664
2665 path->leave_spinning = 1;
2666 if (merge) {
2667 struct btrfs_file_extent_item *fi;
2668 u64 extent_len;
2669 struct btrfs_key found_key;
2670
2671 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2672 if (ret < 0)
2673 goto out_free_path;
2674
2675 path->slots[0]--;
2676 leaf = path->nodes[0];
2677 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2678
2679 fi = btrfs_item_ptr(leaf, path->slots[0],
2680 struct btrfs_file_extent_item);
2681 extent_len = btrfs_file_extent_num_bytes(leaf, fi);
2682
2683 if (extent_len + found_key.offset == start &&
2684 relink_is_mergable(leaf, fi, new)) {
2685 btrfs_set_file_extent_num_bytes(leaf, fi,
2686 extent_len + len);
2687 btrfs_mark_buffer_dirty(leaf);
2688 inode_add_bytes(inode, len);
2689
2690 ret = 1;
2691 goto out_free_path;
2692 } else {
2693 merge = false;
2694 btrfs_release_path(path);
2695 goto again;
2696 }
2697 }
2698
2699 ret = btrfs_insert_empty_item(trans, root, path, &key,
2700 sizeof(*extent));
2701 if (ret) {
2702 btrfs_abort_transaction(trans, ret);
2703 goto out_free_path;
2704 }
2705
2706 leaf = path->nodes[0];
2707 item = btrfs_item_ptr(leaf, path->slots[0],
2708 struct btrfs_file_extent_item);
2709 btrfs_set_file_extent_disk_bytenr(leaf, item, new->bytenr);
2710 btrfs_set_file_extent_disk_num_bytes(leaf, item, new->disk_len);
2711 btrfs_set_file_extent_offset(leaf, item, start - new->file_pos);
2712 btrfs_set_file_extent_num_bytes(leaf, item, len);
2713 btrfs_set_file_extent_ram_bytes(leaf, item, new->len);
2714 btrfs_set_file_extent_generation(leaf, item, trans->transid);
2715 btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
2716 btrfs_set_file_extent_compression(leaf, item, new->compress_type);
2717 btrfs_set_file_extent_encryption(leaf, item, 0);
2718 btrfs_set_file_extent_other_encoding(leaf, item, 0);
2719
2720 btrfs_mark_buffer_dirty(leaf);
2721 inode_add_bytes(inode, len);
2722 btrfs_release_path(path);
2723
2724 btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new->bytenr,
2725 new->disk_len, 0);
2726 btrfs_init_data_ref(&ref, backref->root_id, backref->inum,
2727 new->file_pos);
2728 ret = btrfs_inc_extent_ref(trans, &ref);
2729 if (ret) {
2730 btrfs_abort_transaction(trans, ret);
2731 goto out_free_path;
2732 }
2733
2734 ret = 1;
2735out_free_path:
2736 btrfs_release_path(path);
2737 path->leave_spinning = 0;
2738 btrfs_end_transaction(trans);
2739out_unlock:
2740 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
2741 &cached);
2742 iput(inode);
2743 return ret;
2744}
2745
2746static void free_sa_defrag_extent(struct new_sa_defrag_extent *new)
2747{
2748 struct old_sa_defrag_extent *old, *tmp;
2749
2750 if (!new)
2751 return;
2752
2753 list_for_each_entry_safe(old, tmp, &new->head, list) {
2754 kfree(old);
2755 }
2756 kfree(new);
2757}
2758
2759static void relink_file_extents(struct new_sa_defrag_extent *new)
2760{
2761 struct btrfs_fs_info *fs_info = btrfs_sb(new->inode->i_sb);
2762 struct btrfs_path *path;
2763 struct sa_defrag_extent_backref *backref;
2764 struct sa_defrag_extent_backref *prev = NULL;
2765 struct rb_node *node;
2766 int ret;
2767
2768 path = btrfs_alloc_path();
2769 if (!path)
2770 return;
2771
2772 if (!record_extent_backrefs(path, new)) {
2773 btrfs_free_path(path);
2774 goto out;
2775 }
2776 btrfs_release_path(path);
2777
2778 while (1) {
2779 node = rb_first(&new->root);
2780 if (!node)
2781 break;
2782 rb_erase(node, &new->root);
2783
2784 backref = rb_entry(node, struct sa_defrag_extent_backref, node);
2785
2786 ret = relink_extent_backref(path, prev, backref);
2787 WARN_ON(ret < 0);
2788
2789 kfree(prev);
2790
2791 if (ret == 1)
2792 prev = backref;
2793 else
2794 prev = NULL;
2795 cond_resched();
2796 }
2797 kfree(prev);
2798
2799 btrfs_free_path(path);
2800out:
2801 free_sa_defrag_extent(new);
2802
2803 atomic_dec(&fs_info->defrag_running);
2804 wake_up(&fs_info->transaction_wait);
2805}
2806
/*
 * Record every pre-existing file extent that overlaps the range covered
 * by @ordered, before the defragged ordered extent is written into the
 * file tree (snapshot-aware defrag support).
 *
 * On success, returns a new_sa_defrag_extent holding one
 * old_sa_defrag_extent per overlapping non-hole extent, with
 * fs_info->defrag_running elevated; relink_file_extents() later consumes
 * both.  Returns NULL on any failure (partial state is freed).
 */
static struct new_sa_defrag_extent *
record_old_file_extents(struct inode *inode,
			struct btrfs_ordered_extent *ordered)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct old_sa_defrag_extent *old;
	struct new_sa_defrag_extent *new;
	int ret;

	new = kmalloc(sizeof(*new), GFP_NOFS);
	if (!new)
		return NULL;

	new->inode = inode;
	new->file_pos = ordered->file_offset;
	new->len = ordered->len;
	new->bytenr = ordered->start;
	new->disk_len = ordered->disk_len;
	new->compress_type = ordered->compress_type;
	new->root = RB_ROOT;
	INIT_LIST_HEAD(&new->head);

	path = btrfs_alloc_path();
	if (!path)
		goto out_kfree;

	key.objectid = btrfs_ino(BTRFS_I(inode));
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = new->file_pos;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out_free_path;
	/* step back one slot: an extent starting before file_pos may overlap */
	if (ret > 0 && path->slots[0] > 0)
		path->slots[0]--;

	/* find out all the old extents for the file range */
	while (1) {
		struct btrfs_file_extent_item *extent;
		struct extent_buffer *l;
		int slot;
		u64 num_bytes;
		u64 offset;
		u64 end;
		u64 disk_bytenr;
		u64 extent_offset;

		l = path->nodes[0];
		slot = path->slots[0];

		if (slot >= btrfs_header_nritems(l)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out_free_path;
			else if (ret > 0)
				break;	/* no more leaves: done */
			continue;
		}

		btrfs_item_key_to_cpu(l, &key, slot);

		/* stop once we leave this inode's EXTENT_DATA items or the range */
		if (key.objectid != btrfs_ino(BTRFS_I(inode)))
			break;
		if (key.type != BTRFS_EXTENT_DATA_KEY)
			break;
		if (key.offset >= new->file_pos + new->len)
			break;

		extent = btrfs_item_ptr(l, slot, struct btrfs_file_extent_item);

		num_bytes = btrfs_file_extent_num_bytes(l, extent);
		if (key.offset + num_bytes < new->file_pos)
			goto next;

		/* skip holes (disk_bytenr == 0 means no backing extent) */
		disk_bytenr = btrfs_file_extent_disk_bytenr(l, extent);
		if (!disk_bytenr)
			goto next;

		extent_offset = btrfs_file_extent_offset(l, extent);

		old = kmalloc(sizeof(*old), GFP_NOFS);
		if (!old)
			goto out_free_path;

		/* clip the old extent to the part overlapping the new range */
		offset = max(new->file_pos, key.offset);
		end = min(new->file_pos + new->len, key.offset + num_bytes);

		old->bytenr = disk_bytenr;
		old->extent_offset = extent_offset;
		old->offset = offset - key.offset;
		old->len = end - offset;
		old->new = new;
		old->count = 0;
		list_add_tail(&old->list, &new->head);
next:
		path->slots[0]++;
		cond_resched();
	}

	btrfs_free_path(path);
	atomic_inc(&fs_info->defrag_running);

	return new;

out_free_path:
	btrfs_free_path(path);
out_kfree:
	/* also frees any old records already queued on new->head */
	free_sa_defrag_extent(new);
	return NULL;
}
2920
2921static void btrfs_release_delalloc_bytes(struct btrfs_fs_info *fs_info,
2922 u64 start, u64 len)
2923{
2924 struct btrfs_block_group_cache *cache;
2925
2926 cache = btrfs_lookup_block_group(fs_info, start);
2927 ASSERT(cache);
2928
2929 spin_lock(&cache->lock);
2930 cache->delalloc_bytes -= len;
2931 spin_unlock(&cache->lock);
2932
2933 btrfs_put_block_group(cache);
2934}
2935
2936
2937
2938
2939
2940static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
2941{
2942 struct inode *inode = ordered_extent->inode;
2943 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2944 struct btrfs_root *root = BTRFS_I(inode)->root;
2945 struct btrfs_trans_handle *trans = NULL;
2946 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2947 struct extent_state *cached_state = NULL;
2948 struct new_sa_defrag_extent *new = NULL;
2949 int compress_type = 0;
2950 int ret = 0;
2951 u64 logical_len = ordered_extent->len;
2952 bool nolock;
2953 bool truncated = false;
2954 bool range_locked = false;
2955 bool clear_new_delalloc_bytes = false;
2956 bool clear_reserved_extent = true;
2957
2958 if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
2959 !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags) &&
2960 !test_bit(BTRFS_ORDERED_DIRECT, &ordered_extent->flags))
2961 clear_new_delalloc_bytes = true;
2962
2963 nolock = btrfs_is_free_space_inode(BTRFS_I(inode));
2964
2965 if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) {
2966 ret = -EIO;
2967 goto out;
2968 }
2969
2970 btrfs_free_io_failure_record(BTRFS_I(inode),
2971 ordered_extent->file_offset,
2972 ordered_extent->file_offset +
2973 ordered_extent->len - 1);
2974
2975 if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
2976 truncated = true;
2977 logical_len = ordered_extent->truncated_len;
2978
2979 if (!logical_len)
2980 goto out;
2981 }
2982
2983 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
2984 BUG_ON(!list_empty(&ordered_extent->list));
2985
2986
2987
2988
2989
2990
2991 btrfs_qgroup_free_data(inode, NULL, ordered_extent->file_offset,
2992 ordered_extent->len);
2993 btrfs_ordered_update_i_size(inode, 0, ordered_extent);
2994 if (nolock)
2995 trans = btrfs_join_transaction_nolock(root);
2996 else
2997 trans = btrfs_join_transaction(root);
2998 if (IS_ERR(trans)) {
2999 ret = PTR_ERR(trans);
3000 trans = NULL;
3001 goto out;
3002 }
3003 trans->block_rsv = &BTRFS_I(inode)->block_rsv;
3004 ret = btrfs_update_inode_fallback(trans, root, inode);
3005 if (ret)
3006 btrfs_abort_transaction(trans, ret);
3007 goto out;
3008 }
3009
3010 range_locked = true;
3011 lock_extent_bits(io_tree, ordered_extent->file_offset,
3012 ordered_extent->file_offset + ordered_extent->len - 1,
3013 &cached_state);
3014
3015 ret = test_range_bit(io_tree, ordered_extent->file_offset,
3016 ordered_extent->file_offset + ordered_extent->len - 1,
3017 EXTENT_DEFRAG, 0, cached_state);
3018 if (ret) {
3019 u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
3020 if (0 && last_snapshot >= BTRFS_I(inode)->generation)
3021
3022 new = record_old_file_extents(inode, ordered_extent);
3023
3024 clear_extent_bit(io_tree, ordered_extent->file_offset,
3025 ordered_extent->file_offset + ordered_extent->len - 1,
3026 EXTENT_DEFRAG, 0, 0, &cached_state);
3027 }
3028
3029 if (nolock)
3030 trans = btrfs_join_transaction_nolock(root);
3031 else
3032 trans = btrfs_join_transaction(root);
3033 if (IS_ERR(trans)) {
3034 ret = PTR_ERR(trans);
3035 trans = NULL;
3036 goto out;
3037 }
3038
3039 trans->block_rsv = &BTRFS_I(inode)->block_rsv;
3040
3041 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
3042 compress_type = ordered_extent->compress_type;
3043 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
3044 BUG_ON(compress_type);
3045 btrfs_qgroup_free_data(inode, NULL, ordered_extent->file_offset,
3046 ordered_extent->len);
3047 ret = btrfs_mark_extent_written(trans, BTRFS_I(inode),
3048 ordered_extent->file_offset,
3049 ordered_extent->file_offset +
3050 logical_len);
3051 } else {
3052 BUG_ON(root == fs_info->tree_root);
3053 ret = insert_reserved_file_extent(trans, inode,
3054 ordered_extent->file_offset,
3055 ordered_extent->start,
3056 ordered_extent->disk_len,
3057 logical_len, logical_len,
3058 compress_type, 0, 0,
3059 BTRFS_FILE_EXTENT_REG);
3060 if (!ret) {
3061 clear_reserved_extent = false;
3062 btrfs_release_delalloc_bytes(fs_info,
3063 ordered_extent->start,
3064 ordered_extent->disk_len);
3065 }
3066 }
3067 unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
3068 ordered_extent->file_offset, ordered_extent->len,
3069 trans->transid);
3070 if (ret < 0) {
3071 btrfs_abort_transaction(trans, ret);
3072 goto out;
3073 }
3074
3075 ret = add_pending_csums(trans, inode, &ordered_extent->list);
3076 if (ret) {
3077 btrfs_abort_transaction(trans, ret);
3078 goto out;
3079 }
3080
3081 btrfs_ordered_update_i_size(inode, 0, ordered_extent);
3082 ret = btrfs_update_inode_fallback(trans, root, inode);
3083 if (ret) {
3084 btrfs_abort_transaction(trans, ret);
3085 goto out;
3086 }
3087 ret = 0;
3088out:
3089 if (range_locked || clear_new_delalloc_bytes) {
3090 unsigned int clear_bits = 0;
3091
3092 if (range_locked)
3093 clear_bits |= EXTENT_LOCKED;
3094 if (clear_new_delalloc_bytes)
3095 clear_bits |= EXTENT_DELALLOC_NEW;
3096 clear_extent_bit(&BTRFS_I(inode)->io_tree,
3097 ordered_extent->file_offset,
3098 ordered_extent->file_offset +
3099 ordered_extent->len - 1,
3100 clear_bits,
3101 (clear_bits & EXTENT_LOCKED) ? 1 : 0,
3102 0, &cached_state);
3103 }
3104
3105 if (trans)
3106 btrfs_end_transaction(trans);
3107
3108 if (ret || truncated) {
3109 u64 start, end;
3110
3111 if (truncated)
3112 start = ordered_extent->file_offset + logical_len;
3113 else
3114 start = ordered_extent->file_offset;
3115 end = ordered_extent->file_offset + ordered_extent->len - 1;
3116 clear_extent_uptodate(io_tree, start, end, NULL);
3117
3118
3119 btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 0);
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131 if ((ret || !logical_len) &&
3132 clear_reserved_extent &&
3133 !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
3134 !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags))
3135 btrfs_free_reserved_extent(fs_info,
3136 ordered_extent->start,
3137 ordered_extent->disk_len, 1);
3138 }
3139
3140
3141
3142
3143
3144
3145 btrfs_remove_ordered_extent(inode, ordered_extent);
3146
3147
3148 if (new) {
3149 if (ret) {
3150 free_sa_defrag_extent(new);
3151 atomic_dec(&fs_info->defrag_running);
3152 } else {
3153 relink_file_extents(new);
3154 }
3155 }
3156
3157
3158 btrfs_put_ordered_extent(ordered_extent);
3159
3160 btrfs_put_ordered_extent(ordered_extent);
3161
3162 return ret;
3163}
3164
3165static void finish_ordered_fn(struct btrfs_work *work)
3166{
3167 struct btrfs_ordered_extent *ordered_extent;
3168 ordered_extent = container_of(work, struct btrfs_ordered_extent, work);
3169 btrfs_finish_ordered_io(ordered_extent);
3170}
3171
/*
 * Writeback endio hook for the byte range [start, end] of @page: if this
 * completion makes the covering ordered extent fully written, queue
 * btrfs_finish_ordered_io() on the appropriate endio workqueue.
 */
void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start,
					  u64 end, int uptodate)
{
	struct inode *inode = page->mapping->host;
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_ordered_extent *ordered_extent = NULL;
	struct btrfs_workqueue *wq;
	btrfs_work_func_t func;

	trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);

	ClearPagePrivate2(page);
	/* nothing to do until the whole ordered extent has finished IO */
	if (!btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
					    end - start + 1, uptodate))
		return;

	/* free space inode completions run on a dedicated workqueue */
	if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
		wq = fs_info->endio_freespace_worker;
		func = btrfs_freespace_write_helper;
	} else {
		wq = fs_info->endio_write_workers;
		func = btrfs_endio_write_helper;
	}

	btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL,
			NULL);
	btrfs_queue_work(wq, &ordered_extent->work);
}
3200
3201static int __readpage_endio_check(struct inode *inode,
3202 struct btrfs_io_bio *io_bio,
3203 int icsum, struct page *page,
3204 int pgoff, u64 start, size_t len)
3205{
3206 char *kaddr;
3207 u32 csum_expected;
3208 u32 csum = ~(u32)0;
3209
3210 csum_expected = *(((u32 *)io_bio->csum) + icsum);
3211
3212 kaddr = kmap_atomic(page);
3213 csum = btrfs_csum_data(kaddr + pgoff, csum, len);
3214 btrfs_csum_final(csum, (u8 *)&csum);
3215 if (csum != csum_expected)
3216 goto zeroit;
3217
3218 kunmap_atomic(kaddr);
3219 return 0;
3220zeroit:
3221 btrfs_print_data_csum_error(BTRFS_I(inode), start, csum, csum_expected,
3222 io_bio->mirror_num);
3223 memset(kaddr + pgoff, 1, len);
3224 flush_dcache_page(page);
3225 kunmap_atomic(kaddr);
3226 return -EIO;
3227}
3228
3229
3230
3231
3232
3233
/*
 * Read endio hook: verify the data checksum for [start, end] within
 * @page.  Returns 0 when the data is accepted (checksum matches or
 * checksums do not apply to this range), -EIO on mismatch.
 */
static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
				      u64 phy_offset, struct page *page,
				      u64 start, u64 end, int mirror)
{
	size_t offset = start - page_offset(page);
	struct inode *inode = page->mapping->host;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_root *root = BTRFS_I(inode)->root;

	/*
	 * PG_checked set: the page was marked as already validated —
	 * NOTE(review): presumably by a writer of PageChecked elsewhere
	 * (e.g. after repair); confirm against those call sites.
	 */
	if (PageChecked(page)) {
		ClearPageChecked(page);
		return 0;
	}

	/* the inode opted out of data checksums entirely */
	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
		return 0;

	/*
	 * The data reloc tree may carry ranges explicitly flagged as having
	 * no checksum; clear the marker and accept the data.
	 */
	if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
	    test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
		clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM);
		return 0;
	}

	/* convert the byte offset into a block index for the csum array */
	phy_offset >>= inode->i_sb->s_blocksize_bits;
	return __readpage_endio_check(inode, io_bio, phy_offset, page, offset,
				      start, (size_t)(end - start + 1));
}
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
/*
 * Drop a reference on @inode without risking running the final iput in
 * the current context: if ours would be the last reference, queue the
 * inode on fs_info->delayed_iputs for the cleaner thread instead.
 */
void btrfs_add_delayed_iput(struct inode *inode)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_inode *binode = BTRFS_I(inode);

	/* fast path: drop the ref now unless it is the last one */
	if (atomic_add_unless(&inode->i_count, -1, 1))
		return;

	atomic_inc(&fs_info->nr_delayed_iputs);
	spin_lock(&fs_info->delayed_iput_lock);
	ASSERT(list_empty(&binode->delayed_iput));
	list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs);
	spin_unlock(&fs_info->delayed_iput_lock);
	/* the cleaner will pick the iput up; wake it unless already running */
	if (!test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags))
		wake_up_process(fs_info->cleaner_kthread);
}
3288
/*
 * Run every queued delayed iput.  The list lock is dropped around each
 * iput() because the final reference drop may do blocking work (inode
 * eviction), and is re-taken before examining the list again.
 */
void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
{

	spin_lock(&fs_info->delayed_iput_lock);
	while (!list_empty(&fs_info->delayed_iputs)) {
		struct btrfs_inode *inode;

		inode = list_first_entry(&fs_info->delayed_iputs,
				struct btrfs_inode, delayed_iput);
		list_del_init(&inode->delayed_iput);
		spin_unlock(&fs_info->delayed_iput_lock);
		iput(&inode->vfs_inode);
		/* last pending iput done: wake btrfs_wait_on_delayed_iputs() */
		if (atomic_dec_and_test(&fs_info->nr_delayed_iputs))
			wake_up(&fs_info->delayed_iputs_wait);
		spin_lock(&fs_info->delayed_iput_lock);
	}
	spin_unlock(&fs_info->delayed_iput_lock);
}
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318int btrfs_wait_on_delayed_iputs(struct btrfs_fs_info *fs_info)
3319{
3320 int ret = wait_event_killable(fs_info->delayed_iputs_wait,
3321 atomic_read(&fs_info->nr_delayed_iputs) == 0);
3322 if (ret)
3323 return -EINTR;
3324 return 0;
3325}
3326
3327
3328
3329
3330
3331int btrfs_orphan_add(struct btrfs_trans_handle *trans,
3332 struct btrfs_inode *inode)
3333{
3334 int ret;
3335
3336 ret = btrfs_insert_orphan_item(trans, inode->root, btrfs_ino(inode));
3337 if (ret && ret != -EEXIST) {
3338 btrfs_abort_transaction(trans, ret);
3339 return ret;
3340 }
3341
3342 return 0;
3343}
3344
3345
3346
3347
3348
3349static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
3350 struct btrfs_inode *inode)
3351{
3352 return btrfs_del_orphan_item(trans, inode->root, btrfs_ino(inode));
3353}
3354
3355
3356
3357
3358
/*
 * Clean up any orphan items left in @root by a previous mount: each one
 * names an inode that either still has a positive link count (delete
 * the stale item) or must finish being deleted (final iput does it).
 * Only the first caller per root does the work; later calls return 0.
 */
int btrfs_orphan_cleanup(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key, found_key;
	struct btrfs_trans_handle *trans;
	struct inode *inode;
	u64 last_objectid = 0;
	int ret = 0, nr_unlink = 0;

	/* atomically claim the cleanup so it runs at most once per root */
	if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
		return 0;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}
	path->reada = READA_BACK;

	/* search from the highest orphan item downwards */
	key.objectid = BTRFS_ORPHAN_OBJECTID;
	key.type = BTRFS_ORPHAN_ITEM_KEY;
	key.offset = (u64)-1;

	while (1) {
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			goto out;

		/*
		 * ret == 0 means we found exactly what we searched for, which
		 * is unexpected for offset (u64)-1 but possible; only step the
		 * slot back when we did NOT find the key.
		 */
		if (ret > 0) {
			ret = 0;
			if (path->slots[0] == 0)
				break;
			path->slots[0]--;
		}

		/* pull out the item */
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		/* make sure the item is really an orphan entry */
		if (found_key.objectid != BTRFS_ORPHAN_OBJECTID)
			break;
		if (found_key.type != BTRFS_ORPHAN_ITEM_KEY)
			break;

		/* release the path since we're done with it */
		btrfs_release_path(path);

		/*
		 * The inode number is stored in the orphan item's offset.
		 * Seeing the same offset twice means the item was not
		 * removed last iteration — bail out instead of looping
		 * forever.
		 */
		if (found_key.offset == last_objectid) {
			btrfs_err(fs_info,
				  "Error removing orphan entry, stopping orphan cleanup");
			ret = -EINVAL;
			goto out;
		}

		last_objectid = found_key.offset;

		found_key.objectid = found_key.offset;
		found_key.type = BTRFS_INODE_ITEM_KEY;
		found_key.offset = 0;
		inode = btrfs_iget(fs_info->sb, &found_key, root, NULL);
		ret = PTR_ERR_OR_ZERO(inode);
		if (ret && ret != -ENOENT)
			goto out;

		if (ret == -ENOENT && root == fs_info->tree_root) {
			struct btrfs_root *dead_root;
			struct btrfs_fs_info *fs_info = root->fs_info;
			int is_dead_root = 0;

			/*
			 * An orphan in the tree root with no inode item:
			 * NOTE(review): presumably either a snapshot deletion
			 * in progress or a free space cache inode.  A
			 * snapshot-deletion orphan must not be removed, and
			 * such a root should appear on fs_info->dead_roots —
			 * confirm against the snapshot deletion code.
			 */
			spin_lock(&fs_info->trans_lock);
			list_for_each_entry(dead_root, &fs_info->dead_roots,
					    root_list) {
				if (dead_root->root_key.objectid ==
				    found_key.objectid) {
					is_dead_root = 1;
					break;
				}
			}
			spin_unlock(&fs_info->trans_lock);
			if (is_dead_root) {
				/* prevent this orphan from being found again */
				key.offset = found_key.objectid - 1;
				continue;
			}

		}

		/*
		 * No inode item (ret == -ENOENT) or the inode still has
		 * links: the orphan item is stale, just delete it.  An
		 * inode with nlink == 0 instead gets its deletion finished
		 * by the final iput below.
		 */
		if (ret == -ENOENT || inode->i_nlink) {
			if (!ret)
				iput(inode);
			trans = btrfs_start_transaction(root, 1);
			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
				goto out;
			}
			btrfs_debug(fs_info, "auto deleting %Lu",
				    found_key.objectid);
			ret = btrfs_del_orphan_item(trans, root,
						    found_key.objectid);
			btrfs_end_transaction(trans);
			if (ret)
				goto out;
			continue;
		}

		nr_unlink++;

		/* this will do delete_inode and everything for us */
		iput(inode);
	}
	/* release the path since we're done with it */
	btrfs_release_path(path);

	root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE;

	if (test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state)) {
		trans = btrfs_join_transaction(root);
		if (!IS_ERR(trans))
			btrfs_end_transaction(trans);
	}

	if (nr_unlink)
		btrfs_debug(fs_info, "unlinked %d orphans", nr_unlink);

out:
	if (ret)
		btrfs_err(fs_info, "could not do orphan cleanup %d", ret);
	btrfs_free_path(path);
	return ret;
}
3533
3534
3535
3536
3537
3538
3539
/*
 * Scan the slots following the inode item at @slot in @leaf for xattr
 * items belonging to @objectid.
 *
 * Returns 1 if POSIX ACL xattrs may exist (found, or could not be ruled
 * out within the scan window), 0 if there are definitely none.
 * *first_xattr_slot is set to the first xattr slot seen, or -1.
 */
static noinline int acls_after_inode_item(struct extent_buffer *leaf,
					  int slot, u64 objectid,
					  int *first_xattr_slot)
{
	u32 nritems = btrfs_header_nritems(leaf);
	struct btrfs_key found_key;
	/* lazily computed name hashes of the two POSIX ACL xattr names */
	static u64 xattr_access = 0;
	static u64 xattr_default = 0;
	int scanned = 0;

	if (!xattr_access) {
		xattr_access = btrfs_name_hash(XATTR_NAME_POSIX_ACL_ACCESS,
					strlen(XATTR_NAME_POSIX_ACL_ACCESS));
		xattr_default = btrfs_name_hash(XATTR_NAME_POSIX_ACL_DEFAULT,
					strlen(XATTR_NAME_POSIX_ACL_DEFAULT));
	}

	slot++;
	*first_xattr_slot = -1;
	while (slot < nritems) {
		btrfs_item_key_to_cpu(leaf, &found_key, slot);

		/* a different objectid means we left this inode: no ACLs */
		if (found_key.objectid != objectid)
			return 0;

		/* an xattr whose name hash matches an ACL name: ACLs exist */
		if (found_key.type == BTRFS_XATTR_ITEM_KEY) {
			if (*first_xattr_slot == -1)
				*first_xattr_slot = slot;
			if (found_key.offset == xattr_access ||
			    found_key.offset == xattr_default)
				return 1;
		}

		/*
		 * key types sort after XATTR_ITEM: once past them there can
		 * be no more xattrs for this inode
		 */
		if (found_key.type > BTRFS_XATTR_ITEM_KEY)
			return 0;

		slot++;
		scanned++;

		/*
		 * give up after a handful of slots and conservatively assume
		 * ACLs might exist rather than scanning the whole leaf
		 */
		if (scanned >= 8)
			break;
	}

	/*
	 * hit the end of the scan window (or the leaf) without deciding:
	 * assume ACLs may exist
	 */
	if (*first_xattr_slot == -1)
		*first_xattr_slot = slot;
	return 1;
}
3602
3603
3604
3605
/*
 * Read an inode item (plus the adjacent ref/xattr items) from the btree
 * into the in-memory inode.  @in_path, when non-NULL, is reused for the
 * lookup; otherwise a temporary path is allocated and freed here.
 * Returns 0 on success or a negative errno.
 */
static int btrfs_read_locked_inode(struct inode *inode,
				   struct btrfs_path *in_path)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_path *path = in_path;
	struct extent_buffer *leaf;
	struct btrfs_inode_item *inode_item;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_key location;
	unsigned long ptr;
	int maybe_acls;
	u32 rdev;
	int ret;
	bool filled = false;
	int first_xattr_slot;

	/* the delayed-inode code may already hold the up-to-date fields */
	ret = btrfs_fill_inode(inode, &rdev);
	if (!ret)
		filled = true;

	if (!path) {
		path = btrfs_alloc_path();
		if (!path)
			return -ENOMEM;
	}

	memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));

	ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
	if (ret) {
		if (path != in_path)
			btrfs_free_path(path);
		return ret;
	}

	leaf = path->nodes[0];

	/* delayed-inode already filled the basic fields: skip the copy */
	if (filled)
		goto cache_index;

	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	inode->i_mode = btrfs_inode_mode(leaf, inode_item);
	set_nlink(inode, btrfs_inode_nlink(leaf, inode_item));
	i_uid_write(inode, btrfs_inode_uid(leaf, inode_item));
	i_gid_write(inode, btrfs_inode_gid(leaf, inode_item));
	btrfs_i_size_write(BTRFS_I(inode), btrfs_inode_size(leaf, inode_item));

	inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime);
	inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->atime);

	inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->mtime);
	inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->mtime);

	inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->ctime);
	inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->ctime);

	BTRFS_I(inode)->i_otime.tv_sec =
		btrfs_timespec_sec(leaf, &inode_item->otime);
	BTRFS_I(inode)->i_otime.tv_nsec =
		btrfs_timespec_nsec(leaf, &inode_item->otime);

	inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item));
	BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
	BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item);

	inode_set_iversion_queried(inode,
				   btrfs_inode_sequence(leaf, inode_item));
	inode->i_generation = BTRFS_I(inode)->generation;
	inode->i_rdev = 0;
	rdev = btrfs_inode_rdev(leaf, inode_item);

	BTRFS_I(inode)->index_cnt = (u64)-1;
	BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);

cache_index:
	/*
	 * If the inode was modified in the current transaction generation
	 * and then evicted and re-read, we no longer know which extents
	 * were modified, so a subsequent fsync must do a full sync.
	 */
	if (BTRFS_I(inode)->last_trans == fs_info->generation)
		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
			&BTRFS_I(inode)->runtime_flags);

	/*
	 * The transaction of the last unlink is not persisted, so be
	 * conservative and seed last_unlink_trans with last_trans.
	 * NOTE(review): presumably this pessimizes fsync after eviction
	 * instead of risking a log-replay inconsistency — confirm against
	 * the tree-log code that consumes last_unlink_trans.
	 */
	BTRFS_I(inode)->last_unlink_trans = BTRFS_I(inode)->last_trans;

	/* the item after the inode item may hold the dir index ref */
	path->slots[0]++;
	if (inode->i_nlink != 1 ||
	    path->slots[0] >= btrfs_header_nritems(leaf))
		goto cache_acl;

	btrfs_item_key_to_cpu(leaf, &location, path->slots[0]);
	if (location.objectid != btrfs_ino(BTRFS_I(inode)))
		goto cache_acl;

	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
	if (location.type == BTRFS_INODE_REF_KEY) {
		struct btrfs_inode_ref *ref;

		ref = (struct btrfs_inode_ref *)ptr;
		BTRFS_I(inode)->dir_index = btrfs_inode_ref_index(leaf, ref);
	} else if (location.type == BTRFS_INODE_EXTREF_KEY) {
		struct btrfs_inode_extref *extref;

		extref = (struct btrfs_inode_extref *)ptr;
		BTRFS_I(inode)->dir_index = btrfs_inode_extref_index(leaf,
								     extref);
	}
cache_acl:
	/*
	 * try to precache a NULL acl entry for files that don't have
	 * any xattrs or acls
	 */
	maybe_acls = acls_after_inode_item(leaf, path->slots[0],
			btrfs_ino(BTRFS_I(inode)), &first_xattr_slot);
	if (first_xattr_slot != -1) {
		path->slots[0] = first_xattr_slot;
		ret = btrfs_load_inode_props(inode, path);
		if (ret)
			btrfs_err(fs_info,
				  "error loading props for ino %llu (root %llu): %d",
				  btrfs_ino(BTRFS_I(inode)),
				  root->root_key.objectid, ret);
	}
	if (path != in_path)
		btrfs_free_path(path);

	if (!maybe_acls)
		cache_no_acl(inode);

	/* wire up the operation tables matching the file type */
	switch (inode->i_mode & S_IFMT) {
	case S_IFREG:
		inode->i_mapping->a_ops = &btrfs_aops;
		BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
		inode->i_fop = &btrfs_file_operations;
		inode->i_op = &btrfs_file_inode_operations;
		break;
	case S_IFDIR:
		inode->i_fop = &btrfs_dir_file_operations;
		inode->i_op = &btrfs_dir_inode_operations;
		break;
	case S_IFLNK:
		inode->i_op = &btrfs_symlink_inode_operations;
		inode_nohighmem(inode);
		inode->i_mapping->a_ops = &btrfs_aops;
		break;
	default:
		inode->i_op = &btrfs_special_inode_operations;
		init_special_inode(inode, inode->i_mode, rdev);
		break;
	}

	btrfs_sync_inode_flags_to_i_flags(inode);
	return 0;
}
3793
3794
3795
3796
/*
 * Copy every field of the in-memory inode into the on-disk inode item
 * sitting in @leaf.
 */
static void fill_inode_item(struct btrfs_trans_handle *trans,
			    struct extent_buffer *leaf,
			    struct btrfs_inode_item *item,
			    struct inode *inode)
{
	struct btrfs_map_token token;

	/* token caches the last mapped extent-buffer page between setters */
	btrfs_init_map_token(&token);

	btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
	btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
	btrfs_set_token_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size,
				   &token);
	btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
	btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);

	btrfs_set_token_timespec_sec(leaf, &item->atime,
				     inode->i_atime.tv_sec, &token);
	btrfs_set_token_timespec_nsec(leaf, &item->atime,
				      inode->i_atime.tv_nsec, &token);

	btrfs_set_token_timespec_sec(leaf, &item->mtime,
				     inode->i_mtime.tv_sec, &token);
	btrfs_set_token_timespec_nsec(leaf, &item->mtime,
				      inode->i_mtime.tv_nsec, &token);

	btrfs_set_token_timespec_sec(leaf, &item->ctime,
				     inode->i_ctime.tv_sec, &token);
	btrfs_set_token_timespec_nsec(leaf, &item->ctime,
				      inode->i_ctime.tv_nsec, &token);

	btrfs_set_token_timespec_sec(leaf, &item->otime,
				     BTRFS_I(inode)->i_otime.tv_sec, &token);
	btrfs_set_token_timespec_nsec(leaf, &item->otime,
				      BTRFS_I(inode)->i_otime.tv_nsec, &token);

	btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
				     &token);
	btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation,
					 &token);
	btrfs_set_token_inode_sequence(leaf, item, inode_peek_iversion(inode),
				       &token);
	btrfs_set_token_inode_transid(leaf, item, trans->transid, &token);
	btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token);
	btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token);
	btrfs_set_token_inode_block_group(leaf, item, 0, &token);
}
3844
3845
3846
3847
/*
 * Copy everything in the in-memory inode back into its inode item in the
 * btree, bypassing the delayed-inode machinery.
 *
 * Returns 0 on success, -ENOMEM if a path can't be allocated, -ENOENT if
 * the inode item can't be found, or another negative errno from the tree
 * search.
 */
static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
				struct btrfs_root *root, struct inode *inode)
{
	struct btrfs_inode_item *inode_item;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->leave_spinning = 1;
	/* cow=1: we are going to modify the leaf holding the inode item */
	ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location,
				 1);
	if (ret) {
		/* ret > 0 means the key wasn't found: the item is gone */
		if (ret > 0)
			ret = -ENOENT;
		goto failed;
	}

	leaf = path->nodes[0];
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);

	fill_inode_item(trans, leaf, inode_item, inode);
	btrfs_mark_buffer_dirty(leaf);
	/* remember which transaction last logged/updated this inode */
	btrfs_set_inode_last_trans(trans, inode);
	ret = 0;
failed:
	btrfs_free_path(path);
	return ret;
}
3881
3882
3883
3884
/*
 * Copy everything in the in-memory inode back into the btree.
 *
 * For ordinary inodes the update is deferred via the delayed-inode
 * machinery, which batches inode item writes; for free-space-cache
 * inodes, the data reloc tree and during log recovery we fall through
 * to the direct, synchronous update in btrfs_update_inode_item().
 */
noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
				struct btrfs_root *root, struct inode *inode)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	int ret;

	/*
	 * If the inode is a free space inode, we can deadlock during commit
	 * if we put it into the delayed code.
	 *
	 * The data relocation inode should also be directly updated
	 * without delay.
	 */
	if (!btrfs_is_free_space_inode(BTRFS_I(inode))
	    && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
	    && !test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
		btrfs_update_root_times(trans, root);

		ret = btrfs_delayed_update_inode(trans, root, inode);
		if (!ret)
			btrfs_set_inode_last_trans(trans, inode);
		return ret;
	}

	return btrfs_update_inode_item(trans, root, inode);
}
3911
3912noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
3913 struct btrfs_root *root,
3914 struct inode *inode)
3915{
3916 int ret;
3917
3918 ret = btrfs_update_inode(trans, root, inode);
3919 if (ret == -ENOSPC)
3920 return btrfs_update_inode_item(trans, root, inode);
3921 return ret;
3922}
3923
3924
3925
3926
3927
3928
/*
 * unlink helper that gets used here in inode.c and in the tree logging
 * recovery code.  It removes the dir entry (name, name_len) in @dir that
 * points at @inode, drops the matching inode ref/backref and the delayed
 * dir index item, scrubs the entries out of the tree log, and finally
 * updates @dir's size and timestamps.
 *
 * On failure after the directory item has been modified, the transaction
 * is aborted (the metadata would otherwise be inconsistent).
 */
static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_inode *dir,
				struct btrfs_inode *inode,
				const char *name, int name_len)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_path *path;
	int ret = 0;
	struct extent_buffer *leaf;
	struct btrfs_dir_item *di;
	struct btrfs_key key;
	u64 index;
	u64 ino = btrfs_ino(inode);
	u64 dir_ino = btrfs_ino(dir);

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	path->leave_spinning = 1;
	/* mod = -1: look the item up for deletion */
	di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
				   name, name_len, -1);
	if (IS_ERR_OR_NULL(di)) {
		ret = di ? PTR_ERR(di) : -ENOENT;
		goto err;
	}
	leaf = path->nodes[0];
	btrfs_dir_item_key_to_cpu(leaf, di, &key);
	ret = btrfs_delete_one_dir_name(trans, root, path, di);
	if (ret)
		goto err;
	btrfs_release_path(path);

	/*
	 * If we don't have dir index, we have to get it by looking up
	 * the inode ref, since we get the inode ref, remove it directly,
	 * it is unnecessary to do delayed deletion.
	 *
	 * But if we have dir index, needn't search inode ref to get it.
	 * Since the inode ref is close to the inode item, it is better
	 * that we delay to delete it, and just do this deletion when
	 * we update the inode item.
	 */
	if (inode->dir_index) {
		ret = btrfs_delayed_delete_inode_ref(inode);
		if (!ret) {
			index = inode->dir_index;
			goto skip_backref;
		}
	}

	ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
				  dir_ino, &index);
	if (ret) {
		btrfs_info(fs_info,
			"failed to delete reference to %.*s, inode %llu parent %llu",
			name_len, name, ino, dir_ino);
		btrfs_abort_transaction(trans, ret);
		goto err;
	}
skip_backref:
	ret = btrfs_delete_delayed_dir_index(trans, dir, index);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto err;
	}

	/* -ENOENT just means the inode was never logged, that's fine */
	ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode,
			dir_ino);
	if (ret != 0 && ret != -ENOENT) {
		btrfs_abort_transaction(trans, ret);
		goto err;
	}

	ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, dir,
			index);
	if (ret == -ENOENT)
		ret = 0;
	else if (ret)
		btrfs_abort_transaction(trans, ret);
err:
	btrfs_free_path(path);
	if (ret)
		goto out;

	/* each name costs name_len * 2 bytes of dir size (item + index) */
	btrfs_i_size_write(dir, dir->vfs_inode.i_size - name_len * 2);
	inode_inc_iversion(&inode->vfs_inode);
	inode_inc_iversion(&dir->vfs_inode);
	inode->vfs_inode.i_ctime = dir->vfs_inode.i_mtime =
		dir->vfs_inode.i_ctime = current_time(&inode->vfs_inode);
	ret = btrfs_update_inode(trans, root, &dir->vfs_inode);
out:
	return ret;
}
4026
4027int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
4028 struct btrfs_root *root,
4029 struct btrfs_inode *dir, struct btrfs_inode *inode,
4030 const char *name, int name_len)
4031{
4032 int ret;
4033 ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
4034 if (!ret) {
4035 drop_nlink(&inode->vfs_inode);
4036 ret = btrfs_update_inode(trans, root, &inode->vfs_inode);
4037 }
4038 return ret;
4039}
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir)
4050{
4051 struct btrfs_root *root = BTRFS_I(dir)->root;
4052
4053
4054
4055
4056
4057
4058
4059
4060 return btrfs_start_transaction_fallback_global_rsv(root, 5, 5);
4061}
4062
/*
 * VFS ->unlink for btrfs.  Starts a transaction (with a global-reserve
 * fallback, see __unlink_start_trans()), records the unlink for send,
 * removes the dir entry/ref, and if the link count dropped to zero adds
 * the inode to the orphan list so its items get cleaned up on eviction
 * (or after a crash).
 */
static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
{
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct btrfs_trans_handle *trans;
	struct inode *inode = d_inode(dentry);
	int ret;

	trans = __unlink_start_trans(dir);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	/* tell any in-progress send about the rename/unlink */
	btrfs_record_unlink_dir(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
			0);

	ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
			BTRFS_I(d_inode(dentry)), dentry->d_name.name,
			dentry->d_name.len);
	if (ret)
		goto out;

	/* last link gone: orphan-track the inode until it is evicted */
	if (inode->i_nlink == 0) {
		ret = btrfs_orphan_add(trans, BTRFS_I(inode));
		if (ret)
			goto out;
	}

out:
	btrfs_end_transaction(trans);
	btrfs_btree_balance_dirty(root->fs_info);
	return ret;
}
4094
/*
 * Remove the directory entry in @dir that points at a subvolume root
 * (a BTRFS_ROOT_ITEM_KEY dir item with objectid @objectid), along with
 * the matching root ref in the tree of tree roots and the delayed dir
 * index.  Used by rmdir on empty-subvol placeholders and by subvolume
 * deletion.
 *
 * Unlike __btrfs_unlink_inode() there is no inode ref to remove: the
 * link lives in the root tree as a root ref/backref pair.
 */
static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
			       struct inode *dir, u64 objectid,
			       const char *name, int name_len)
{
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_dir_item *di;
	struct btrfs_key key;
	u64 index;
	int ret;
	u64 dir_ino = btrfs_ino(BTRFS_I(dir));

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
				   name, name_len, -1);
	if (IS_ERR_OR_NULL(di)) {
		ret = di ? PTR_ERR(di) : -ENOENT;
		goto out;
	}

	leaf = path->nodes[0];
	btrfs_dir_item_key_to_cpu(leaf, di, &key);
	WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
	ret = btrfs_delete_one_dir_name(trans, root, path, di);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}
	btrfs_release_path(path);

	ret = btrfs_del_root_ref(trans, objectid, root->root_key.objectid,
				 dir_ino, &index, name, name_len);
	if (ret < 0) {
		if (ret != -ENOENT) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
		/*
		 * No root ref found (e.g. log replay already removed it):
		 * recover the dir index from the dir index item instead.
		 */
		di = btrfs_search_dir_index_item(root, path, dir_ino,
						 name, name_len);
		if (IS_ERR_OR_NULL(di)) {
			if (!di)
				ret = -ENOENT;
			else
				ret = PTR_ERR(di);
			btrfs_abort_transaction(trans, ret);
			goto out;
		}

		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		index = key.offset;
	}
	btrfs_release_path(path);

	ret = btrfs_delete_delayed_dir_index(trans, BTRFS_I(dir), index);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	/* each name costs name_len * 2 bytes of dir size (item + index) */
	btrfs_i_size_write(BTRFS_I(dir), dir->i_size - name_len * 2);
	inode_inc_iversion(dir);
	dir->i_mtime = dir->i_ctime = current_time(dir);
	ret = btrfs_update_inode_fallback(trans, root, dir);
	if (ret)
		btrfs_abort_transaction(trans, ret);
out:
	btrfs_free_path(path);
	return ret;
}
4169
4170
4171
4172
4173
4174static noinline int may_destroy_subvol(struct btrfs_root *root)
4175{
4176 struct btrfs_fs_info *fs_info = root->fs_info;
4177 struct btrfs_path *path;
4178 struct btrfs_dir_item *di;
4179 struct btrfs_key key;
4180 u64 dir_id;
4181 int ret;
4182
4183 path = btrfs_alloc_path();
4184 if (!path)
4185 return -ENOMEM;
4186
4187
4188 dir_id = btrfs_super_root_dir(fs_info->super_copy);
4189 di = btrfs_lookup_dir_item(NULL, fs_info->tree_root, path,
4190 dir_id, "default", 7, 0);
4191 if (di && !IS_ERR(di)) {
4192 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
4193 if (key.objectid == root->root_key.objectid) {
4194 ret = -EPERM;
4195 btrfs_err(fs_info,
4196 "deleting default subvolume %llu is not allowed",
4197 key.objectid);
4198 goto out;
4199 }
4200 btrfs_release_path(path);
4201 }
4202
4203 key.objectid = root->root_key.objectid;
4204 key.type = BTRFS_ROOT_REF_KEY;
4205 key.offset = (u64)-1;
4206
4207 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
4208 if (ret < 0)
4209 goto out;
4210 BUG_ON(ret == 0);
4211
4212 ret = 0;
4213 if (path->slots[0] > 0) {
4214 path->slots[0]--;
4215 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
4216 if (key.objectid == root->root_key.objectid &&
4217 key.type == BTRFS_ROOT_REF_KEY)
4218 ret = -ENOTEMPTY;
4219 }
4220out:
4221 btrfs_free_path(path);
4222 return ret;
4223}
4224
4225
/* Delete all dentries for inodes belonging to the root */
static void btrfs_prune_dentries(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct rb_node *node;
	struct rb_node *prev;
	struct btrfs_inode *entry;
	struct inode *inode;
	u64 objectid = 0;	/* smallest inode number not yet visited */

	if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
		WARN_ON(btrfs_root_refs(&root->root_item) != 0);

	spin_lock(&root->inode_lock);
again:
	/*
	 * Find the first in-memory inode with ino >= objectid.  The walk
	 * restarts from here each time we have to drop inode_lock below,
	 * since the rbtree may have changed in the meantime.
	 */
	node = root->inode_tree.rb_node;
	prev = NULL;
	while (node) {
		prev = node;
		entry = rb_entry(node, struct btrfs_inode, rb_node);

		if (objectid < btrfs_ino(entry))
			node = node->rb_left;
		else if (objectid > btrfs_ino(entry))
			node = node->rb_right;
		else
			break;
	}
	if (!node) {
		/* no exact match: step forward to the first ino >= objectid */
		while (prev) {
			entry = rb_entry(prev, struct btrfs_inode, rb_node);
			if (objectid <= btrfs_ino(entry)) {
				node = prev;
				break;
			}
			prev = rb_next(prev);
		}
	}
	while (node) {
		entry = rb_entry(node, struct btrfs_inode, rb_node);
		objectid = btrfs_ino(entry) + 1;
		/* igrab may fail if the inode is being evicted */
		inode = igrab(&entry->vfs_inode);
		if (inode) {
			spin_unlock(&root->inode_lock);
			if (atomic_read(&inode->i_count) > 1)
				d_prune_aliases(inode);
			/*
			 * btrfs_drop_inode will have it removed from the inode
			 * cache when its usage count hits zero.
			 */
			iput(inode);
			cond_resched();
			spin_lock(&root->inode_lock);
			goto again;
		}

		if (cond_resched_lock(&root->inode_lock))
			goto again;

		node = rb_next(node);
	}
	spin_unlock(&root->inode_lock);
}
4288
/*
 * Delete the subvolume/snapshot that @dentry in @dir refers to: unlink
 * it from its parent directory, set its root refs to 0 and insert an
 * orphan item so the cleaner thread will drop the actual tree blocks
 * later.  The SUBVOL_DEAD root flag blocks new sends while deletion is
 * in progress and is cleared again on failure.
 */
int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct inode *inode = d_inode(dentry);
	struct btrfs_root *dest = BTRFS_I(inode)->root;
	struct btrfs_trans_handle *trans;
	struct btrfs_block_rsv block_rsv;
	u64 root_flags;
	int ret;
	int err;

	/*
	 * Don't allow to delete a subvolume with send in progress. This is
	 * inside the inode lock so the error handling that has to drop the bit
	 * again is not run concurrently.
	 */
	spin_lock(&dest->root_item_lock);
	if (dest->send_in_progress) {
		spin_unlock(&dest->root_item_lock);
		btrfs_warn(fs_info,
			   "attempt to delete subvolume %llu during send",
			   dest->root_key.objectid);
		return -EPERM;
	}
	root_flags = btrfs_root_flags(&dest->root_item);
	btrfs_set_root_flags(&dest->root_item,
			     root_flags | BTRFS_ROOT_SUBVOL_DEAD);
	spin_unlock(&dest->root_item_lock);

	down_write(&fs_info->subvol_sem);

	err = may_destroy_subvol(dest);
	if (err)
		goto out_up_write;

	btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
	/*
	 * One for dir inode,
	 * two for dir entries,
	 * two for root ref/backref.
	 */
	err = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
	if (err)
		goto out_up_write;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		goto out_release;
	}
	trans->block_rsv = &block_rsv;
	trans->bytes_reserved = block_rsv.size;

	/* tell any in-progress send about the snapshot going away */
	btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));

	ret = btrfs_unlink_subvol(trans, dir, dest->root_key.objectid,
				  dentry->d_name.name, dentry->d_name.len);
	if (ret) {
		err = ret;
		btrfs_abort_transaction(trans, ret);
		goto out_end_trans;
	}

	btrfs_record_root_in_trans(trans, dest);

	/* reset drop progress so the cleaner starts from the beginning */
	memset(&dest->root_item.drop_progress, 0,
		sizeof(dest->root_item.drop_progress));
	dest->root_item.drop_level = 0;
	btrfs_set_root_refs(&dest->root_item, 0);

	/*
	 * Insert an orphan item in the tree root so the cleaner picks the
	 * dead root up after a crash or on the next mount.
	 */
	if (!test_and_set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &dest->state)) {
		ret = btrfs_insert_orphan_item(trans,
					fs_info->tree_root,
					dest->root_key.objectid);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			err = ret;
			goto out_end_trans;
		}
	}

	/* drop the uuid tree entries for this subvolume */
	ret = btrfs_uuid_tree_remove(trans, dest->root_item.uuid,
				  BTRFS_UUID_KEY_SUBVOL,
				  dest->root_key.objectid);
	if (ret && ret != -ENOENT) {
		btrfs_abort_transaction(trans, ret);
		err = ret;
		goto out_end_trans;
	}
	if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) {
		ret = btrfs_uuid_tree_remove(trans,
					  dest->root_item.received_uuid,
					  BTRFS_UUID_KEY_RECEIVED_SUBVOL,
					  dest->root_key.objectid);
		if (ret && ret != -ENOENT) {
			btrfs_abort_transaction(trans, ret);
			err = ret;
			goto out_end_trans;
		}
	}

out_end_trans:
	trans->block_rsv = NULL;
	trans->bytes_reserved = 0;
	ret = btrfs_end_transaction(trans);
	if (ret && !err)
		err = ret;
	inode->i_flags |= S_DEAD;
out_release:
	btrfs_subvolume_release_metadata(fs_info, &block_rsv);
out_up_write:
	up_write(&fs_info->subvol_sem);
	if (err) {
		/* deletion failed: make the subvolume usable again */
		spin_lock(&dest->root_item_lock);
		root_flags = btrfs_root_flags(&dest->root_item);
		btrfs_set_root_flags(&dest->root_item,
				root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
		spin_unlock(&dest->root_item_lock);
	} else {
		d_invalidate(dentry);
		btrfs_prune_dentries(dest);
		ASSERT(dest->send_in_progress == 0);

		/* the last ref */
		if (dest->ino_cache_inode) {
			iput(dest->ino_cache_inode);
			dest->ino_cache_inode = NULL;
		}
	}

	return err;
}
4422
/*
 * VFS ->rmdir for btrfs.  An empty directory has i_size equal to
 * BTRFS_EMPTY_DIR_SIZE ("." and ".." entries only).  Removing a
 * subvolume root is redirected to btrfs_delete_subvolume(); removing
 * the placeholder dir of a deleted subvolume goes through
 * btrfs_unlink_subvol().
 */
static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
{
	struct inode *inode = d_inode(dentry);
	int err = 0;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct btrfs_trans_handle *trans;
	u64 last_unlink_trans;

	if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
		return -ENOTEMPTY;
	if (btrfs_ino(BTRFS_I(inode)) == BTRFS_FIRST_FREE_OBJECTID)
		return btrfs_delete_subvolume(dir, dentry);

	trans = __unlink_start_trans(dir);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
		err = btrfs_unlink_subvol(trans, dir,
					  BTRFS_I(inode)->location.objectid,
					  dentry->d_name.name,
					  dentry->d_name.len);
		goto out;
	}

	err = btrfs_orphan_add(trans, BTRFS_I(inode));
	if (err)
		goto out;

	last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;

	/* now the directory is empty */
	err = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
			BTRFS_I(d_inode(dentry)), dentry->d_name.name,
			dentry->d_name.len);
	if (!err) {
		btrfs_i_size_write(BTRFS_I(inode), 0);
		/*
		 * Propagate the last_unlink_trans value of the deleted dir to
		 * its parent directory. This is to prevent an unrecoverable
		 * log tree in the case we do something like this:
		 * 1) create dir foo
		 * 2) create snapshot under dir foo
		 * 3) delete the snapshot
		 * 4) rmdir foo
		 * 5) mkdir foo
		 * 6) fsync foo or some file inside foo
		 */
		if (last_unlink_trans >= trans->transid)
			BTRFS_I(dir)->last_unlink_trans = last_unlink_trans;
	}
out:
	btrfs_end_transaction(trans);
	btrfs_btree_balance_dirty(root->fs_info);

	return err;
}
4480
4481
4482
4483
4484
4485#define NEED_TRUNCATE_BLOCK 1
4486
4487
4488
4489
4490
4491
4492
4493
4494
4495
4496
4497
/*
 * this can truncate away extent items, csum items and directory items.
 * It starts at a high offset and removes keys until it can't find
 * any higher than new_size
 *
 * csum items that cross the new i_size are truncated to the new size
 * as well.
 *
 * min_type is the minimum key type to truncate down to.  If set to 0, this
 * will kill all the items on this inode, including the INODE_ITEM_KEY.
 *
 * Returns 0 on success, NEED_TRUNCATE_BLOCK when a partial inline extent
 * remains and the caller must zero the tail block, -EAGAIN when the
 * transaction should be restarted, or a negative errno (in which case
 * the transaction may already have been aborted).
 */
int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct inode *inode,
			       u64 new_size, u32 min_type)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u64 extent_start = 0;
	u64 extent_num_bytes = 0;
	u64 extent_offset = 0;
	u64 item_end = 0;
	u64 last_size = new_size;
	u32 found_type = (u8)-1;
	int found_extent;
	int del_item;
	int pending_del_nr = 0;	/* number of contiguous items queued to delete */
	int pending_del_slot = 0;	/* lowest slot of the queued run */
	int extent_type = -1;
	int ret;
	u64 ino = btrfs_ino(BTRFS_I(inode));
	u64 bytes_deleted = 0;
	bool be_nice = false;	/* throttle/bail early for shrinking truncates */
	bool should_throttle = false;

	BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);

	/*
	 * For non-free space inodes and non-shareable roots, we want to back
	 * off from time to time.  This means all inodes in subvolume roots,
	 * reloc roots, and data reloc roots.
	 */
	if (!btrfs_is_free_space_inode(BTRFS_I(inode)) &&
	    test_bit(BTRFS_ROOT_REF_COWS, &root->state))
		be_nice = true;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->reada = READA_BACK;

	/*
	 * We want to drop from the next block forward in case this new size is
	 * not block aligned since we will be keeping the last block of the
	 * extent just the way it is.
	 */
	if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
	    root == fs_info->tree_root)
		btrfs_drop_extent_cache(BTRFS_I(inode), ALIGN(new_size,
					fs_info->sectorsize),
					(u64)-1, 0);

	/*
	 * This function is also used to drop the items in the log tree before
	 * we relog the inode, so if root != BTRFS_I(inode)->root, it means
	 * it is used to drop the logged items. So we shouldn't kill the delayed
	 * items.
	 */
	if (min_type == 0 && root == BTRFS_I(inode)->root)
		btrfs_kill_delayed_inode_items(BTRFS_I(inode));

	/* start at the very last possible key and walk backwards */
	key.objectid = ino;
	key.offset = (u64)-1;
	key.type = (u8)-1;

search_again:
	/*
	 * With a 16K leaf size and 128MiB extents, you can actually queue up a
	 * huge file in a single leaf.  Most of the time that bytes_deleted is
	 * > 0, it will be huge by the time we get here
	 */
	if (be_nice && bytes_deleted > SZ_32M &&
	    btrfs_should_end_transaction(trans)) {
		ret = -EAGAIN;
		goto out;
	}

	path->leave_spinning = 1;
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0)
		goto out;

	if (ret > 0) {
		ret = 0;
		/*
		 * There are no items in the tree for us to truncate, we're
		 * done.
		 */
		if (path->slots[0] == 0)
			goto out;
		path->slots[0]--;
	}

	while (1) {
		fi = NULL;
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		found_type = found_key.type;

		if (found_key.objectid != ino)
			break;

		if (found_type < min_type)
			break;

		/* compute the last byte covered by this item */
		item_end = found_key.offset;
		if (found_type == BTRFS_EXTENT_DATA_KEY) {
			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);
			extent_type = btrfs_file_extent_type(leaf, fi);
			if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
				item_end +=
				    btrfs_file_extent_num_bytes(leaf, fi);

				trace_btrfs_truncate_show_fi_regular(
					BTRFS_I(inode), leaf, fi,
					found_key.offset);
			} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
				item_end += btrfs_file_extent_ram_bytes(leaf,
									fi);

				trace_btrfs_truncate_show_fi_inline(
					BTRFS_I(inode), leaf, fi, path->slots[0],
					found_key.offset);
			}
			item_end--;
		}
		if (found_type > min_type) {
			del_item = 1;
		} else {
			if (item_end < new_size)
				break;
			if (found_key.offset >= new_size)
				del_item = 1;
			else
				del_item = 0;	/* partial truncate of this item */
		}
		found_extent = 0;
		/* FIXME, shrink the extent if the ref count is only 1 */
		if (found_type != BTRFS_EXTENT_DATA_KEY)
			goto delete;

		if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
			u64 num_dec;
			extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
			if (!del_item) {
				/* shrink the extent item in place */
				u64 orig_num_bytes =
					btrfs_file_extent_num_bytes(leaf, fi);
				extent_num_bytes = ALIGN(new_size -
						found_key.offset,
						fs_info->sectorsize);
				btrfs_set_file_extent_num_bytes(leaf, fi,
							 extent_num_bytes);
				num_dec = (orig_num_bytes -
					   extent_num_bytes);
				if (test_bit(BTRFS_ROOT_REF_COWS,
					     &root->state) &&
				    extent_start != 0)
					inode_sub_bytes(inode, num_dec);
				btrfs_mark_buffer_dirty(leaf);
			} else {
				extent_num_bytes =
					btrfs_file_extent_disk_num_bytes(leaf,
									 fi);
				extent_offset = found_key.offset -
					btrfs_file_extent_offset(leaf, fi);

				/* FIXME blocksize != 4096 */
				num_dec = btrfs_file_extent_num_bytes(leaf, fi);
				if (extent_start != 0) {
					/* not a hole: must drop the backref */
					found_extent = 1;
					if (test_bit(BTRFS_ROOT_REF_COWS,
						     &root->state))
						inode_sub_bytes(inode, num_dec);
				}
			}
		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
			/*
			 * We can't truncate inline items that have had
			 * special encodings
			 */
			if (!del_item &&
			    btrfs_file_extent_encryption(leaf, fi) == 0 &&
			    btrfs_file_extent_other_encoding(leaf, fi) == 0 &&
			    btrfs_file_extent_compression(leaf, fi) == 0) {
				u32 size = (u32)(new_size - found_key.offset);

				btrfs_set_file_extent_ram_bytes(leaf, fi, size);
				size = btrfs_file_extent_calc_inline_size(size);
				btrfs_truncate_item(path, size, 1);
			} else if (!del_item) {
				/*
				 * We have to bail so the last_size is set to
				 * just before this extent.
				 */
				ret = NEED_TRUNCATE_BLOCK;
				break;
			}

			if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
				inode_sub_bytes(inode, item_end + 1 - new_size);
		}
delete:
		if (del_item)
			last_size = found_key.offset;
		else
			last_size = new_size;
		if (del_item) {
			if (!pending_del_nr) {
				/* no pending yet, add ourselves */
				pending_del_slot = path->slots[0];
				pending_del_nr = 1;
			} else if (pending_del_nr &&
				   path->slots[0] + 1 == pending_del_slot) {
				/* hop on the pending chunk */
				pending_del_nr++;
				pending_del_slot = path->slots[0];
			} else {
				BUG();
			}
		} else {
			break;
		}
		should_throttle = false;

		if (found_extent &&
		    (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
		     root == fs_info->tree_root)) {
			struct btrfs_ref ref = { 0 };

			btrfs_set_path_blocking(path);
			bytes_deleted += extent_num_bytes;

			btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF,
					extent_start, extent_num_bytes, 0);
			ref.real_root = root->root_key.objectid;
			btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
					ino, extent_offset);
			ret = btrfs_free_extent(trans, &ref);
			if (ret) {
				btrfs_abort_transaction(trans, ret);
				break;
			}
			if (be_nice) {
				if (btrfs_should_throttle_delayed_refs(trans))
					should_throttle = true;
			}
		}

		if (found_type == BTRFS_INODE_ITEM_KEY)
			break;

		/* flush the pending run if we can't extend it downward */
		if (path->slots[0] == 0 ||
		    path->slots[0] != pending_del_slot ||
		    should_throttle) {
			if (pending_del_nr) {
				ret = btrfs_del_items(trans, root, path,
						pending_del_slot,
						pending_del_nr);
				if (ret) {
					btrfs_abort_transaction(trans, ret);
					break;
				}
				pending_del_nr = 0;
			}
			btrfs_release_path(path);

			/*
			 * We can generate a lot of delayed refs, so we need to
			 * throttle every once and a while and make sure we're
			 * adding enough space to keep up with the work we are
			 * generating.  Since we hold a transaction here we
			 * can't flush, and we don't want to FLUSH_LIMIT because
			 * we could have generated too many delayed refs to
			 * actually allocate, so just bail if we're short and
			 * let the normal reservation dance happen higher up.
			 */
			if (should_throttle) {
				ret = btrfs_delayed_refs_rsv_refill(fs_info,
							BTRFS_RESERVE_NO_FLUSH);
				if (ret) {
					ret = -EAGAIN;
					break;
				}
			}
			goto search_again;
		} else {
			path->slots[0]--;
		}
	}
out:
	if (ret >= 0 && pending_del_nr) {
		int err;

		/* flush whatever deletions are still queued */
		err = btrfs_del_items(trans, root, path, pending_del_slot,
				      pending_del_nr);
		if (err) {
			btrfs_abort_transaction(trans, err);
			ret = err;
		}
	}
	if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
		ASSERT(last_size >= new_size);
		if (!ret && last_size > new_size)
			last_size = new_size;
		btrfs_ordered_update_i_size(inode, last_size, NULL);
	}

	btrfs_free_path(path);
	return ret;
}
4810
4811
4812
4813
4814
4815
4816
4817
4818
4819
4820
4821
/*
 * btrfs_truncate_block - read, zero a chunk and write a block
 * @inode - inode that we're zeroing
 * @from - the offset to start zeroing
 * @len - the length to zero, 0 to zero the entire range respective to the
 *	offset
 * @front - zero up to the offset instead of from the offset on
 *
 * This will find the block for the "from" offset and cow the block and zero the
 * part we want to zero.  This is used with truncate and hole punching.
 */
int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
			int front)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct address_space *mapping = inode->i_mapping;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	struct extent_changeset *data_reserved = NULL;
	char *kaddr;
	u32 blocksize = fs_info->sectorsize;
	pgoff_t index = from >> PAGE_SHIFT;
	unsigned offset = from & (blocksize - 1);
	struct page *page;
	gfp_t mask = btrfs_alloc_write_mask(mapping);
	int ret = 0;
	u64 block_start;
	u64 block_end;

	/* nothing to zero if the range is already block aligned */
	if (IS_ALIGNED(offset, blocksize) &&
	    (!len || IS_ALIGNED(len, blocksize)))
		goto out;

	block_start = round_down(from, blocksize);
	block_end = block_start + blocksize - 1;

	/* reserve delalloc space for the single block we dirty */
	ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
					   block_start, blocksize);
	if (ret)
		goto out;

again:
	page = find_or_create_page(mapping, index, mask);
	if (!page) {
		btrfs_delalloc_release_space(inode, data_reserved,
					     block_start, blocksize, true);
		btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, true);
		ret = -ENOMEM;
		goto out;
	}

	if (!PageUptodate(page)) {
		ret = btrfs_readpage(NULL, page);
		lock_page(page);
		/* page may have been truncated/invalidated while unlocked */
		if (page->mapping != mapping) {
			unlock_page(page);
			put_page(page);
			goto again;
		}
		if (!PageUptodate(page)) {
			ret = -EIO;
			goto out_unlock;
		}
	}
	wait_on_page_writeback(page);

	lock_extent_bits(io_tree, block_start, block_end, &cached_state);
	set_page_extent_mapped(page);

	/* wait out any ordered extent covering our block, then retry */
	ordered = btrfs_lookup_ordered_extent(inode, block_start);
	if (ordered) {
		unlock_extent_cached(io_tree, block_start, block_end,
				     &cached_state);
		unlock_page(page);
		put_page(page);
		btrfs_start_ordered_extent(inode, ordered, 1);
		btrfs_put_ordered_extent(ordered);
		goto again;
	}

	clear_extent_bit(&BTRFS_I(inode)->io_tree, block_start, block_end,
			 EXTENT_DIRTY | EXTENT_DELALLOC |
			 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
			 0, 0, &cached_state);

	ret = btrfs_set_extent_delalloc(inode, block_start, block_end, 0,
					&cached_state, 0);
	if (ret) {
		unlock_extent_cached(io_tree, block_start, block_end,
				     &cached_state);
		goto out_unlock;
	}

	if (offset != blocksize) {
		if (!len)
			len = blocksize - offset;
		kaddr = kmap(page);
		if (front)
			/* zero [block_start, from) within the page */
			memset(kaddr + (block_start - page_offset(page)),
				0, offset);
		else
			/* zero [from, from + len) within the page */
			memset(kaddr + (block_start - page_offset(page)) + offset,
				0, len);
		flush_dcache_page(page);
		kunmap(page);
	}
	ClearPageChecked(page);
	set_page_dirty(page);
	unlock_extent_cached(io_tree, block_start, block_end, &cached_state);

out_unlock:
	if (ret)
		btrfs_delalloc_release_space(inode, data_reserved, block_start,
					     blocksize, true);
	btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, (ret != 0));
	unlock_page(page);
	put_page(page);
out:
	extent_changeset_free(data_reserved);
	return ret;
}
4933
/*
 * Insert an explicit hole file extent item for [offset, offset + len)
 * unless the filesystem has the NO_HOLES feature, in which case holes
 * are implicit and nothing needs to be written.
 */
static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode,
			     u64 offset, u64 len)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_trans_handle *trans;
	int ret;

	/*
	 * Still need to make sure the inode looks like it's been updated so
	 * that any holes get logged if we fsync.
	 */
	if (btrfs_fs_incompat(fs_info, NO_HOLES)) {
		BTRFS_I(inode)->last_trans = fs_info->generation;
		BTRFS_I(inode)->last_sub_trans = root->log_transid;
		BTRFS_I(inode)->last_log_commit = root->last_log_commit;
		return 0;
	}

	/*
	 * 1 - for the one we're dropping
	 * 1 - for the one we're adding
	 * 1 - for updating the inode.
	 */
	trans = btrfs_start_transaction(root, 3);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	/* drop whatever extent items currently cover the range */
	ret = btrfs_drop_extents(trans, root, inode, offset, offset + len, 1);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		return ret;
	}

	/* disk_bytenr 0 == explicit hole */
	ret = btrfs_insert_file_extent(trans, root, btrfs_ino(BTRFS_I(inode)),
			offset, 0, 0, len, 0, len, 0, 0, 0);
	if (ret)
		btrfs_abort_transaction(trans, ret);
	else
		btrfs_update_inode(trans, root, inode);
	btrfs_end_transaction(trans);
	return ret;
}
4977
4978
4979
4980
4981
4982
4983
/*
 * This function puts in dummy file extents for the area we're creating a hole
 * for.  So if we are truncating this file to a larger size we need to insert
 * these file extents so that i_size will match up with the actual allocation.
 *
 * We also zero the tail block of the old i_size, wait for any ordered
 * extents in the range, and insert matching hole extent maps into the
 * in-memory extent map tree.
 */
int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_map *em = NULL;
	struct extent_state *cached_state = NULL;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	u64 hole_start = ALIGN(oldsize, fs_info->sectorsize);
	u64 block_end = ALIGN(size, fs_info->sectorsize);
	u64 last_byte;
	u64 cur_offset;
	u64 hole_size;
	int err = 0;

	/*
	 * If our size started in the middle of a block we need to zero out the
	 * rest of the block before we expand the i_size, otherwise we could
	 * expose stale data.
	 */
	err = btrfs_truncate_block(inode, oldsize, 0, 0);
	if (err)
		return err;

	if (size <= hole_start)
		return 0;

	/* lock the range and wait out any ordered extents covering it */
	while (1) {
		struct btrfs_ordered_extent *ordered;

		lock_extent_bits(io_tree, hole_start, block_end - 1,
				 &cached_state);
		ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), hole_start,
						     block_end - hole_start);
		if (!ordered)
			break;
		unlock_extent_cached(io_tree, hole_start, block_end - 1,
				     &cached_state);
		btrfs_start_ordered_extent(inode, ordered, 1);
		btrfs_put_ordered_extent(ordered);
	}

	cur_offset = hole_start;
	while (1) {
		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
				block_end - cur_offset, 0);
		if (IS_ERR(em)) {
			err = PTR_ERR(em);
			em = NULL;
			break;
		}
		last_byte = min(extent_map_end(em), block_end);
		last_byte = ALIGN(last_byte, fs_info->sectorsize);
		if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
			struct extent_map *hole_em;
			hole_size = last_byte - cur_offset;

			err = maybe_insert_hole(root, inode, cur_offset,
						hole_size);
			if (err)
				break;
			btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset,
						cur_offset + hole_size - 1, 0);
			hole_em = alloc_extent_map();
			if (!hole_em) {
				/*
				 * Can't cache the hole extent map: force a
				 * full sync on the next fsync instead.
				 */
				set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
					&BTRFS_I(inode)->runtime_flags);
				goto next;
			}
			hole_em->start = cur_offset;
			hole_em->len = hole_size;
			hole_em->orig_start = cur_offset;

			hole_em->block_start = EXTENT_MAP_HOLE;
			hole_em->block_len = 0;
			hole_em->orig_block_len = 0;
			hole_em->ram_bytes = hole_size;
			hole_em->bdev = fs_info->fs_devices->latest_bdev;
			hole_em->compress_type = BTRFS_COMPRESS_NONE;
			hole_em->generation = fs_info->generation;

			/* retry add until any stale overlapping em is gone */
			while (1) {
				write_lock(&em_tree->lock);
				err = add_extent_mapping(em_tree, hole_em, 1);
				write_unlock(&em_tree->lock);
				if (err != -EEXIST)
					break;
				btrfs_drop_extent_cache(BTRFS_I(inode),
							cur_offset,
							cur_offset +
							hole_size - 1, 0);
			}
			free_extent_map(hole_em);
		}
next:
		free_extent_map(em);
		em = NULL;
		cur_offset = last_byte;
		if (cur_offset >= block_end)
			break;
	}
	free_extent_map(em);
	unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state);
	return err;
}
5089
/*
 * Handle an ATTR_SIZE change from setattr.  Growing the file inserts
 * hole extents via btrfs_cont_expand(); shrinking goes through
 * truncate_setsize() + btrfs_truncate().
 */
static int btrfs_setsize(struct inode *inode, struct iattr *attr)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_trans_handle *trans;
	loff_t oldsize = i_size_read(inode);
	loff_t newsize = attr->ia_size;
	int mask = attr->ia_valid;
	int ret;

	/*
	 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
	 * special case where we need to update the times despite not having
	 * these flags set.  For all other operations the VFS set these flags
	 * explicitly if it wants a timestamp update.
	 */
	if (newsize != oldsize) {
		inode_inc_iversion(inode);
		if (!(mask & (ATTR_CTIME | ATTR_MTIME)))
			inode->i_ctime = inode->i_mtime =
				current_time(inode);
	}

	if (newsize > oldsize) {
		/*
		 * Don't do an expanding truncate while snapshotting is ongoing.
		 * This is to ensure the snapshot captures a fully consistent
		 * state of this file - if the snapshot captures this expanding
		 * truncation, it must capture all writes that happened before
		 * this truncation.
		 */
		btrfs_wait_for_snapshot_creation(root);
		ret = btrfs_cont_expand(inode, oldsize, newsize);
		if (ret) {
			btrfs_end_write_no_snapshotting(root);
			return ret;
		}

		trans = btrfs_start_transaction(root, 1);
		if (IS_ERR(trans)) {
			btrfs_end_write_no_snapshotting(root);
			return PTR_ERR(trans);
		}

		i_size_write(inode, newsize);
		btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
		pagecache_isize_extended(inode, oldsize, newsize);
		ret = btrfs_update_inode(trans, root, inode);
		btrfs_end_write_no_snapshotting(root);
		btrfs_end_transaction(trans);
	} else {
		/*
		 * We're truncating a file that used to have good data down to
		 * zero. Make sure it gets into the ordered flush list so that
		 * any new writes get down to disk quickly.
		 */
		if (newsize == 0)
			set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
				&BTRFS_I(inode)->runtime_flags);

		truncate_setsize(inode, newsize);

		/* Disable nonlocked read DIO to avoid the endless truncate */
		btrfs_inode_block_unlocked_dio(BTRFS_I(inode));
		inode_dio_wait(inode);
		btrfs_inode_resume_unlocked_dio(BTRFS_I(inode));

		ret = btrfs_truncate(inode, newsize == oldsize);
		if (ret && inode->i_nlink) {
			int err;

			/*
			 * Truncate failed, so fix up the in-memory size. We
			 * adjusted disk_i_size down as we removed extents, so
			 * wait for disk_i_size to be stable and then update the
			 * in-memory size to match.
			 */
			err = btrfs_wait_ordered_range(inode, 0, (u64)-1);
			if (err)
				return err;
			i_size_write(inode, BTRFS_I(inode)->disk_i_size);
		}
	}

	return ret;
}
5176
5177static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
5178{
5179 struct inode *inode = d_inode(dentry);
5180 struct btrfs_root *root = BTRFS_I(inode)->root;
5181 int err;
5182
5183 if (btrfs_root_readonly(root))
5184 return -EROFS;
5185
5186 err = setattr_prepare(dentry, attr);
5187 if (err)
5188 return err;
5189
5190 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
5191 err = btrfs_setsize(inode, attr);
5192 if (err)
5193 return err;
5194 }
5195
5196 if (attr->ia_valid) {
5197 setattr_copy(inode, attr);
5198 inode_inc_iversion(inode);
5199 err = btrfs_dirty_inode(inode);
5200
5201 if (!err && attr->ia_valid & ATTR_MODE)
5202 err = posix_acl_chmod(inode, inode->i_mode);
5203 }
5204
5205 return err;
5206}
5207
5208
5209
5210
5211
5212
5213
5214
5215
5216
5217
5218
5219
/*
 * Called on inode eviction: drop all page cache, release every extent
 * map and every extent state attached to the inode, and return any
 * qgroup-reserved delalloc space along the way.
 */
static void evict_inode_truncate_pages(struct inode *inode)
{
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_map_tree *map_tree = &BTRFS_I(inode)->extent_tree;
	struct rb_node *node;

	ASSERT(inode->i_state & I_FREEING);
	truncate_inode_pages_final(&inode->i_data);

	/* Tear down the extent map tree, one mapping at a time. */
	write_lock(&map_tree->lock);
	while (!RB_EMPTY_ROOT(&map_tree->map.rb_root)) {
		struct extent_map *em;

		node = rb_first_cached(&map_tree->map);
		em = rb_entry(node, struct extent_map, rb_node);
		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
		clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
		remove_extent_mapping(map_tree, em);
		free_extent_map(em);
		if (need_resched()) {
			write_unlock(&map_tree->lock);
			cond_resched();
			write_lock(&map_tree->lock);
		}
	}
	write_unlock(&map_tree->lock);

	/*
	 * Keep looping until we have no more ranges in the io tree.
	 * We can have ongoing bios started by readahead whose endio callback
	 * (extent_io.c:end_bio_extent_readpage) is still in progress (pages
	 * in the bio are unlocked but the io tree ranges are not yet).  Those
	 * bios are executed by a separate task (work queue kthread) and did
	 * not take inode references before submission, so we effectively end
	 * up waiting here for them - and for anyone else holding locked
	 * ranges without a bumped inode reference count.  If we didn't, they
	 * might access the inode's io_tree to unlock a range after the inode
	 * is freed, a use-after-free.
	 */
	spin_lock(&io_tree->lock);
	while (!RB_EMPTY_ROOT(&io_tree->state)) {
		struct extent_state *state;
		struct extent_state *cached_state = NULL;
		u64 start;
		u64 end;
		unsigned state_flags;

		node = rb_first(&io_tree->state);
		state = rb_entry(node, struct extent_state, rb_node);
		start = state->start;
		end = state->end;
		state_flags = state->state;
		spin_unlock(&io_tree->lock);

		lock_extent_bits(io_tree, start, end, &cached_state);

		/*
		 * If the range still has the DELALLOC flag, the extent never
		 * reached disk and its reserved space won't be freed by a
		 * delayed ref, so free the qgroup reservation here.
		 * (Refer to the comment in btrfs_invalidatepage, case 2.)
		 *
		 * Note: end is the bytenr of the last byte, hence the + 1.
		 */
		if (state_flags & EXTENT_DELALLOC)
			btrfs_qgroup_free_data(inode, NULL, start, end - start + 1);

		clear_extent_bit(io_tree, start, end,
				 EXTENT_LOCKED | EXTENT_DIRTY |
				 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
				 EXTENT_DEFRAG, 1, 1, &cached_state);

		cond_resched();
		spin_lock(&io_tree->lock);
	}
	spin_unlock(&io_tree->lock);
}
5301
/*
 * Refill @rsv and join a transaction for inode eviction, stealing from
 * the global reserve as a last resort.  Returns a transaction handle,
 * or an ERR_PTR (-ENOSPC after repeated refill failures).
 */
static struct btrfs_trans_handle *evict_refill_and_join(struct btrfs_root *root,
					struct btrfs_block_rsv *rsv)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
	u64 delayed_refs_extra = btrfs_calc_trans_metadata_size(fs_info, 1);
	int failures = 0;

	for (;;) {
		struct btrfs_trans_handle *trans;
		int ret;

		ret = btrfs_block_rsv_refill(root, rsv,
					     rsv->size + delayed_refs_extra,
					     BTRFS_RESERVE_FLUSH_LIMIT);

		/* Give up after three failed refill attempts. */
		if (ret && ++failures > 2) {
			btrfs_warn(fs_info,
				   "could not allocate space for a delete; will truncate on mount");
			return ERR_PTR(-ENOSPC);
		}

		/*
		 * Evict can generate a large amount of delayed refs without
		 * a way to add space back, since we exhaust our temporary
		 * block rsv.  We aren't allowed to do FLUSH_ALL here because
		 * we could deadlock with the flushing code, so try to hold
		 * some extra space (delayed_refs_extra) to compensate for the
		 * delayed refs we generate.  If we got that space, migrate it
		 * into the transaction's reservation.
		 */
		trans = btrfs_join_transaction(root);
		if (IS_ERR(trans) || !ret) {
			if (!IS_ERR(trans)) {
				trans->block_rsv = &fs_info->trans_block_rsv;
				trans->bytes_reserved = delayed_refs_extra;
				btrfs_block_rsv_migrate(rsv, trans->block_rsv,
							delayed_refs_extra, 1);
			}
			return trans;
		}

		/*
		 * Try to steal from the global reserve if there is space for
		 * it.
		 */
		if (!btrfs_check_space_for_delayed_refs(fs_info) &&
		    !btrfs_block_rsv_migrate(global_rsv, rsv, rsv->size, 0))
			return trans;

		/* If not, commit to free up space and try again. */
		ret = btrfs_commit_transaction(trans);
		if (ret)
			return ERR_PTR(ret);
	}
}
5361
/*
 * Final iput() path: remove the inode's items from the tree if it has
 * been unlinked, or just drop the in-core state if its items must stay
 * (still linked, free space inode, log recovery in progress, ...).
 */
void btrfs_evict_inode(struct inode *inode)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_block_rsv *rsv;
	int ret;

	trace_btrfs_inode_evict(inode);

	if (!root) {
		clear_inode(inode);
		return;
	}

	evict_inode_truncate_pages(inode);

	/* Linked inodes (and free space inodes) keep their on-disk items. */
	if (inode->i_nlink &&
	    ((btrfs_root_refs(&root->root_item) != 0 &&
	      root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID) ||
	     btrfs_is_free_space_inode(BTRFS_I(inode))))
		goto no_delete;

	if (is_bad_inode(inode))
		goto no_delete;

	btrfs_free_io_failure_record(BTRFS_I(inode), 0, (u64)-1);

	if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
		goto no_delete;

	if (inode->i_nlink > 0) {
		BUG_ON(btrfs_root_refs(&root->root_item) != 0 &&
		       root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID);
		goto no_delete;
	}

	ret = btrfs_commit_inode_delayed_inode(BTRFS_I(inode));
	if (ret)
		goto no_delete;

	rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
	if (!rsv)
		goto no_delete;
	rsv->size = btrfs_calc_trunc_metadata_size(fs_info, 1);
	rsv->failfast = 1;

	btrfs_i_size_write(BTRFS_I(inode), 0);

	/*
	 * Drop all the inode's items; each pass uses a freshly refilled
	 * reservation, retrying on -ENOSPC/-EAGAIN until the truncate is
	 * complete.
	 */
	while (1) {
		trans = evict_refill_and_join(root, rsv);
		if (IS_ERR(trans))
			goto free_rsv;

		trans->block_rsv = rsv;

		ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
		trans->block_rsv = &fs_info->trans_block_rsv;
		btrfs_end_transaction(trans);
		btrfs_btree_balance_dirty(fs_info);
		if (ret && ret != -ENOSPC && ret != -EAGAIN)
			goto free_rsv;
		else if (!ret)
			break;
	}

	/*
	 * Errors here aren't a big deal, it just means we leave orphan items
	 * in the tree.  They will be cleaned up on the next mount.  If the
	 * inode number gets reused, cleanup deletes the orphan item without
	 * doing anything, and unlink reuses the existing orphan item.
	 *
	 * If it turns out that we are dropping too many of these, we might
	 * want to add a mechanism for retrying these after a commit.
	 */
	trans = evict_refill_and_join(root, rsv);
	if (!IS_ERR(trans)) {
		trans->block_rsv = rsv;
		btrfs_orphan_del(trans, BTRFS_I(inode));
		trans->block_rsv = &fs_info->trans_block_rsv;
		btrfs_end_transaction(trans);
	}

	if (!(root == fs_info->tree_root ||
	      root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID))
		btrfs_return_ino(root, btrfs_ino(BTRFS_I(inode)));

free_rsv:
	btrfs_free_block_rsv(fs_info, rsv);
no_delete:
	/*
	 * If we didn't successfully delete, the orphan item will still be in
	 * the tree and we'll retry on the next mount.
	 */
	btrfs_remove_delayed_node(BTRFS_I(inode));
	clear_inode(inode);
}
5460
5461
5462
5463
5464
5465
5466
5467
5468static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
5469 struct btrfs_key *location, u8 *type)
5470{
5471 const char *name = dentry->d_name.name;
5472 int namelen = dentry->d_name.len;
5473 struct btrfs_dir_item *di;
5474 struct btrfs_path *path;
5475 struct btrfs_root *root = BTRFS_I(dir)->root;
5476 int ret = 0;
5477
5478 path = btrfs_alloc_path();
5479 if (!path)
5480 return -ENOMEM;
5481
5482 di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(BTRFS_I(dir)),
5483 name, namelen, 0);
5484 if (IS_ERR_OR_NULL(di)) {
5485 ret = di ? PTR_ERR(di) : -ENOENT;
5486 goto out;
5487 }
5488
5489 btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
5490 if (location->type != BTRFS_INODE_ITEM_KEY &&
5491 location->type != BTRFS_ROOT_ITEM_KEY) {
5492 ret = -EUCLEAN;
5493 btrfs_warn(root->fs_info,
5494"%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))",
5495 __func__, name, btrfs_ino(BTRFS_I(dir)),
5496 location->objectid, location->type, location->offset);
5497 }
5498 if (!ret)
5499 *type = btrfs_dir_type(path->nodes[0], di);
5500out:
5501 btrfs_free_path(path);
5502 return ret;
5503}
5504
5505
5506
5507
5508
5509
/*
 * When we hit a tree root in a directory, the btrfs part of the inode
 * needs to be changed to reflect the root directory of the tree root.
 * This is kind of like crossing a mount point.
 */
static int fixup_tree_root_location(struct btrfs_fs_info *fs_info,
				    struct inode *dir,
				    struct dentry *dentry,
				    struct btrfs_key *location,
				    struct btrfs_root **sub_root)
{
	struct btrfs_path *path;
	struct btrfs_root *new_root;
	struct btrfs_root_ref *ref;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	int ret;
	int err = 0;

	path = btrfs_alloc_path();
	if (!path) {
		err = -ENOMEM;
		goto out;
	}

	/* Default to "not found" until every check below has passed. */
	err = -ENOENT;
	key.objectid = BTRFS_I(dir)->root->root_key.objectid;
	key.type = BTRFS_ROOT_REF_KEY;
	key.offset = location->objectid;

	ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
	if (ret) {
		if (ret < 0)
			err = ret;
		goto out;
	}

	leaf = path->nodes[0];
	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
	/* The root ref must match this directory and the entry's name. */
	if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(BTRFS_I(dir)) ||
	    btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len)
		goto out;

	ret = memcmp_extent_buffer(leaf, dentry->d_name.name,
				   (unsigned long)(ref + 1),
				   dentry->d_name.len);
	if (ret)
		goto out;

	btrfs_release_path(path);

	new_root = btrfs_read_fs_root_no_name(fs_info, location);
	if (IS_ERR(new_root)) {
		err = PTR_ERR(new_root);
		goto out;
	}

	*sub_root = new_root;
	/* Re-point the location at the subvolume's root directory inode. */
	location->objectid = btrfs_root_dirid(&new_root->root_item);
	location->type = BTRFS_INODE_ITEM_KEY;
	location->offset = 0;
	err = 0;
out:
	btrfs_free_path(path);
	return err;
}
5571
5572static void inode_tree_add(struct inode *inode)
5573{
5574 struct btrfs_root *root = BTRFS_I(inode)->root;
5575 struct btrfs_inode *entry;
5576 struct rb_node **p;
5577 struct rb_node *parent;
5578 struct rb_node *new = &BTRFS_I(inode)->rb_node;
5579 u64 ino = btrfs_ino(BTRFS_I(inode));
5580
5581 if (inode_unhashed(inode))
5582 return;
5583 parent = NULL;
5584 spin_lock(&root->inode_lock);
5585 p = &root->inode_tree.rb_node;
5586 while (*p) {
5587 parent = *p;
5588 entry = rb_entry(parent, struct btrfs_inode, rb_node);
5589
5590 if (ino < btrfs_ino(entry))
5591 p = &parent->rb_left;
5592 else if (ino > btrfs_ino(entry))
5593 p = &parent->rb_right;
5594 else {
5595 WARN_ON(!(entry->vfs_inode.i_state &
5596 (I_WILL_FREE | I_FREEING)));
5597 rb_replace_node(parent, new, &root->inode_tree);
5598 RB_CLEAR_NODE(parent);
5599 spin_unlock(&root->inode_lock);
5600 return;
5601 }
5602 }
5603 rb_link_node(new, parent, p);
5604 rb_insert_color(new, &root->inode_tree);
5605 spin_unlock(&root->inode_lock);
5606}
5607
5608static void inode_tree_del(struct inode *inode)
5609{
5610 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
5611 struct btrfs_root *root = BTRFS_I(inode)->root;
5612 int empty = 0;
5613
5614 spin_lock(&root->inode_lock);
5615 if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) {
5616 rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree);
5617 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
5618 empty = RB_EMPTY_ROOT(&root->inode_tree);
5619 }
5620 spin_unlock(&root->inode_lock);
5621
5622 if (empty && btrfs_root_refs(&root->root_item) == 0) {
5623 synchronize_srcu(&fs_info->subvol_srcu);
5624 spin_lock(&root->inode_lock);
5625 empty = RB_EMPTY_ROOT(&root->inode_tree);
5626 spin_unlock(&root->inode_lock);
5627 if (empty)
5628 btrfs_add_dead_root(root);
5629 }
5630}
5631
5632
5633static int btrfs_init_locked_inode(struct inode *inode, void *p)
5634{
5635 struct btrfs_iget_args *args = p;
5636 inode->i_ino = args->location->objectid;
5637 memcpy(&BTRFS_I(inode)->location, args->location,
5638 sizeof(*args->location));
5639 BTRFS_I(inode)->root = args->root;
5640 return 0;
5641}
5642
5643static int btrfs_find_actor(struct inode *inode, void *opaque)
5644{
5645 struct btrfs_iget_args *args = opaque;
5646 return args->location->objectid == BTRFS_I(inode)->location.objectid &&
5647 args->root == BTRFS_I(inode)->root;
5648}
5649
5650static struct inode *btrfs_iget_locked(struct super_block *s,
5651 struct btrfs_key *location,
5652 struct btrfs_root *root)
5653{
5654 struct inode *inode;
5655 struct btrfs_iget_args args;
5656 unsigned long hashval = btrfs_inode_hash(location->objectid, root);
5657
5658 args.location = location;
5659 args.root = root;
5660
5661 inode = iget5_locked(s, hashval, btrfs_find_actor,
5662 btrfs_init_locked_inode,
5663 (void *)&args);
5664 return inode;
5665}
5666
5667
5668
5669
5670struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location,
5671 struct btrfs_root *root, int *new,
5672 struct btrfs_path *path)
5673{
5674 struct inode *inode;
5675
5676 inode = btrfs_iget_locked(s, location, root);
5677 if (!inode)
5678 return ERR_PTR(-ENOMEM);
5679
5680 if (inode->i_state & I_NEW) {
5681 int ret;
5682
5683 ret = btrfs_read_locked_inode(inode, path);
5684 if (!ret) {
5685 inode_tree_add(inode);
5686 unlock_new_inode(inode);
5687 if (new)
5688 *new = 1;
5689 } else {
5690 iget_failed(inode);
5691
5692
5693
5694
5695
5696 if (ret > 0)
5697 ret = -ENOENT;
5698 inode = ERR_PTR(ret);
5699 }
5700 }
5701
5702 return inode;
5703}
5704
/* Convenience wrapper around btrfs_iget_path() without a preallocated path. */
struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
			 struct btrfs_root *root, int *new)
{
	return btrfs_iget_path(s, location, root, new, NULL);
}
5710
/*
 * Build an in-memory placeholder directory inode (BTRFS_INODE_DUMMY)
 * for a subvolume root whose root ref could not be found.  It behaves
 * as an empty read-mostly directory and is never written to disk.
 */
static struct inode *new_simple_dir(struct super_block *s,
				    struct btrfs_key *key,
				    struct btrfs_root *root)
{
	struct inode *inode = new_inode(s);

	if (!inode)
		return ERR_PTR(-ENOMEM);

	BTRFS_I(inode)->root = root;
	memcpy(&BTRFS_I(inode)->location, key, sizeof(*key));
	set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags);

	inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID;
	/* Read-only dir ops; no xattrs on this placeholder. */
	inode->i_op = &btrfs_dir_ro_inode_operations;
	inode->i_opflags &= ~IOP_XATTR;
	inode->i_fop = &simple_dir_operations;
	inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
	inode->i_mtime = current_time(inode);
	inode->i_atime = inode->i_mtime;
	inode->i_ctime = inode->i_mtime;
	BTRFS_I(inode)->i_otime = inode->i_mtime;

	return inode;
}
5736
/* Map an inode's mode to the btrfs dir-entry type stored on disk. */
static inline u8 btrfs_inode_type(struct inode *inode)
{
	/*
	 * Compile-time asserts that generic FT_x types still match
	 * BTRFS_FT_x types
	 */
	BUILD_BUG_ON(BTRFS_FT_UNKNOWN != FT_UNKNOWN);
	BUILD_BUG_ON(BTRFS_FT_REG_FILE != FT_REG_FILE);
	BUILD_BUG_ON(BTRFS_FT_DIR != FT_DIR);
	BUILD_BUG_ON(BTRFS_FT_CHRDEV != FT_CHRDEV);
	BUILD_BUG_ON(BTRFS_FT_BLKDEV != FT_BLKDEV);
	BUILD_BUG_ON(BTRFS_FT_FIFO != FT_FIFO);
	BUILD_BUG_ON(BTRFS_FT_SOCK != FT_SOCK);
	BUILD_BUG_ON(BTRFS_FT_SYMLINK != FT_SYMLINK);

	return fs_umode_to_ftype(inode->i_mode);
}
5754
/*
 * Resolve @dentry inside @dir to an inode.  Handles both regular
 * entries (INODE_ITEM keys) and subvolume crossings (ROOT_ITEM keys),
 * falling back to a placeholder directory for subvolume roots that
 * cannot be resolved.
 */
struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
	struct inode *inode;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct btrfs_root *sub_root = root;
	struct btrfs_key location;
	u8 di_type = 0;
	int index;
	int ret = 0;

	if (dentry->d_name.len > BTRFS_NAME_LEN)
		return ERR_PTR(-ENAMETOOLONG);

	ret = btrfs_inode_by_name(dir, dentry, &location, &di_type);
	if (ret < 0)
		return ERR_PTR(ret);

	if (location.type == BTRFS_INODE_ITEM_KEY) {
		inode = btrfs_iget(dir->i_sb, &location, root, NULL);
		if (IS_ERR(inode))
			return inode;

		/* Do extra check against inode mode with di_type */
		if (btrfs_inode_type(inode) != di_type) {
			btrfs_crit(fs_info,
"inode mode mismatch with dir: inode mode=0%o btrfs type=%u dir type=%u",
				  inode->i_mode, btrfs_inode_type(inode),
				  di_type);
			iput(inode);
			return ERR_PTR(-EUCLEAN);
		}
		return inode;
	}

	/* ROOT_ITEM key: crossing a subvolume boundary, protected by SRCU. */
	index = srcu_read_lock(&fs_info->subvol_srcu);
	ret = fixup_tree_root_location(fs_info, dir, dentry,
				       &location, &sub_root);
	if (ret < 0) {
		if (ret != -ENOENT)
			inode = ERR_PTR(ret);
		else
			inode = new_simple_dir(dir->i_sb, &location, sub_root);
	} else {
		inode = btrfs_iget(dir->i_sb, &location, sub_root, NULL);
	}
	srcu_read_unlock(&fs_info->subvol_srcu, index);

	if (!IS_ERR(inode) && root != sub_root) {
		/* First access of a subvolume: run its orphan cleanup. */
		down_read(&fs_info->cleanup_work_sem);
		if (!sb_rdonly(inode->i_sb))
			ret = btrfs_orphan_cleanup(sub_root);
		up_read(&fs_info->cleanup_work_sem);
		if (ret) {
			iput(inode);
			inode = ERR_PTR(ret);
		}
	}

	return inode;
}
5816
5817static int btrfs_dentry_delete(const struct dentry *dentry)
5818{
5819 struct btrfs_root *root;
5820 struct inode *inode = d_inode(dentry);
5821
5822 if (!inode && !IS_ROOT(dentry))
5823 inode = d_inode(dentry->d_parent);
5824
5825 if (inode) {
5826 root = BTRFS_I(inode)->root;
5827 if (btrfs_root_refs(&root->root_item) == 0)
5828 return 1;
5829
5830 if (btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
5831 return 1;
5832 }
5833 return 0;
5834}
5835
5836static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
5837 unsigned int flags)
5838{
5839 struct inode *inode = btrfs_lookup_dentry(dir, dentry);
5840
5841 if (inode == ERR_PTR(-ENOENT))
5842 inode = NULL;
5843 return d_splice_alias(inode, dentry);
5844}
5845
5846
5847
5848
5849
5850
5851
5852
5853
5854
5855static int btrfs_opendir(struct inode *inode, struct file *file)
5856{
5857 struct btrfs_file_private *private;
5858
5859 private = kzalloc(sizeof(struct btrfs_file_private), GFP_KERNEL);
5860 if (!private)
5861 return -ENOMEM;
5862 private->filldir_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
5863 if (!private->filldir_buf) {
5864 kfree(private);
5865 return -ENOMEM;
5866 }
5867 file->private_data = private;
5868 return 0;
5869}
5870
/*
 * One entry staged in the readdir filldir buffer; the name bytes follow
 * the struct directly (see btrfs_filldir()).
 */
struct dir_entry {
	u64 ino;	/* objectid handed to dir_emit() */
	u64 offset;	/* dir index offset, becomes ctx->pos */
	unsigned type;	/* d_type value */
	int name_len;	/* length of the name that follows this struct */
};
5877
5878static int btrfs_filldir(void *addr, int entries, struct dir_context *ctx)
5879{
5880 while (entries--) {
5881 struct dir_entry *entry = addr;
5882 char *name = (char *)(entry + 1);
5883
5884 ctx->pos = get_unaligned(&entry->offset);
5885 if (!dir_emit(ctx, name, get_unaligned(&entry->name_len),
5886 get_unaligned(&entry->ino),
5887 get_unaligned(&entry->type)))
5888 return 1;
5889 addr += sizeof(struct dir_entry) +
5890 get_unaligned(&entry->name_len);
5891 ctx->pos++;
5892 }
5893 return 0;
5894}
5895
/*
 * ->iterate_shared for directories.  Walks DIR_INDEX items starting at
 * ctx->pos, batching entries into the per-file buffer allocated in
 * btrfs_opendir() before handing them to the VFS, then merges in the
 * not-yet-committed delayed items.
 */
static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
{
	struct inode *inode = file_inode(file);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_file_private *private = file->private_data;
	struct btrfs_dir_item *di;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_path *path;
	void *addr;
	struct list_head ins_list;
	struct list_head del_list;
	int ret;
	struct extent_buffer *leaf;
	int slot;
	char *name_ptr;
	int name_len;
	int entries = 0;
	int total_len = 0;
	bool put = false;
	struct btrfs_key location;

	if (!dir_emit_dots(file, ctx))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	addr = private->filldir_buf;
	path->reada = READA_FORWARD;

	/* Snapshot the delayed (not yet committed) insertions/deletions. */
	INIT_LIST_HEAD(&ins_list);
	INIT_LIST_HEAD(&del_list);
	put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list);

again:
	key.type = BTRFS_DIR_INDEX_KEY;
	key.offset = ctx->pos;
	key.objectid = btrfs_ino(BTRFS_I(inode));

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto err;

	while (1) {
		struct dir_entry *entry;

		leaf = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto err;
			else if (ret > 0)
				break;
			continue;
		}

		btrfs_item_key_to_cpu(leaf, &found_key, slot);

		if (found_key.objectid != key.objectid)
			break;
		if (found_key.type != BTRFS_DIR_INDEX_KEY)
			break;
		if (found_key.offset < ctx->pos)
			goto next;
		if (btrfs_should_delete_dir_index(&del_list, found_key.offset))
			goto next;
		di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
		name_len = btrfs_dir_name_len(leaf, di);
		if ((total_len + sizeof(struct dir_entry) + name_len) >=
		    PAGE_SIZE) {
			/* Buffer full: flush to the VFS and restart the search. */
			btrfs_release_path(path);
			ret = btrfs_filldir(private->filldir_buf, entries, ctx);
			if (ret)
				goto nopos;
			addr = private->filldir_buf;
			entries = 0;
			total_len = 0;
			goto again;
		}

		entry = addr;
		put_unaligned(name_len, &entry->name_len);
		name_ptr = (char *)(entry + 1);
		read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1),
				   name_len);
		put_unaligned(fs_ftype_to_dtype(btrfs_dir_type(leaf, di)),
				&entry->type);
		btrfs_dir_item_key_to_cpu(leaf, di, &location);
		put_unaligned(location.objectid, &entry->ino);
		put_unaligned(found_key.offset, &entry->offset);
		entries++;
		addr += sizeof(struct dir_entry) + name_len;
		total_len += sizeof(struct dir_entry) + name_len;
next:
		path->slots[0]++;
	}
	btrfs_release_path(path);

	ret = btrfs_filldir(private->filldir_buf, entries, ctx);
	if (ret)
		goto nopos;

	ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list);
	if (ret)
		goto nopos;

	/*
	 * Stop new entries from being returned after we return the last
	 * entry.
	 *
	 * New directory entries are assigned a strictly increasing
	 * offset.  This means that new entries created during readdir
	 * are *guaranteed* to be seen in the future by that readdir.
	 * This has broken buggy programs which operate on names as
	 * they're returned by readdir.  Until we re-use freed offsets
	 * we have this hack to stop new entries from being returned
	 * under the assumption that they'll never reach this huge
	 * offset.
	 *
	 * This is being careful not to overflow 32bit loff_t unless the
	 * last entry requires it because doing so has broken 32bit apps
	 * in the past.
	 */
	if (ctx->pos >= INT_MAX)
		ctx->pos = LLONG_MAX;
	else
		ctx->pos = INT_MAX;
nopos:
	ret = 0;
err:
	if (put)
		btrfs_readdir_put_delayed_items(inode, &ins_list, &del_list);
	btrfs_free_path(path);
	return ret;
}
6034
6035
6036
6037
6038
6039
6040
6041static int btrfs_dirty_inode(struct inode *inode)
6042{
6043 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
6044 struct btrfs_root *root = BTRFS_I(inode)->root;
6045 struct btrfs_trans_handle *trans;
6046 int ret;
6047
6048 if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
6049 return 0;
6050
6051 trans = btrfs_join_transaction(root);
6052 if (IS_ERR(trans))
6053 return PTR_ERR(trans);
6054
6055 ret = btrfs_update_inode(trans, root, inode);
6056 if (ret && ret == -ENOSPC) {
6057
6058 btrfs_end_transaction(trans);
6059 trans = btrfs_start_transaction(root, 1);
6060 if (IS_ERR(trans))
6061 return PTR_ERR(trans);
6062
6063 ret = btrfs_update_inode(trans, root, inode);
6064 }
6065 btrfs_end_transaction(trans);
6066 if (BTRFS_I(inode)->delayed_node)
6067 btrfs_balance_delayed_items(fs_info);
6068
6069 return ret;
6070}
6071
6072
6073
6074
6075
6076static int btrfs_update_time(struct inode *inode, struct timespec64 *now,
6077 int flags)
6078{
6079 struct btrfs_root *root = BTRFS_I(inode)->root;
6080 bool dirty = flags & ~S_VERSION;
6081
6082 if (btrfs_root_readonly(root))
6083 return -EROFS;
6084
6085 if (flags & S_VERSION)
6086 dirty |= inode_maybe_inc_iversion(inode, dirty);
6087 if (flags & S_CTIME)
6088 inode->i_ctime = *now;
6089 if (flags & S_MTIME)
6090 inode->i_mtime = *now;
6091 if (flags & S_ATIME)
6092 inode->i_atime = *now;
6093 return dirty ? btrfs_dirty_inode(inode) : 0;
6094}
6095
6096
6097
6098
6099
6100
/*
 * Find the highest existing sequence number in a directory and then set
 * the in-memory index_cnt variable to the first free sequence number.
 */
static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
{
	struct btrfs_root *root = inode->root;
	struct btrfs_key key, found_key;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	int ret;

	key.objectid = btrfs_ino(inode);
	key.type = BTRFS_DIR_INDEX_KEY;
	key.offset = (u64)-1;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	/* An exact match on offset (u64)-1 is unexpected; bail out. */
	if (ret == 0)
		goto out;
	ret = 0;

	/*
	 * MAGIC NUMBER EXPLANATION:
	 * since we search a directory based on f_pos we have to start at 2
	 * since '.' and '..' have f_pos of 0 and 1 respectively, so everybody
	 * else has to start at 2
	 */
	if (path->slots[0] == 0) {
		inode->index_cnt = 2;
		goto out;
	}

	path->slots[0]--;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

	if (found_key.objectid != btrfs_ino(inode) ||
	    found_key.type != BTRFS_DIR_INDEX_KEY) {
		/* No dir index items at all: directory is empty. */
		inode->index_cnt = 2;
		goto out;
	}

	inode->index_cnt = found_key.offset + 1;
out:
	btrfs_free_path(path);
	return ret;
}
6152
6153
6154
6155
6156
6157int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index)
6158{
6159 int ret = 0;
6160
6161 if (dir->index_cnt == (u64)-1) {
6162 ret = btrfs_inode_delayed_dir_index_count(dir);
6163 if (ret) {
6164 ret = btrfs_set_inode_index_count(dir);
6165 if (ret)
6166 return ret;
6167 }
6168 }
6169
6170 *index = dir->index_cnt;
6171 dir->index_cnt++;
6172
6173 return ret;
6174}
6175
6176static int btrfs_insert_inode_locked(struct inode *inode)
6177{
6178 struct btrfs_iget_args args;
6179 args.location = &BTRFS_I(inode)->location;
6180 args.root = BTRFS_I(inode)->root;
6181
6182 return insert_inode_locked4(inode,
6183 btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root),
6184 btrfs_find_actor, &args);
6185}
6186
6187
6188
6189
6190
6191
6192static void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
6193{
6194 unsigned int flags;
6195
6196 if (!dir)
6197 return;
6198
6199 flags = BTRFS_I(dir)->flags;
6200
6201 if (flags & BTRFS_INODE_NOCOMPRESS) {
6202 BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
6203 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
6204 } else if (flags & BTRFS_INODE_COMPRESS) {
6205 BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
6206 BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
6207 }
6208
6209 if (flags & BTRFS_INODE_NODATACOW) {
6210 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
6211 if (S_ISREG(inode->i_mode))
6212 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
6213 }
6214
6215 btrfs_sync_inode_flags_to_i_flags(inode);
6216}
6217
/*
 * Create a new on-disk inode item in @root (plus its first INODE_REF
 * when @name is given) and return the matching in-core inode, already
 * hashed and locked.  A NULL @name creates an unlinked (O_TMPFILE
 * style) inode with nlink 0.
 */
static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     struct inode *dir,
				     const char *name, int name_len,
				     u64 ref_objectid, u64 objectid,
				     umode_t mode, u64 *index)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct inode *inode;
	struct btrfs_inode_item *inode_item;
	struct btrfs_key *location;
	struct btrfs_path *path;
	struct btrfs_inode_ref *ref;
	struct btrfs_key key[2];
	u32 sizes[2];
	int nitems = name ? 2 : 1;
	unsigned long ptr;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return ERR_PTR(-ENOMEM);

	inode = new_inode(fs_info->sb);
	if (!inode) {
		btrfs_free_path(path);
		return ERR_PTR(-ENOMEM);
	}

	/* An unnamed inode (O_TMPFILE style) starts with no links. */
	if (!name)
		set_nlink(inode, 0);

	/*
	 * we have to initialize this early, so we can reclaim the inode
	 * number if we fail afterwards in this function.
	 */
	inode->i_ino = objectid;

	if (dir && name) {
		trace_btrfs_inode_request(dir);

		ret = btrfs_set_inode_index(BTRFS_I(dir), index);
		if (ret) {
			btrfs_free_path(path);
			iput(inode);
			return ERR_PTR(ret);
		}
	} else if (dir) {
		*index = 0;
	}
	/*
	 * index_cnt is ignored for everything but a dir,
	 * btrfs_set_inode_index_count has an explanation for the magic
	 * number
	 */
	BTRFS_I(inode)->index_cnt = 2;
	BTRFS_I(inode)->dir_index = *index;
	BTRFS_I(inode)->root = root;
	BTRFS_I(inode)->generation = trans->transid;
	inode->i_generation = BTRFS_I(inode)->generation;

	/*
	 * We could have gotten an inode number from somebody who was fsynced
	 * and then removed in this same transaction, so let's just set full
	 * sync since it will be a full sync anyway and this will blow away
	 * the old info in the log.
	 */
	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);

	key[0].objectid = objectid;
	key[0].type = BTRFS_INODE_ITEM_KEY;
	key[0].offset = 0;

	sizes[0] = sizeof(struct btrfs_inode_item);

	if (name) {
		/*
		 * Start new inodes with an inode_ref. This is slightly more
		 * efficient for small numbers of hard links since they will
		 * be packed into one item. Extended refs will kick in if we
		 * add more hard links than can fit in the ref item.
		 */
		key[1].objectid = objectid;
		key[1].type = BTRFS_INODE_REF_KEY;
		key[1].offset = ref_objectid;

		sizes[1] = name_len + sizeof(*ref);
	}

	location = &BTRFS_I(inode)->location;
	location->objectid = objectid;
	location->offset = 0;
	location->type = BTRFS_INODE_ITEM_KEY;

	ret = btrfs_insert_inode_locked(inode);
	if (ret < 0) {
		iput(inode);
		goto fail;
	}

	path->leave_spinning = 1;
	ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems);
	if (ret != 0)
		goto fail_unlock;

	inode_init_owner(inode, dir, mode);
	inode_set_bytes(inode, 0);

	inode->i_mtime = current_time(inode);
	inode->i_atime = inode->i_mtime;
	inode->i_ctime = inode->i_mtime;
	BTRFS_I(inode)->i_otime = inode->i_mtime;

	inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
				  struct btrfs_inode_item);
	memzero_extent_buffer(path->nodes[0], (unsigned long)inode_item,
			     sizeof(*inode_item));
	fill_inode_item(trans, path->nodes[0], inode_item, inode);

	if (name) {
		ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
				     struct btrfs_inode_ref);
		btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
		btrfs_set_inode_ref_index(path->nodes[0], ref, *index);
		ptr = (unsigned long)(ref + 1);
		write_extent_buffer(path->nodes[0], name, ptr, name_len);
	}

	btrfs_mark_buffer_dirty(path->nodes[0]);
	btrfs_free_path(path);

	btrfs_inherit_iflags(inode, dir);

	if (S_ISREG(mode)) {
		if (btrfs_test_opt(fs_info, NODATASUM))
			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
		if (btrfs_test_opt(fs_info, NODATACOW))
			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW |
				BTRFS_INODE_NODATASUM;
	}

	inode_tree_add(inode);

	trace_btrfs_inode_new(inode);
	btrfs_set_inode_last_trans(trans, inode);

	btrfs_update_root_times(trans, root);

	ret = btrfs_inode_inherit_props(trans, inode, dir);
	if (ret)
		btrfs_err(fs_info,
			  "error inheriting props for ino %llu (root %llu): %d",
			  btrfs_ino(BTRFS_I(inode)), root->root_key.objectid, ret);

	return inode;

fail_unlock:
	discard_new_inode(inode);
fail:
	/* Give the directory index number we consumed back. */
	if (dir && name)
		BTRFS_I(dir)->index_cnt--;
	btrfs_free_path(path);
	return ERR_PTR(ret);
}
6386
6387
6388
6389
6390
6391
6392
/*
 * Utility function to add 'inode' into 'parent_inode' with a given
 * name and a given sequence number.  If 'add_backref' is true, also
 * insert a backref from the inode to the parent directory.
 */
int btrfs_add_link(struct btrfs_trans_handle *trans,
		   struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
		   const char *name, int name_len, int add_backref, u64 index)
{
	int ret = 0;
	struct btrfs_key key;
	struct btrfs_root *root = parent_inode->root;
	u64 ino = btrfs_ino(inode);
	u64 parent_ino = btrfs_ino(parent_inode);

	/* A subvolume root is addressed by its root key, not an inode key. */
	if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
		memcpy(&key, &inode->root->root_key, sizeof(key));
	} else {
		key.objectid = ino;
		key.type = BTRFS_INODE_ITEM_KEY;
		key.offset = 0;
	}

	if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
		ret = btrfs_add_root_ref(trans, key.objectid,
					 root->root_key.objectid, parent_ino,
					 index, name, name_len);
	} else if (add_backref) {
		ret = btrfs_insert_inode_ref(trans, root, name, name_len, ino,
					     parent_ino, index);
	}

	/* Nothing to clean up yet */
	if (ret)
		return ret;

	ret = btrfs_insert_dir_item(trans, name, name_len, parent_inode, &key,
				    btrfs_inode_type(&inode->vfs_inode), index);
	if (ret == -EEXIST || ret == -EOVERFLOW)
		goto fail_dir_item;
	else if (ret) {
		btrfs_abort_transaction(trans, ret);
		return ret;
	}

	/* The name is stored in both a dir item and a dir index item. */
	btrfs_i_size_write(parent_inode, parent_inode->vfs_inode.i_size +
			   name_len * 2);
	inode_inc_iversion(&parent_inode->vfs_inode);
	/*
	 * If we are replaying a log tree, we do not want to update the mtime
	 * and ctime of the parent directory with the current time, since the
	 * log replay procedure is responsible for setting them to their
	 * correct values (the ones it had when the fsync was done).
	 */
	if (!test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags)) {
		struct timespec64 now = current_time(&parent_inode->vfs_inode);

		parent_inode->vfs_inode.i_mtime = now;
		parent_inode->vfs_inode.i_ctime = now;
	}
	ret = btrfs_update_inode(trans, root, &parent_inode->vfs_inode);
	if (ret)
		btrfs_abort_transaction(trans, ret);
	return ret;

fail_dir_item:
	/* Undo whichever back reference was inserted above. */
	if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
		u64 local_index;
		int err;
		err = btrfs_del_root_ref(trans, key.objectid,
					 root->root_key.objectid, parent_ino,
					 &local_index, name, name_len);
		if (err)
			btrfs_abort_transaction(trans, err);
	} else if (add_backref) {
		u64 local_index;
		int err;

		err = btrfs_del_inode_ref(trans, root, name, name_len,
					  ino, parent_ino, &local_index);
		if (err)
			btrfs_abort_transaction(trans, err);
	}

	/* Return the original dir item insertion error. */
	return ret;
}
6475
6476static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
6477 struct btrfs_inode *dir, struct dentry *dentry,
6478 struct btrfs_inode *inode, int backref, u64 index)
6479{
6480 int err = btrfs_add_link(trans, dir, inode,
6481 dentry->d_name.name, dentry->d_name.len,
6482 backref, index);
6483 if (err > 0)
6484 err = -EEXIST;
6485 return err;
6486}
6487
6488static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
6489 umode_t mode, dev_t rdev)
6490{
6491 struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
6492 struct btrfs_trans_handle *trans;
6493 struct btrfs_root *root = BTRFS_I(dir)->root;
6494 struct inode *inode = NULL;
6495 int err;
6496 u64 objectid;
6497 u64 index = 0;
6498
6499
6500
6501
6502
6503
6504 trans = btrfs_start_transaction(root, 5);
6505 if (IS_ERR(trans))
6506 return PTR_ERR(trans);
6507
6508 err = btrfs_find_free_ino(root, &objectid);
6509 if (err)
6510 goto out_unlock;
6511
6512 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
6513 dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), objectid,
6514 mode, &index);
6515 if (IS_ERR(inode)) {
6516 err = PTR_ERR(inode);
6517 inode = NULL;
6518 goto out_unlock;
6519 }
6520
6521
6522
6523
6524
6525
6526
6527 inode->i_op = &btrfs_special_inode_operations;
6528 init_special_inode(inode, inode->i_mode, rdev);
6529
6530 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
6531 if (err)
6532 goto out_unlock;
6533
6534 err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode),
6535 0, index);
6536 if (err)
6537 goto out_unlock;
6538
6539 btrfs_update_inode(trans, root, inode);
6540 d_instantiate_new(dentry, inode);
6541
6542out_unlock:
6543 btrfs_end_transaction(trans);
6544 btrfs_btree_balance_dirty(fs_info);
6545 if (err && inode) {
6546 inode_dec_link_count(inode);
6547 discard_new_inode(inode);
6548 }
6549 return err;
6550}
6551
/*
 * Create a regular file — the .create inode operation.
 *
 * Allocates a new inode, installs its file/address-space operations,
 * sets up security xattrs, links it into @dir and instantiates the
 * dentry.  On failure the half-created inode is discarded.
 */
static int btrfs_create(struct inode *dir, struct dentry *dentry,
			umode_t mode, bool excl)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct inode *inode = NULL;
	int err;
	u64 objectid;
	u64 index = 0;

	/*
	 * Reserve 5 units for the new inode item + ref, the dir items in
	 * the parent and a possible security xattr.
	 * NOTE(review): breakdown inferred from the reservation size.
	 */
	trans = btrfs_start_transaction(root, 5);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	err = btrfs_find_free_ino(root, &objectid);
	if (err)
		goto out_unlock;

	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
			dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), objectid,
			mode, &index);
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
		inode = NULL;
		goto out_unlock;
	}

	/*
	 * Install the operations vectors before security init and
	 * d_instantiate_new() so anything inspecting the inode during
	 * instantiation sees a fully set up inode.
	 * NOTE(review): ordering rationale inferred - confirm.
	 */
	inode->i_fop = &btrfs_file_operations;
	inode->i_op = &btrfs_file_inode_operations;
	inode->i_mapping->a_ops = &btrfs_aops;

	err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
	if (err)
		goto out_unlock;

	err = btrfs_update_inode(trans, root, inode);
	if (err)
		goto out_unlock;

	err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode),
			0, index);
	if (err)
		goto out_unlock;

	/* Hook up the extent IO callbacks for the data path. */
	BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
	d_instantiate_new(dentry, inode);

out_unlock:
	btrfs_end_transaction(trans);
	if (err && inode) {
		/* Drop the link and discard the partially created inode. */
		inode_dec_link_count(inode);
		discard_new_inode(inode);
	}
	btrfs_btree_balance_dirty(fs_info);
	return err;
}
6619
/*
 * Create a hard link — the .link inode operation.
 *
 * Cross-subvolume links are rejected with -EXDEV and the link count is
 * capped at BTRFS_LINK_MAX.  After adding the link the new name is
 * logged; if the log code requests it, the transaction is committed
 * instead of merely ended.
 */
static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
		      struct dentry *dentry)
{
	struct btrfs_trans_handle *trans = NULL;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct inode *inode = d_inode(old_dentry);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	u64 index;
	int err;
	int drop_inode = 0;

	/* Do not allow cross-subvolume hard links. */
	if (root->root_key.objectid != BTRFS_I(inode)->root->root_key.objectid)
		return -EXDEV;

	if (inode->i_nlink >= BTRFS_LINK_MAX)
		return -EMLINK;

	err = btrfs_set_inode_index(BTRFS_I(dir), &index);
	if (err)
		goto fail;

	/*
	 * Reserve units for the dir items, inode ref and inode item
	 * updates; one extra unit when i_nlink == 0, presumably for
	 * removing the orphan item below.
	 * NOTE(review): exact breakdown inferred - confirm.
	 */
	trans = btrfs_start_transaction(root, inode->i_nlink ? 5 : 6);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		trans = NULL;
		goto fail;
	}

	/* Invalidate the cached dir index; the new name gets @index. */
	BTRFS_I(inode)->dir_index = 0ULL;
	inc_nlink(inode);
	inode_inc_iversion(inode);
	inode->i_ctime = current_time(inode);
	/* Hold a reference for d_instantiate()/iput() below. */
	ihold(inode);
	set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);

	err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode),
			1, index);

	if (err) {
		drop_inode = 1;
	} else {
		struct dentry *parent = dentry->d_parent;
		int ret;

		err = btrfs_update_inode(trans, root, inode);
		if (err)
			goto fail;
		if (inode->i_nlink == 1) {
			/*
			 * The link count went from 0 to 1: the inode was an
			 * orphan, remove its orphan item now that it is
			 * reachable again.
			 */
			err = btrfs_orphan_del(trans, BTRFS_I(inode));
			if (err)
				goto fail;
		}
		d_instantiate(dentry, inode);
		ret = btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent,
					true, NULL);
		if (ret == BTRFS_NEED_TRANS_COMMIT) {
			/* Log code asked for a full commit; trans is consumed. */
			err = btrfs_commit_transaction(trans);
			trans = NULL;
		}
	}

fail:
	if (trans)
		btrfs_end_transaction(trans);
	if (drop_inode) {
		inode_dec_link_count(inode);
		iput(inode);
	}
	btrfs_btree_balance_dirty(fs_info);
	return err;
}
6703
/*
 * Create a directory — the .mkdir inode operation.
 */
static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
	struct inode *inode = NULL;
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	int err = 0;
	u64 objectid = 0;
	u64 index = 0;

	/*
	 * Reserve 5 units for the new inode item + ref, the dir items in
	 * the parent and a possible security xattr.
	 * NOTE(review): breakdown inferred from the reservation size.
	 */
	trans = btrfs_start_transaction(root, 5);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	err = btrfs_find_free_ino(root, &objectid);
	if (err)
		goto out_fail;

	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
			dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), objectid,
			S_IFDIR | mode, &index);
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
		inode = NULL;
		goto out_fail;
	}

	/* Install the directory ops vectors before instantiating the dentry. */
	inode->i_op = &btrfs_dir_inode_operations;
	inode->i_fop = &btrfs_dir_file_operations;

	err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
	if (err)
		goto out_fail;

	/* New directories start out empty. */
	btrfs_i_size_write(BTRFS_I(inode), 0);
	err = btrfs_update_inode(trans, root, inode);
	if (err)
		goto out_fail;

	err = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
			dentry->d_name.name,
			dentry->d_name.len, 0, index);
	if (err)
		goto out_fail;

	d_instantiate_new(dentry, inode);

out_fail:
	btrfs_end_transaction(trans);
	if (err && inode) {
		/* Drop the link and discard the half-created directory inode. */
		inode_dec_link_count(inode);
		discard_new_inode(inode);
	}
	btrfs_btree_balance_dirty(fs_info);
	return err;
}
6766
/*
 * Decompress an inline file extent into @page.
 *
 * Copies the compressed inline bytes out of the leaf, decompresses up
 * to one page worth (starting at @extent_offset into the decompressed
 * data) into @page, and zeroes any tail of the page past the
 * decompressed size.
 *
 * @pg_offset is expected to be 0 (see the WARN_ON below).
 * Returns 0 or a negative errno (e.g. -ENOMEM).
 */
static noinline int uncompress_inline(struct btrfs_path *path,
				      struct page *page,
				      size_t pg_offset, u64 extent_offset,
				      struct btrfs_file_extent_item *item)
{
	int ret;
	struct extent_buffer *leaf = path->nodes[0];
	char *tmp;
	size_t max_size;
	unsigned long inline_size;
	unsigned long ptr;
	int compress_type;

	WARN_ON(pg_offset != 0);
	compress_type = btrfs_file_extent_compression(leaf, item);
	max_size = btrfs_file_extent_ram_bytes(leaf, item);
	inline_size = btrfs_file_extent_inline_item_len(leaf,
					btrfs_item_nr(path->slots[0]));
	/* Staging buffer for the compressed bytes read out of the leaf. */
	tmp = kmalloc(inline_size, GFP_NOFS);
	if (!tmp)
		return -ENOMEM;
	ptr = btrfs_file_extent_inline_start(item);

	read_extent_buffer(leaf, tmp, ptr, inline_size);

	/* Never decompress more than one page into @page. */
	max_size = min_t(unsigned long, PAGE_SIZE, max_size);
	ret = btrfs_decompress(compress_type, tmp, page,
			       extent_offset, inline_size, max_size);

	/*
	 * Zero the part of the page past max_size; the decompressor is only
	 * given max_size bytes of the page to fill, so anything beyond that
	 * would otherwise be left with stale contents.
	 * NOTE(review): assumes btrfs_decompress() handles zeroing up to
	 * max_size itself when the output is short - confirm.
	 */
	if (max_size + pg_offset < PAGE_SIZE) {
		char *map = kmap(page);
		memset(map + pg_offset + max_size, 0, PAGE_SIZE - max_size - pg_offset);
		kunmap(page);
	}
	kfree(tmp);
	return ret;
}
6812
6813
6814
6815
6816
6817
6818
6819
6820
/*
 * Look up the extent map covering file offset @start for @inode,
 * reading it from the extent tree when it is not already cached in the
 * inode's extent map tree.  For inline extents, when @page is given and
 * @create is 0, the (possibly compressed) inline data is copied into
 * @page.  Ranges covered by no file extent item are returned as holes.
 *
 * Returns an extent map covering @start (its range may be larger or
 * smaller than @len) or an ERR_PTR on failure.
 */
struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
				    struct page *page,
				    size_t pg_offset, u64 start, u64 len,
				    int create)
{
	struct btrfs_fs_info *fs_info = inode->root->fs_info;
	int ret;
	int err = 0;
	u64 extent_start = 0;
	u64 extent_end = 0;
	u64 objectid = btrfs_ino(inode);
	int extent_type = -1;
	struct btrfs_path *path = NULL;
	struct btrfs_root *root = inode->root;
	struct btrfs_file_extent_item *item;
	struct extent_buffer *leaf;
	struct btrfs_key found_key;
	struct extent_map *em = NULL;
	struct extent_map_tree *em_tree = &inode->extent_tree;
	struct extent_io_tree *io_tree = &inode->io_tree;
	const bool new_inline = !page || create;

	/* Fast path: the range may already be cached in the em tree. */
	read_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, start, len);
	if (em)
		em->bdev = fs_info->fs_devices->latest_bdev;
	read_unlock(&em_tree->lock);

	if (em) {
		/*
		 * A cached map is only usable when it actually covers @start
		 * and, for inline extents with a @page, when we do not need
		 * to fill the page below.
		 */
		if (em->start > start || em->start + em->len <= start)
			free_extent_map(em);
		else if (em->block_start == EXTENT_MAP_INLINE && page)
			free_extent_map(em);
		else
			goto out;
	}
	em = alloc_extent_map();
	if (!em) {
		err = -ENOMEM;
		goto out;
	}
	em->bdev = fs_info->fs_devices->latest_bdev;
	em->start = EXTENT_MAP_HOLE;
	em->orig_start = EXTENT_MAP_HOLE;
	em->len = (u64)-1;
	em->block_len = (u64)-1;

	path = btrfs_alloc_path();
	if (!path) {
		err = -ENOMEM;
		goto out;
	}

	/* Readahead helps when we end up walking adjacent extent items. */
	path->reada = READA_FORWARD;

	/*
	 * Keep the path in spinning-lock mode; the inline-extent copy
	 * below switches to blocking mode explicitly before it sleeps
	 * (btrfs_set_path_blocking()).
	 * NOTE(review): rationale inferred - confirm.
	 */
	path->leave_spinning = 1;

	ret = btrfs_lookup_file_extent(NULL, root, path, objectid, start, 0);
	if (ret < 0) {
		err = ret;
		goto out;
	} else if (ret > 0) {
		/* No exact match; step back to the previous item if any. */
		if (path->slots[0] == 0)
			goto not_found;
		path->slots[0]--;
	}

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0],
			      struct btrfs_file_extent_item);
	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
	if (found_key.objectid != objectid ||
	    found_key.type != BTRFS_EXTENT_DATA_KEY) {
		/*
		 * The item we landed on belongs to another inode or is not
		 * an extent-data item; treat the current position as ending
		 * at @start and advance to the next item below.
		 */
		extent_end = start;
		goto next;
	}

	extent_type = btrfs_file_extent_type(leaf, item);
	extent_start = found_key.offset;
	if (extent_type == BTRFS_FILE_EXTENT_REG ||
	    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
		/* Only regular files may have regular/prealloc extents. */
		if (!S_ISREG(inode->vfs_inode.i_mode)) {
			ret = -EUCLEAN;
			btrfs_crit(fs_info,
		"regular/prealloc extent found for non-regular inode %llu",
				   btrfs_ino(inode));
			goto out;
		}
		extent_end = extent_start +
			btrfs_file_extent_num_bytes(leaf, item);

		trace_btrfs_get_extent_show_fi_regular(inode, leaf, item,
						       extent_start);
	} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
		size_t size;

		/* Inline extents are sector-aligned when mapped. */
		size = btrfs_file_extent_ram_bytes(leaf, item);
		extent_end = ALIGN(extent_start + size,
				   fs_info->sectorsize);

		trace_btrfs_get_extent_show_fi_inline(inode, leaf, item,
						      path->slots[0],
						      extent_start);
	}
next:
	if (start >= extent_end) {
		/* The found extent ends before @start; look at the next item. */
		path->slots[0]++;
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0) {
				err = ret;
				goto out;
			} else if (ret > 0) {
				goto not_found;
			}
			leaf = path->nodes[0];
		}
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != objectid ||
		    found_key.type != BTRFS_EXTENT_DATA_KEY)
			goto not_found;
		if (start + len <= found_key.offset)
			goto not_found;
		if (start > found_key.offset)
			goto next;

		/* Gap between @start and the next extent item: it's a hole. */
		em->start = start;
		em->orig_start = start;
		em->len = found_key.offset - start;
		em->block_start = EXTENT_MAP_HOLE;
		goto insert;
	}

	btrfs_extent_item_to_extent_map(inode, path, item,
			new_inline, em);

	if (extent_type == BTRFS_FILE_EXTENT_REG ||
	    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
		goto insert;
	} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
		unsigned long ptr;
		char *map;
		size_t size;
		size_t extent_offset;
		size_t copy_size;

		if (new_inline)
			goto out;

		size = btrfs_file_extent_ram_bytes(leaf, item);
		extent_offset = page_offset(page) + pg_offset - extent_start;
		copy_size = min_t(u64, PAGE_SIZE - pg_offset,
				  size - extent_offset);
		em->start = extent_start + extent_offset;
		em->len = ALIGN(copy_size, fs_info->sectorsize);
		em->orig_block_len = em->len;
		em->orig_start = em->start;
		ptr = btrfs_file_extent_inline_start(item) + extent_offset;

		/* The copy/decompress below may sleep, so stop spinning. */
		btrfs_set_path_blocking(path);
		if (!PageUptodate(page)) {
			if (btrfs_file_extent_compression(leaf, item) !=
			    BTRFS_COMPRESS_NONE) {
				ret = uncompress_inline(path, page, pg_offset,
							extent_offset, item);
				if (ret) {
					err = ret;
					goto out;
				}
			} else {
				map = kmap(page);
				read_extent_buffer(leaf, map + pg_offset, ptr,
						   copy_size);
				/* Zero the tail of the page past the inline data. */
				if (pg_offset + copy_size < PAGE_SIZE) {
					memset(map + pg_offset + copy_size, 0,
					       PAGE_SIZE - pg_offset -
					       copy_size);
				}
				kunmap(page);
			}
			flush_dcache_page(page);
		}
		set_extent_uptodate(io_tree, em->start,
				    extent_map_end(em) - 1, NULL, GFP_NOFS);
		goto insert;
	}
not_found:
	/* Nothing covers [start, start + len): report one big hole. */
	em->start = start;
	em->orig_start = start;
	em->len = len;
	em->block_start = EXTENT_MAP_HOLE;
insert:
	btrfs_release_path(path);
	if (em->start > start || extent_map_end(em) <= start) {
		btrfs_err(fs_info,
			  "bad extent! em: [%llu %llu] passed [%llu %llu]",
			  em->start, em->len, start, len);
		err = -EIO;
		goto out;
	}

	err = 0;
	write_lock(&em_tree->lock);
	err = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
	write_unlock(&em_tree->lock);
out:
	btrfs_free_path(path);

	trace_btrfs_get_extent(root, inode, em);

	if (err) {
		free_extent_map(em);
		return ERR_PTR(err);
	}
	BUG_ON(!em);
	return em;
}
7051
/*
 * Like btrfs_get_extent(), but for fiemap: when the on-disk map is a
 * hole or a prealloc extent, also consult the EXTENT_DELALLOC bits in
 * the io tree so not-yet-flushed buffered writes are reported.
 *
 * Returns the on-disk extent map, a trimmed hole/prealloc map in front
 * of a delalloc range, or a synthetic EXTENT_MAP_DELALLOC map; an
 * ERR_PTR on failure.
 */
struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
					   u64 start, u64 len)
{
	struct extent_map *em;
	struct extent_map *hole_em = NULL;
	u64 delalloc_start = start;
	u64 end;
	u64 delalloc_len;
	u64 delalloc_end;
	int err = 0;

	em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
	if (IS_ERR(em))
		return em;

	/*
	 * A real allocated extent (neither a hole nor prealloc) is the
	 * full answer; delalloc only matters over holes and prealloc.
	 */
	if (em->block_start != EXTENT_MAP_HOLE &&
	    !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
		return em;
	else
		hole_em = em;

	/* Inclusive end of the search range, clamped against overflow. */
	end = start + len;
	if (end < start)
		end = (u64)-1;
	else
		end -= 1;

	em = NULL;

	/* Find the first delalloc range at or after @start. */
	delalloc_len = count_range_bits(&inode->io_tree, &delalloc_start,
					end, len, EXTENT_DELALLOC, 1);
	delalloc_end = delalloc_start + delalloc_len;
	if (delalloc_end < delalloc_start)
		delalloc_end = (u64)-1;

	/*
	 * No delalloc inside [start, end]: the hole/prealloc map from
	 * disk is the answer.
	 */
	if (delalloc_start > end || delalloc_end <= start) {
		em = hole_em;
		hole_em = NULL;
		goto out;
	}

	/*
	 * Trim the delalloc range to the part at or after @start; from
	 * here on delalloc_end is exclusive again.
	 */
	delalloc_start = max(start, delalloc_start);
	delalloc_len = delalloc_end - delalloc_start;

	if (delalloc_len > 0) {
		u64 hole_start;
		u64 hole_len;
		const u64 hole_end = extent_map_end(hole_em);

		em = alloc_extent_map();
		if (!em) {
			err = -ENOMEM;
			goto out;
		}
		em->bdev = NULL;

		ASSERT(hole_em);
		/*
		 * Trim the hole to [start, end].  When the hole lies fully
		 * outside the range it is dropped; note hole_start/hole_len
		 * are only initialized on the branch that keeps hole_em,
		 * which is the only case where they are read below.
		 */
		if (hole_end <= start || hole_em->start > end) {
			free_extent_map(hole_em);
			hole_em = NULL;
		} else {
			hole_start = max(hole_em->start, start);
			hole_len = hole_end - hole_start;
		}

		if (hole_em && delalloc_start > hole_start) {
			/*
			 * The hole starts before the delalloc: report only
			 * the leading part of the hole, up to where the
			 * delalloc begins.
			 */
			em->len = min(hole_len, delalloc_start - hole_start);
			em->start = hole_start;
			em->orig_start = hole_start;
			/*
			 * Keep the block start (hole or prealloc) and the
			 * prealloc flag of the underlying extent.
			 */
			em->block_start = hole_em->block_start;
			em->block_len = hole_len;
			if (test_bit(EXTENT_FLAG_PREALLOC, &hole_em->flags))
				set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
		} else {
			/*
			 * The hole was dropped or starts after the delalloc:
			 * report the delalloc range itself.
			 */
			em->start = delalloc_start;
			em->len = delalloc_len;
			em->orig_start = delalloc_start;
			em->block_start = EXTENT_MAP_DELALLOC;
			em->block_len = delalloc_len;
		}
	} else {
		return hole_em;
	}
out:
	/* free_extent_map() accepts NULL. */
	free_extent_map(hole_em);
	if (err) {
		free_extent_map(em);
		return ERR_PTR(err);
	}
	return em;
}
7179
7180static struct extent_map *btrfs_create_dio_extent(struct inode *inode,
7181 const u64 start,
7182 const u64 len,
7183 const u64 orig_start,
7184 const u64 block_start,
7185 const u64 block_len,
7186 const u64 orig_block_len,
7187 const u64 ram_bytes,
7188 const int type)
7189{
7190 struct extent_map *em = NULL;
7191 int ret;
7192
7193 if (type != BTRFS_ORDERED_NOCOW) {
7194 em = create_io_em(inode, start, len, orig_start,
7195 block_start, block_len, orig_block_len,
7196 ram_bytes,
7197 BTRFS_COMPRESS_NONE,
7198 type);
7199 if (IS_ERR(em))
7200 goto out;
7201 }
7202 ret = btrfs_add_ordered_extent_dio(inode, start, block_start,
7203 len, block_len, type);
7204 if (ret) {
7205 if (em) {
7206 free_extent_map(em);
7207 btrfs_drop_extent_cache(BTRFS_I(inode), start,
7208 start + len - 1, 0);
7209 }
7210 em = ERR_PTR(ret);
7211 }
7212 out:
7213
7214 return em;
7215}
7216
7217static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
7218 u64 start, u64 len)
7219{
7220 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
7221 struct btrfs_root *root = BTRFS_I(inode)->root;
7222 struct extent_map *em;
7223 struct btrfs_key ins;
7224 u64 alloc_hint;
7225 int ret;
7226
7227 alloc_hint = get_extent_allocation_hint(inode, start, len);
7228 ret = btrfs_reserve_extent(root, len, len, fs_info->sectorsize,
7229 0, alloc_hint, &ins, 1, 1);
7230 if (ret)
7231 return ERR_PTR(ret);
7232
7233 em = btrfs_create_dio_extent(inode, start, ins.offset, start,
7234 ins.objectid, ins.offset, ins.offset,
7235 ins.offset, BTRFS_ORDERED_REGULAR);
7236 btrfs_dec_block_group_reservations(fs_info, ins.objectid);
7237 if (IS_ERR(em))
7238 btrfs_free_reserved_extent(fs_info, ins.objectid,
7239 ins.offset, 1);
7240
7241 return em;
7242}
7243
7244
7245
7246
7247
/*
 * Check whether [offset, offset + *len) can be written without COW.
 *
 * Returns 1 (trimming *len to the checked extent) when the range is
 * backed by a single uncompressed, unencrypted regular/prealloc extent
 * that is not cross-referenced, not from at/before the last snapshot,
 * not read-only and has no checksums in range; returns 0 when COW is
 * required, -EAGAIN when delalloc over a prealloc range blocks the
 * decision, or another negative errno on lookup failure.
 *
 * When @orig_start is non-NULL, *orig_start / *orig_block_len /
 * *ram_bytes are filled with the backing extent's geometry.
 */
noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
			      u64 *orig_start, u64 *orig_block_len,
			      u64 *ram_bytes)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_path *path;
	int ret;
	struct extent_buffer *leaf;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 disk_bytenr;
	u64 backref_offset;
	u64 extent_end;
	u64 num_bytes;
	int slot;
	int found_type;
	bool nocow = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_lookup_file_extent(NULL, root, path,
			btrfs_ino(BTRFS_I(inode)), offset, 0);
	if (ret < 0)
		goto out;

	slot = path->slots[0];
	if (ret == 1) {
		if (slot == 0) {
			/* can't find the item, must cow */
			ret = 0;
			goto out;
		}
		slot--;
	}
	ret = 0;
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, slot);
	if (key.objectid != btrfs_ino(BTRFS_I(inode)) ||
	    key.type != BTRFS_EXTENT_DATA_KEY) {
		/* not our file or not an extent-data item, must cow */
		goto out;
	}

	if (key.offset > offset) {
		/* extent item starts past our offset, must cow */
		goto out;
	}

	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
	found_type = btrfs_file_extent_type(leaf, fi);
	if (found_type != BTRFS_FILE_EXTENT_REG &&
	    found_type != BTRFS_FILE_EXTENT_PREALLOC) {
		/* inline extents must always be cowed */
		goto out;
	}

	/* Regular extents only avoid COW when the inode is NODATACOW. */
	if (!nocow && found_type == BTRFS_FILE_EXTENT_REG)
		goto out;

	extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
	if (extent_end <= offset)
		goto out;

	/* A zero disk_bytenr means a hole, must cow. */
	disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
	if (disk_bytenr == 0)
		goto out;

	/* Compressed/encrypted/other-encoded extents cannot be overwritten. */
	if (btrfs_file_extent_compression(leaf, fi) ||
	    btrfs_file_extent_encryption(leaf, fi) ||
	    btrfs_file_extent_other_encoding(leaf, fi))
		goto out;

	/*
	 * An extent from at or before the last snapshot may still be
	 * referenced by that snapshot, so it must be COWed.
	 * NOTE(review): rationale inferred - confirm.
	 */
	if (btrfs_file_extent_generation(leaf, fi) <=
	    btrfs_root_last_snapshot(&root->root_item))
		goto out;

	backref_offset = btrfs_file_extent_offset(leaf, fi);

	if (orig_start) {
		*orig_start = key.offset - backref_offset;
		*orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
		*ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
	}

	if (btrfs_extent_readonly(fs_info, disk_bytenr))
		goto out;

	num_bytes = min(offset + *len, extent_end) - offset;
	if (!nocow && found_type == BTRFS_FILE_EXTENT_PREALLOC) {
		u64 range_end;

		/* Delalloc over a prealloc range: ask the caller to retry. */
		range_end = round_up(offset + num_bytes,
				     root->fs_info->sectorsize) - 1;
		ret = test_range_bit(io_tree, offset, range_end,
				     EXTENT_DELALLOC, 0, NULL);
		if (ret) {
			ret = -EAGAIN;
			goto out;
		}
	}

	btrfs_release_path(path);

	/*
	 * Look for other files (or snapshots) referencing this extent;
	 * if any exist we must cow.
	 */
	ret = btrfs_cross_ref_exist(root, btrfs_ino(BTRFS_I(inode)),
				    key.offset - backref_offset, disk_bytenr);
	if (ret) {
		ret = 0;
		goto out;
	}

	/*
	 * Adjust disk_bytenr to the bytes we are actually about to write;
	 * if there are any csums in that range we have to cow in order to
	 * keep the csums correct.
	 */
	disk_bytenr += backref_offset;
	disk_bytenr += offset - key.offset;
	if (csum_exist_in_range(fs_info, disk_bytenr, num_bytes))
		goto out;
	/*
	 * All checks passed: it is safe to overwrite this extent without
	 * cow.  Trim *len to what was actually verified.
	 */
	*len = num_bytes;
	ret = 1;
out:
	btrfs_free_path(path);
	return ret;
}
7391
/*
 * Lock [lockstart, lockend] in the inode's io tree for direct IO,
 * retrying until no ordered extent overlaps the range (and, for writes,
 * no page cache pages cover it).  Returns 0 with the extent locked, or
 * -ENOTBLK when direct IO should fall back to buffered IO.
 */
static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
			      struct extent_state **cached_state, int writing)
{
	struct btrfs_ordered_extent *ordered;
	int ret = 0;

	while (1) {
		lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
				 cached_state);
		/*
		 * Check for ordered extents anywhere in the range we are
		 * going to be doing DIO to.
		 */
		ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), lockstart,
						     lockend - lockstart + 1);

		/*
		 * For writes, additionally make sure there are no buffered
		 * pages left in this range; otherwise reads after the write
		 * could see stale data.
		 * NOTE(review): rationale inferred - confirm.
		 */
		if (!ordered &&
		    (!writing || !filemap_range_has_page(inode->i_mapping,
							 lockstart, lockend)))
			break;

		/* Drop the lock while waiting / before retrying. */
		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
				     cached_state);

		if (ordered) {
			/*
			 * Only wait for the ordered extent when writing, or
			 * when it came from direct IO itself; a DIO read
			 * waiting on a buffered-write ordered extent would
			 * otherwise return -ENOTBLK and fall back.
			 * NOTE(review): exact deadlock rationale inferred -
			 * confirm.
			 */
			if (writing ||
			    test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags))
				btrfs_start_ordered_extent(inode, ordered, 1);
			else
				ret = -ENOTBLK;
			btrfs_put_ordered_extent(ordered);
		} else {
			/*
			 * No ordered extent, so a write found lingering page
			 * cache pages: fall back to buffered IO rather than
			 * trying to invalidate them here.
			 * NOTE(review): rationale inferred - confirm.
			 */
			ret = -ENOTBLK;
		}

		if (ret)
			break;

		cond_resched();
	}

	return ret;
}
7471
7472
/*
 * Create and insert an extent map describing an in-flight IO extent of
 * the given ordered type (regular, prealloc, nocow or compressed).
 * Existing cached maps in the range are dropped until the insert
 * succeeds.  Returns the pinned extent map or an ERR_PTR.
 */
static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
				       u64 orig_start, u64 block_start,
				       u64 block_len, u64 orig_block_len,
				       u64 ram_bytes, int compress_type,
				       int type)
{
	struct extent_map_tree *em_tree;
	struct extent_map *em;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret;

	ASSERT(type == BTRFS_ORDERED_PREALLOC ||
	       type == BTRFS_ORDERED_COMPRESSED ||
	       type == BTRFS_ORDERED_NOCOW ||
	       type == BTRFS_ORDERED_REGULAR);

	em_tree = &BTRFS_I(inode)->extent_tree;
	em = alloc_extent_map();
	if (!em)
		return ERR_PTR(-ENOMEM);

	em->start = start;
	em->orig_start = orig_start;
	em->len = len;
	em->block_len = block_len;
	em->block_start = block_start;
	em->bdev = root->fs_info->fs_devices->latest_bdev;
	em->orig_block_len = orig_block_len;
	em->ram_bytes = ram_bytes;
	em->generation = -1;
	/* Pin the map while the IO it describes is in flight. */
	set_bit(EXTENT_FLAG_PINNED, &em->flags);
	if (type == BTRFS_ORDERED_PREALLOC) {
		set_bit(EXTENT_FLAG_FILLING, &em->flags);
	} else if (type == BTRFS_ORDERED_COMPRESSED) {
		set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
		em->compress_type = compress_type;
	}

	do {
		btrfs_drop_extent_cache(BTRFS_I(inode), em->start,
				em->start + em->len - 1, 0);
		write_lock(&em_tree->lock);
		ret = add_extent_mapping(em_tree, em, 1);
		write_unlock(&em_tree->lock);
		/*
		 * Retry on -EEXIST: another map can reappear in the range
		 * between the drop above and the insert.
		 * NOTE(review): race description inferred - confirm.
		 */
	} while (ret == -EEXIST);

	if (ret) {
		free_extent_map(em);
		return ERR_PTR(ret);
	}

	/* The returned map is referenced; the caller must free_extent_map(). */
	return em;
}
7531
7532
7533static int btrfs_get_blocks_direct_read(struct extent_map *em,
7534 struct buffer_head *bh_result,
7535 struct inode *inode,
7536 u64 start, u64 len)
7537{
7538 if (em->block_start == EXTENT_MAP_HOLE ||
7539 test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
7540 return -ENOENT;
7541
7542 len = min(len, em->len - (start - em->start));
7543
7544 bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
7545 inode->i_blkbits;
7546 bh_result->b_size = len;
7547 bh_result->b_bdev = em->bdev;
7548 set_buffer_mapped(bh_result);
7549
7550 return 0;
7551}
7552
/*
 * Map [start, start + len) for a direct-IO write.
 *
 * First tries to write into the existing extent without COW (inode
 * marked NODATACOW, or the extent is preallocated); otherwise allocates
 * a new extent.  Fills in @bh_result, updates @dio_data's reservation
 * accounting and re-stashes dio_data in current->journal_info.
 */
static int btrfs_get_blocks_direct_write(struct extent_map **map,
					 struct buffer_head *bh_result,
					 struct inode *inode,
					 struct btrfs_dio_data *dio_data,
					 u64 start, u64 len)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_map *em = *map;
	int ret = 0;

	/*
	 * No new extent is allocated when:
	 * 1) the extent is marked PREALLOC, or
	 * 2) the inode is NODATACOW and the extent is not a hole.
	 * In both cases we still have to verify NOCOW is actually possible
	 * below (can_nocow_extent()).
	 */
	if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
	    ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
	     em->block_start != EXTENT_MAP_HOLE)) {
		int type;
		u64 block_start, orig_start, orig_block_len, ram_bytes;

		if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
			type = BTRFS_ORDERED_PREALLOC;
		else
			type = BTRFS_ORDERED_NOCOW;
		len = min(len, em->len - (start - em->start));
		block_start = em->block_start + (start - em->start);

		if (can_nocow_extent(inode, start, &len, &orig_start,
				     &orig_block_len, &ram_bytes) == 1 &&
		    btrfs_inc_nocow_writers(fs_info, block_start)) {
			struct extent_map *em2;

			em2 = btrfs_create_dio_extent(inode, start, len,
						      orig_start, block_start,
						      len, orig_block_len,
						      ram_bytes, type);
			btrfs_dec_nocow_writers(fs_info, block_start);
			/* For PREALLOC, em2 (possibly an error) replaces em. */
			if (type == BTRFS_ORDERED_PREALLOC) {
				free_extent_map(em);
				*map = em = em2;
			}

			if (em2 && IS_ERR(em2)) {
				ret = PTR_ERR(em2);
				goto out;
			}
			/*
			 * We are reusing an existing or preallocated extent,
			 * so release the data-space reservation made for this
			 * write (quota is untouched).
			 * NOTE(review): accounting rationale inferred -
			 * confirm.
			 */
			btrfs_free_reserved_data_space_noquota(inode, start,
							       len);
			goto skip_cow;
		}
	}

	/* NOCOW not possible: allocate a new extent (COW). */
	len = bh_result->b_size;
	free_extent_map(em);
	*map = em = btrfs_new_extent_direct(inode, start, len);
	if (IS_ERR(em)) {
		ret = PTR_ERR(em);
		goto out;
	}

	len = min(len, em->len - (start - em->start));

skip_cow:
	bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
		inode->i_blkbits;
	bh_result->b_size = len;
	bh_result->b_bdev = em->bdev;
	set_buffer_mapped(bh_result);

	if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
		set_buffer_new(bh_result);

	/*
	 * Extend i_size for extending writes (the extent range is still
	 * locked here, so readers observe a consistent size).
	 * NOTE(review): locking rationale inferred - confirm.
	 */
	if (!dio_data->overwrite && start + len > i_size_read(inode))
		i_size_write(inode, start + len);

	/* Consume part of the reservation and record the submitted range end. */
	WARN_ON(dio_data->reserve < len);
	dio_data->reserve -= len;
	dio_data->unsubmitted_oe_range_end = start + len;
	current->journal_info = dio_data;
out:
	return ret;
}
7650
/*
 * get_block callback for btrfs direct IO: map the file range starting
 * at @iblock into @bh_result, holding the extent range locked for the
 * lookup.  For writes the whole range is unlocked here; for reads only
 * the unused tail is unlocked and the rest stays locked for the IO
 * completion path to release.
 */
static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
				   struct buffer_head *bh_result, int create)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_map *em;
	struct extent_state *cached_state = NULL;
	struct btrfs_dio_data *dio_data = NULL;
	u64 start = iblock << inode->i_blkbits;
	u64 lockstart, lockend;
	u64 len = bh_result->b_size;
	int unlock_bits = EXTENT_LOCKED;
	int ret = 0;

	if (create)
		unlock_bits |= EXTENT_DIRTY;
	else
		/* Reads are mapped at most one sector at a time. */
		len = min_t(u64, len, fs_info->sectorsize);

	lockstart = start;
	lockend = start + len - 1;

	if (current->journal_info) {
		/*
		 * Pick up the dio_data stashed in current->journal_info by
		 * the DIO write setup and clear it for the duration of the
		 * mapping; btrfs_get_blocks_direct_write() restores it.
		 * NOTE(review): exact rationale inferred - confirm.
		 */
		dio_data = current->journal_info;
		current->journal_info = NULL;
	}

	/*
	 * Failure here means an overlapping ordered extent or page cache
	 * could not be resolved; fall back to buffered IO.
	 */
	if (lock_extent_direct(inode, lockstart, lockend, &cached_state,
			       create)) {
		ret = -ENOTBLK;
		goto err;
	}

	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0);
	if (IS_ERR(em)) {
		ret = PTR_ERR(em);
		goto unlock_err;
	}

	/*
	 * Compressed and inline extents cannot be mapped for direct IO
	 * (both branches below return -ENOTBLK so the caller falls back
	 * to the buffered path, which can handle them).
	 */
	if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
	    em->block_start == EXTENT_MAP_INLINE) {
		free_extent_map(em);
		ret = -ENOTBLK;
		goto unlock_err;
	}

	if (create) {
		ret = btrfs_get_blocks_direct_write(&em, bh_result, inode,
						    dio_data, start, len);
		if (ret < 0)
			goto unlock_err;

		/* Writes: clear and unlock the entire range now. */
		clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
				 unlock_bits, 1, 0, &cached_state);
	} else {
		ret = btrfs_get_blocks_direct_read(em, bh_result, inode,
						   start, len);
		/* -ENOENT means a hole/prealloc range: report it unmapped. */
		if (ret < 0) {
			ret = 0;
			free_extent_map(em);
			goto unlock_err;
		}
		/*
		 * Reads: only unlock the tail we are not using; the mapped
		 * part stays locked and is unlocked by the endio routine.
		 */
		lockstart = start + bh_result->b_size;
		if (lockstart < lockend) {
			clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
					 lockend, unlock_bits, 1, 0,
					 &cached_state);
		} else {
			free_extent_state(cached_state);
		}
	}

	free_extent_map(em);

	return 0;

unlock_err:
	clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			 unlock_bits, 1, 0, &cached_state);
err:
	/* Restore the dio_data we took out of journal_info above. */
	if (dio_data)
		current->journal_info = dio_data;
	return ret;
}
7763
7764static inline blk_status_t submit_dio_repair_bio(struct inode *inode,
7765 struct bio *bio,
7766 int mirror_num)
7767{
7768 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
7769 blk_status_t ret;
7770
7771 BUG_ON(bio_op(bio) == REQ_OP_WRITE);
7772
7773 ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DIO_REPAIR);
7774 if (ret)
7775 return ret;
7776
7777 ret = btrfs_map_bio(fs_info, bio, mirror_num, 0);
7778
7779 return ret;
7780}
7781
7782static int btrfs_check_dio_repairable(struct inode *inode,
7783 struct bio *failed_bio,
7784 struct io_failure_record *failrec,
7785 int failed_mirror)
7786{
7787 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
7788 int num_copies;
7789
7790 num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len);
7791 if (num_copies == 1) {
7792
7793
7794
7795
7796
7797 btrfs_debug(fs_info,
7798 "Check DIO Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d",
7799 num_copies, failrec->this_mirror, failed_mirror);
7800 return 0;
7801 }
7802
7803 failrec->failed_mirror = failed_mirror;
7804 failrec->this_mirror++;
7805 if (failrec->this_mirror == failed_mirror)
7806 failrec->this_mirror++;
7807
7808 if (failrec->this_mirror > num_copies) {
7809 btrfs_debug(fs_info,
7810 "Check DIO Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d",
7811 num_copies, failrec->this_mirror, failed_mirror);
7812 return 0;
7813 }
7814
7815 return 1;
7816}
7817
/*
 * Build and submit a repair bio for one failed sector of a direct IO read.
 *
 * Looks up (or creates) the io_failure_record for [start, end], checks that
 * another mirror is available, clones a single-sector read bio targeting
 * @page/@pgoff and submits it to the next mirror.  Completion is reported
 * asynchronously through @repair_endio(@repair_arg).
 *
 * Returns BLK_STS_OK on successful submission, or an error status when no
 * repair is possible (the failure record is freed in that case).
 */
static blk_status_t dio_read_error(struct inode *inode, struct bio *failed_bio,
				   struct page *page, unsigned int pgoff,
				   u64 start, u64 end, int failed_mirror,
				   bio_end_io_t *repair_endio, void *repair_arg)
{
	struct io_failure_record *failrec;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
	struct bio *bio;
	int isector;
	unsigned int read_mode = 0;
	int segs;
	int ret;
	blk_status_t status;
	struct bio_vec bvec;

	/* Repair only makes sense for reads. */
	BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);

	ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
	if (ret)
		return errno_to_blk_status(ret);

	ret = btrfs_check_dio_repairable(inode, failed_bio, failrec,
					 failed_mirror);
	if (!ret) {
		/* No other mirror left to try. */
		free_io_failure(failure_tree, io_tree, failrec);
		return BLK_STS_IOERR;
	}

	segs = bio_segments(failed_bio);
	bio_get_first_bvec(failed_bio, &bvec);
	/*
	 * If the failed bio covered more than one sector, fail fast on the
	 * retry: the callers re-read sector by sector anyway.
	 */
	if (segs > 1 ||
	    (bvec.bv_len > btrfs_inode_sectorsize(inode)))
		read_mode |= REQ_FAILFAST_DEV;

	/* Sector index of @start within the original bio. */
	isector = start - btrfs_io_bio(failed_bio)->logical;
	isector >>= inode->i_sb->s_blocksize_bits;
	bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
				pgoff, isector, repair_endio, repair_arg);
	bio->bi_opf = REQ_OP_READ | read_mode;

	btrfs_debug(BTRFS_I(inode)->root->fs_info,
		    "repair DIO read error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d",
		    read_mode, failrec->this_mirror, failrec->in_validation);

	status = submit_dio_repair_bio(inode, bio, failrec->this_mirror);
	if (status) {
		/* Submission failed; nothing will complete the record. */
		free_io_failure(failure_tree, io_tree, failrec);
		bio_put(bio);
	}

	return status;
}
7871
/*
 * Completion context shared between a synchronous DIO sector-repair
 * submitter and its end_io handler.
 */
struct btrfs_retry_complete {
	struct completion done;	/* signalled by the retry end_io handler */
	struct inode *inode;	/* inode the failed read belongs to */
	u64 start;		/* logical offset of the sector being retried */
	int uptodate;		/* set to 1 when the retried read succeeded */
};
7878
/*
 * End_io for a nodatasum DIO repair read: with no checksum to verify, any
 * successful read counts as repaired.  Marks the failure range clean and
 * wakes the waiter in __btrfs_correct_data_nocsum().
 */
static void btrfs_retry_endio_nocsum(struct bio *bio)
{
	struct btrfs_retry_complete *done = bio->bi_private;
	struct inode *inode = done->inode;
	struct bio_vec *bvec;
	struct extent_io_tree *io_tree, *failure_tree;
	struct bvec_iter_all iter_all;

	if (bio->bi_status)
		goto end;

	/* Repair bios are built one sector at a time. */
	ASSERT(bio->bi_vcnt == 1);
	io_tree = &BTRFS_I(inode)->io_tree;
	failure_tree = &BTRFS_I(inode)->io_failure_tree;
	ASSERT(bio_first_bvec_all(bio)->bv_len == btrfs_inode_sectorsize(inode));

	done->uptodate = 1;
	ASSERT(!bio_flagged(bio, BIO_CLONED));
	bio_for_each_segment_all(bvec, bio, iter_all)
		clean_io_failure(BTRFS_I(inode)->root->fs_info, failure_tree,
				 io_tree, done->start, bvec->bv_page,
				 btrfs_ino(BTRFS_I(inode)), 0);
end:
	complete(&done->done);
	bio_put(bio);
}
7905
/*
 * Re-read every sector of a failed nodatasum DIO read from other mirrors.
 *
 * Walks the original bio sector by sector; for each sector a single-sector
 * repair read is submitted via dio_read_error() and waited on synchronously.
 * A sector that keeps failing is retried until dio_read_error() itself gives
 * up (no mirrors left), at which point the error is recorded and the walk
 * moves on to the next sector.
 *
 * Returns BLK_STS_OK when every sector was eventually read, otherwise the
 * last submission error.
 */
static blk_status_t __btrfs_correct_data_nocsum(struct inode *inode,
		struct btrfs_io_bio *io_bio)
{
	struct btrfs_fs_info *fs_info;
	struct bio_vec bvec;
	struct bvec_iter iter;
	struct btrfs_retry_complete done;
	u64 start;
	unsigned int pgoff;
	u32 sectorsize;
	int nr_sectors;
	blk_status_t ret;
	blk_status_t err = BLK_STS_OK;

	fs_info = BTRFS_I(inode)->root->fs_info;
	sectorsize = fs_info->sectorsize;

	start = io_bio->logical;
	done.inode = inode;
	/* The bio was already iterated once by end_io; rewind it. */
	io_bio->bio.bi_iter = io_bio->iter;

	bio_for_each_segment(bvec, &io_bio->bio, iter) {
		nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len);
		pgoff = bvec.bv_offset;

next_block_or_try_again:
		done.uptodate = 0;
		done.start = start;
		init_completion(&done.done);

		ret = dio_read_error(inode, &io_bio->bio, bvec.bv_page,
				pgoff, start, start + sectorsize - 1,
				io_bio->mirror_num,
				btrfs_retry_endio_nocsum, &done);
		if (ret) {
			/* No mirror left for this sector; record and move on. */
			err = ret;
			goto next;
		}

		wait_for_completion_io(&done.done);

		if (!done.uptodate) {
			/* The retried read failed too; try the next mirror. */
			goto next_block_or_try_again;
		}

next:
		start += sectorsize;

		nr_sectors--;
		if (nr_sectors) {
			/* More sectors in this bvec; advance within the page. */
			pgoff += sectorsize;
			ASSERT(pgoff < PAGE_SIZE);
			goto next_block_or_try_again;
		}
	}

	return err;
}
7965
/*
 * End_io for a checksummed DIO repair read: re-verify the checksum of the
 * retried sector and, when it matches, mark the failure record clean.
 * Wakes the waiter in __btrfs_subio_endio_read().
 */
static void btrfs_retry_endio(struct bio *bio)
{
	struct btrfs_retry_complete *done = bio->bi_private;
	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
	struct extent_io_tree *io_tree, *failure_tree;
	struct inode *inode = done->inode;
	struct bio_vec *bvec;
	int uptodate;
	int ret;
	int i = 0;
	struct bvec_iter_all iter_all;

	if (bio->bi_status)
		goto end;

	uptodate = 1;

	/* Repair bios are built one sector at a time. */
	ASSERT(bio->bi_vcnt == 1);
	ASSERT(bio_first_bvec_all(bio)->bv_len == btrfs_inode_sectorsize(done->inode));

	io_tree = &BTRFS_I(inode)->io_tree;
	failure_tree = &BTRFS_I(inode)->io_failure_tree;

	ASSERT(!bio_flagged(bio, BIO_CLONED));
	bio_for_each_segment_all(bvec, bio, iter_all) {
		ret = __readpage_endio_check(inode, io_bio, i, bvec->bv_page,
					     bvec->bv_offset, done->start,
					     bvec->bv_len);
		if (!ret)
			clean_io_failure(BTRFS_I(inode)->root->fs_info,
					 failure_tree, io_tree, done->start,
					 bvec->bv_page,
					 btrfs_ino(BTRFS_I(inode)),
					 bvec->bv_offset);
		else
			uptodate = 0;
		i++;
	}

	done->uptodate = uptodate;
end:
	complete(&done->done);
	bio_put(bio);
}
8010
/*
 * Verify the checksum of every sector of a completed DIO read, repairing
 * failing sectors from other mirrors.  If @err is set the whole bio failed,
 * so verification is skipped and every sector is repaired.  Each repair is
 * submitted via dio_read_error() and waited on synchronously; a sector is
 * retried until its checksum matches or no mirrors remain.
 *
 * Returns BLK_STS_OK when all sectors end up good, otherwise the last
 * repair submission error.
 */
static blk_status_t __btrfs_subio_endio_read(struct inode *inode,
		struct btrfs_io_bio *io_bio, blk_status_t err)
{
	struct btrfs_fs_info *fs_info;
	struct bio_vec bvec;
	struct bvec_iter iter;
	struct btrfs_retry_complete done;
	u64 start;
	u64 offset = 0;
	u32 sectorsize;
	int nr_sectors;
	unsigned int pgoff;
	int csum_pos;
	bool uptodate = (err == 0);
	int ret;
	blk_status_t status;

	fs_info = BTRFS_I(inode)->root->fs_info;
	sectorsize = fs_info->sectorsize;

	err = BLK_STS_OK;
	start = io_bio->logical;
	done.inode = inode;
	/* The bio was already iterated once by end_io; rewind it. */
	io_bio->bio.bi_iter = io_bio->iter;

	bio_for_each_segment(bvec, &io_bio->bio, iter) {
		nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len);

		pgoff = bvec.bv_offset;
next_block:
		if (uptodate) {
			csum_pos = BTRFS_BYTES_TO_BLKS(fs_info, offset);
			ret = __readpage_endio_check(inode, io_bio, csum_pos,
					bvec.bv_page, pgoff, start, sectorsize);
			if (likely(!ret))
				goto next;
		}
try_again:
		done.uptodate = 0;
		done.start = start;
		init_completion(&done.done);

		status = dio_read_error(inode, &io_bio->bio, bvec.bv_page,
					pgoff, start, start + sectorsize - 1,
					io_bio->mirror_num, btrfs_retry_endio,
					&done);
		if (status) {
			/* No mirror left for this sector; record and move on. */
			err = status;
			goto next;
		}

		wait_for_completion_io(&done.done);

		if (!done.uptodate) {
			/* Retried copy failed verification too; keep trying. */
			goto try_again;
		}
next:
		offset += sectorsize;
		start += sectorsize;

		ASSERT(nr_sectors);

		nr_sectors--;
		if (nr_sectors) {
			/* More sectors in this bvec; advance within the page. */
			pgoff += sectorsize;
			ASSERT(pgoff < PAGE_SIZE);
			goto next_block;
		}
	}

	return err;
}
8084
8085static blk_status_t btrfs_subio_endio_read(struct inode *inode,
8086 struct btrfs_io_bio *io_bio, blk_status_t err)
8087{
8088 bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
8089
8090 if (skip_csum) {
8091 if (unlikely(err))
8092 return __btrfs_correct_data_nocsum(inode, io_bio);
8093 else
8094 return BLK_STS_OK;
8095 } else {
8096 return __btrfs_subio_endio_read(inode, io_bio, err);
8097 }
8098}
8099
/*
 * Final end_io for a direct IO read: run checksum verification/repair when
 * the original bio was submitted unsplit, unlock the file range and complete
 * the caller's dio bio.
 */
static void btrfs_endio_direct_read(struct bio *bio)
{
	struct btrfs_dio_private *dip = bio->bi_private;
	struct inode *inode = dip->inode;
	struct bio *dio_bio;
	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
	blk_status_t err = bio->bi_status;

	/*
	 * When the bio was split, each piece already went through
	 * btrfs_subio_endio_read() from btrfs_end_dio_bio().
	 */
	if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED)
		err = btrfs_subio_endio_read(inode, io_bio, err);

	unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
		      dip->logical_offset + dip->bytes - 1);
	dio_bio = dip->dio_bio;

	kfree(dip);

	dio_bio->bi_status = err;
	dio_end_io(dio_bio);
	btrfs_io_bio_free_csum(io_bio);
	bio_put(bio);
}
8122
/*
 * Walk the ordered extents covering [@offset, @offset + @bytes) after a
 * direct IO write finished and queue each completed one for finishing on
 * the appropriate workqueue.
 *
 * @uptodate: whether the write succeeded; passed down so a failed write
 * tears the ordered extents down instead of finishing them.
 */
static void __endio_write_update_ordered(struct inode *inode,
					 const u64 offset, const u64 bytes,
					 const bool uptodate)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_ordered_extent *ordered = NULL;
	struct btrfs_workqueue *wq;
	btrfs_work_func_t func;
	u64 ordered_offset = offset;
	u64 ordered_bytes = bytes;
	u64 last_offset;

	/* The free-space inode finishes its writes on a dedicated queue. */
	if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
		wq = fs_info->endio_freespace_worker;
		func = btrfs_freespace_write_helper;
	} else {
		wq = fs_info->endio_write_workers;
		func = btrfs_endio_write_helper;
	}

	while (ordered_offset < offset + bytes) {
		last_offset = ordered_offset;
		if (btrfs_dec_test_first_ordered_pending(inode, &ordered,
							 &ordered_offset,
							 ordered_bytes,
							 uptodate)) {
			btrfs_init_work(&ordered->work, func,
					finish_ordered_fn,
					NULL, NULL);
			btrfs_queue_work(wq, &ordered->work);
		}
		/*
		 * No progress means there is nothing left pending in the
		 * range; we are done.
		 */
		if (ordered_offset == last_offset)
			return;
		/*
		 * One write bio can span several ordered extents; keep
		 * walking the remainder of the range.
		 */
		if (ordered_offset < offset + bytes) {
			ordered_bytes = offset + bytes - ordered_offset;
			ordered = NULL;
		}
	}
}
8170
8171static void btrfs_endio_direct_write(struct bio *bio)
8172{
8173 struct btrfs_dio_private *dip = bio->bi_private;
8174 struct bio *dio_bio = dip->dio_bio;
8175
8176 __endio_write_update_ordered(dip->inode, dip->logical_offset,
8177 dip->bytes, !bio->bi_status);
8178
8179 kfree(dip);
8180
8181 dio_bio->bi_status = bio->bi_status;
8182 dio_end_io(dio_bio);
8183 bio_put(bio);
8184}
8185
8186static blk_status_t btrfs_submit_bio_start_direct_io(void *private_data,
8187 struct bio *bio, u64 offset)
8188{
8189 struct inode *inode = private_data;
8190 blk_status_t ret;
8191 ret = btrfs_csum_one_bio(inode, bio, offset, 1);
8192 BUG_ON(ret);
8193 return 0;
8194}
8195
/*
 * End_io for each (possibly split) piece of a DIO bio.  Runs per-piece
 * post-processing (checksum verify/repair for reads), records any error in
 * the shared dip, and completes the original bio once the last piece is
 * done.
 */
static void btrfs_end_dio_bio(struct bio *bio)
{
	struct btrfs_dio_private *dip = bio->bi_private;
	blk_status_t err = bio->bi_status;

	if (err)
		btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
			   "direct IO failed ino %llu rw %d,%u sector %#Lx len %u err no %d",
			   btrfs_ino(BTRFS_I(dip->inode)), bio_op(bio),
			   bio->bi_opf,
			   (unsigned long long)bio->bi_iter.bi_sector,
			   bio->bi_iter.bi_size, err);

	if (dip->subio_endio)
		err = dip->subio_endio(dip->inode, btrfs_io_bio(bio), err);

	if (err) {
		/*
		 * Remember that one piece failed; the original bio is failed
		 * as a whole once every piece has completed.  Writing
		 * dip->errors before the atomic_dec below keeps it visible
		 * to whichever piece drops the count to zero.
		 */
		dip->errors = 1;
	}

	/* Only the last completing piece finishes the original bio. */
	if (!atomic_dec_and_test(&dip->pending_bios))
		goto out;

	if (dip->errors) {
		bio_io_error(dip->orig_bio);
	} else {
		dip->dio_bio->bi_status = BLK_STS_OK;
		bio_endio(dip->orig_bio);
	}
out:
	bio_put(bio);
}
8235
/*
 * Make the checksums for a DIO read available to @bio.
 *
 * The csums are looked up only once, for the whole original bio, when its
 * first piece is processed; every split bio then points its csum pointer
 * at the matching slice of the original bio's csum array.
 */
static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode,
						struct btrfs_dio_private *dip,
						struct bio *bio,
						u64 file_offset)
{
	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
	struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio);
	blk_status_t ret;

	/*
	 * We load all the csum data we need when we submit the first bio, so
	 * we only need to do this once (dip->logical_offset == file_offset
	 * identifies the first piece).
	 */
	if (dip->logical_offset == file_offset) {
		ret = btrfs_lookup_bio_sums_dio(inode, dip->orig_bio,
						file_offset);
		if (ret)
			return ret;
	}

	if (bio == dip->orig_bio)
		return 0;

	/*
	 * Index into the original bio's csum array by the split's block
	 * offset (csums are indexed per u32 entry here).
	 */
	file_offset -= dip->logical_offset;
	file_offset >>= inode->i_sb->s_blocksize_bits;
	io_bio->csum = (u8 *)(((u32 *)orig_io_bio->csum) + file_offset);

	return 0;
}
8266
/*
 * Submit one (piece of a) DIO bio: set up read end_io offload, handle
 * checksumming (async or inline for writes, lookup-and-bind for reads),
 * then map and submit the bio to the devices.
 */
static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
		struct inode *inode, u64 file_offset, int async_submit)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_dio_private *dip = bio->bi_private;
	bool write = bio_op(bio) == REQ_OP_WRITE;
	blk_status_t ret;

	/* Check btrfs_submit_bio_hook() for rules about async submit. */
	if (async_submit)
		async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);

	if (!write) {
		/* Route the read completion through the data end_io queue. */
		ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
		if (ret)
			goto err;
	}

	/* No checksums to compute or look up for nodatasum inodes. */
	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
		goto map;

	if (write && async_submit) {
		/* Checksumming happens in the async worker before mapping. */
		ret = btrfs_wq_submit_bio(fs_info, bio, 0, 0,
					  file_offset, inode,
					  btrfs_submit_bio_start_direct_io);
		goto err;
	} else if (write) {
		/*
		 * If we aren't doing async submit, calculate the csum of the
		 * bio now.
		 */
		ret = btrfs_csum_one_bio(inode, bio, file_offset, 1);
		if (ret)
			goto err;
	} else {
		/* Reads: attach the stored csums for later verification. */
		ret = btrfs_lookup_and_bind_dio_csum(inode, dip, bio,
						     file_offset);
		if (ret)
			goto err;
	}
map:
	ret = btrfs_map_bio(fs_info, bio, 0, 0);
err:
	return ret;
}
8312
/*
 * Split the original DIO bio along chunk-stripe boundaries (as reported by
 * btrfs_map_block()) and submit each piece.  If the whole bio fits in one
 * stripe it is submitted unsplit and flagged BTRFS_DIO_ORIG_BIO_SUBMITTED.
 *
 * Always returns 0: submission errors are recorded in dip->errors and the
 * original bio is failed through the end_io path instead.
 */
static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip)
{
	struct inode *inode = dip->inode;
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct bio *bio;
	struct bio *orig_bio = dip->orig_bio;
	u64 start_sector = orig_bio->bi_iter.bi_sector;
	u64 file_offset = dip->logical_offset;
	u64 map_length;
	int async_submit = 0;
	u64 submit_len;
	int clone_offset = 0;
	int clone_len;
	int ret;
	blk_status_t status;

	map_length = orig_bio->bi_iter.bi_size;
	submit_len = map_length;
	ret = btrfs_map_block(fs_info, btrfs_op(orig_bio), start_sector << 9,
			      &map_length, NULL, 0);
	if (ret)
		return -EIO;

	if (map_length >= submit_len) {
		/* The whole bio fits in one stripe; no splitting needed. */
		bio = orig_bio;
		dip->flags |= BTRFS_DIO_ORIG_BIO_SUBMITTED;
		goto submit;
	}

	/* Async crcs make it difficult to collect full stripe writes. */
	if (btrfs_data_alloc_profile(fs_info) & BTRFS_BLOCK_GROUP_RAID56_MASK)
		async_submit = 0;
	else
		async_submit = 1;

	/* bio split */
	ASSERT(map_length <= INT_MAX);
	atomic_inc(&dip->pending_bios);
	do {
		clone_len = min_t(int, submit_len, map_length);

		/*
		 * This will never fail as it's passing GFP_NOFS and
		 * the allocation is backed by btrfs_bioset.
		 */
		bio = btrfs_bio_clone_partial(orig_bio, clone_offset,
					      clone_len);
		bio->bi_private = dip;
		bio->bi_end_io = btrfs_end_dio_bio;
		btrfs_io_bio(bio)->logical = file_offset;

		ASSERT(submit_len >= clone_len);
		submit_len -= clone_len;
		if (submit_len == 0)
			break;

		/*
		 * Increase the count before we submit the bio so we know
		 * the end IO handler won't happen before we increase the
		 * count.  Otherwise, the dip might get freed before we're
		 * done setting it up.
		 */
		atomic_inc(&dip->pending_bios);

		status = btrfs_submit_dio_bio(bio, inode, file_offset,
						async_submit);
		if (status) {
			bio_put(bio);
			atomic_dec(&dip->pending_bios);
			goto out_err;
		}

		clone_offset += clone_len;
		start_sector += clone_len >> 9;
		file_offset += clone_len;

		map_length = submit_len;
		ret = btrfs_map_block(fs_info, btrfs_op(orig_bio),
				      start_sector << 9, &map_length, NULL, 0);
		if (ret)
			goto out_err;
	} while (submit_len > 0);

submit:
	status = btrfs_submit_dio_bio(bio, inode, file_offset, async_submit);
	if (!status)
		return 0;

	bio_put(bio);
out_err:
	dip->errors = 1;
	/*
	 * Before the atomic variable goes to zero, dip->errors must be
	 * perceived to be set; atomic ops with a return value are fully
	 * ordered, which guarantees that.
	 */
	if (atomic_dec_and_test(&dip->pending_bios))
		bio_io_error(dip->orig_bio);

	/* bio_end_io() will handle the error, so we needn't return it. */
	return 0;
}
8416
/*
 * Entry point handed to __blockdev_direct_IO(): clone the caller's dio bio,
 * wrap it in a btrfs_dio_private and submit it (possibly split) through
 * btrfs_submit_direct_hook().  On setup failure, clean up the range and
 * complete the dio bio with an error.
 */
static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode,
				loff_t file_offset)
{
	struct btrfs_dio_private *dip = NULL;
	struct bio *bio = NULL;
	struct btrfs_io_bio *io_bio;
	bool write = (bio_op(dio_bio) == REQ_OP_WRITE);
	int ret = 0;

	bio = btrfs_bio_clone(dio_bio);

	dip = kzalloc(sizeof(*dip), GFP_NOFS);
	if (!dip) {
		ret = -ENOMEM;
		goto free_ordered;
	}

	dip->private = dio_bio->bi_private;
	dip->inode = inode;
	dip->logical_offset = file_offset;
	dip->bytes = dio_bio->bi_iter.bi_size;
	dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9;
	bio->bi_private = dip;
	dip->orig_bio = bio;
	dip->dio_bio = dio_bio;
	atomic_set(&dip->pending_bios, 0);
	io_bio = btrfs_io_bio(bio);
	io_bio->logical = file_offset;

	if (write) {
		bio->bi_end_io = btrfs_endio_direct_write;
	} else {
		bio->bi_end_io = btrfs_endio_direct_read;
		dip->subio_endio = btrfs_subio_endio_read;
	}

	/*
	 * Reset the range for unsubmitted ordered extents (to a 0-length
	 * range): from here on the write end_io (not btrfs_direct_IO()'s
	 * error path) owns cleanup of the ordered extents for this range.
	 */
	if (write) {
		struct btrfs_dio_data *dio_data = current->journal_info;

		dio_data->unsubmitted_oe_range_end = dip->logical_offset +
			dip->bytes;
		dio_data->unsubmitted_oe_range_start =
			dio_data->unsubmitted_oe_range_end;
	}

	ret = btrfs_submit_direct_hook(dip);
	if (!ret)
		return;

	btrfs_io_bio_free_csum(io_bio);

free_ordered:
	/*
	 * If we arrived here, the dip was fully set up but submission failed.
	 * Erroring the cloned bio lets its end_io handler perform all of the
	 * range cleanup and complete dio_bio for us.
	 */
	if (bio && dip) {
		bio_io_error(bio);
		/*
		 * The end_io callbacks free our dip, do the final put on bio
		 * and all the cleanup and final put for dio_bio (through
		 * dio_end_io()).
		 */
		dip = NULL;
		bio = NULL;
	} else {
		/* dip allocation failed; clean up the range ourselves. */
		if (write)
			__endio_write_update_ordered(inode,
						file_offset,
						dio_bio->bi_iter.bi_size,
						false);
		else
			unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
			      file_offset + dio_bio->bi_iter.bi_size - 1);

		dio_bio->bi_status = BLK_STS_IOERR;
		/*
		 * dio_end_io() releases and cleans up dio_bio; no bio_put()
		 * nor bio_endio() needed against it here.
		 */
		dio_end_io(dio_bio);
	}
	if (bio)
		bio_put(bio);
	kfree(dip);
}
8514
8515static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info,
8516 const struct iov_iter *iter, loff_t offset)
8517{
8518 int seg;
8519 int i;
8520 unsigned int blocksize_mask = fs_info->sectorsize - 1;
8521 ssize_t retval = -EINVAL;
8522
8523 if (offset & blocksize_mask)
8524 goto out;
8525
8526 if (iov_iter_alignment(iter) & blocksize_mask)
8527 goto out;
8528
8529
8530 if (iov_iter_rw(iter) != READ || !iter_is_iovec(iter))
8531 return 0;
8532
8533
8534
8535
8536
8537 for (seg = 0; seg < iter->nr_segs; seg++) {
8538 for (i = seg + 1; i < iter->nr_segs; i++) {
8539 if (iter->iov[seg].iov_base == iter->iov[i].iov_base)
8540 goto out;
8541 }
8542 }
8543 retval = 0;
8544out:
8545 return retval;
8546}
8547
/*
 * The btrfs ->direct_IO implementation.
 *
 * Writes: reserve data space up front, stash a btrfs_dio_data in
 * current->journal_info for btrfs_get_blocks_direct()/btrfs_submit_direct()
 * to consume, and take dio_sem shared.  On error or short write, release
 * the unused reservation and clean up any ordered extents that never got
 * bios submitted for them.
 *
 * Returns 0 (fall back to buffered IO) when the request fails alignment
 * checks, otherwise the result of __blockdev_direct_IO().
 */
static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_dio_data dio_data = { 0 };
	struct extent_changeset *data_reserved = NULL;
	loff_t offset = iocb->ki_pos;
	size_t count = 0;
	int flags = 0;
	bool wakeup = true;
	bool relock = false;
	ssize_t ret;

	if (check_direct_IO(fs_info, iter, offset))
		return 0;

	inode_dio_begin(inode);

	/*
	 * If this range has had async (compressed) extents, flush the dirty
	 * pages again to make sure everything outstanding is on disk before
	 * the direct IO runs.
	 */
	count = iov_iter_count(iter);
	if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
		     &BTRFS_I(inode)->runtime_flags))
		filemap_fdatawrite_range(inode->i_mapping, offset,
					 offset + count - 1);

	if (iov_iter_rw(iter) == WRITE) {
		/*
		 * Writes entirely below EOF don't change i_size, so the
		 * inode lock can be dropped for the duration of the IO.
		 */
		if (offset + count <= inode->i_size) {
			dio_data.overwrite = 1;
			inode_unlock(inode);
			relock = true;
		} else if (iocb->ki_flags & IOCB_NOWAIT) {
			ret = -EAGAIN;
			goto out;
		}
		ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
						   offset, count);
		if (ret)
			goto out;

		/*
		 * Track the reservation and the range of ordered extents we
		 * haven't submitted bios for; current->journal_info carries
		 * this down into the get_blocks/submit callbacks.
		 */
		dio_data.reserve = round_up(count,
					    fs_info->sectorsize);
		dio_data.unsubmitted_oe_range_start = (u64)offset;
		dio_data.unsubmitted_oe_range_end = (u64)offset;
		current->journal_info = &dio_data;
		down_read(&BTRFS_I(inode)->dio_sem);
	} else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
			    &BTRFS_I(inode)->runtime_flags)) {
		/* Locked reads let generic code handle locking/holes. */
		inode_dio_end(inode);
		flags = DIO_LOCKING | DIO_SKIP_HOLES;
		wakeup = false;
	}

	ret = __blockdev_direct_IO(iocb, inode,
				   fs_info->fs_devices->latest_bdev,
				   iter, btrfs_get_blocks_direct, NULL,
				   btrfs_submit_direct, flags);
	if (iov_iter_rw(iter) == WRITE) {
		up_read(&BTRFS_I(inode)->dio_sem);
		current->journal_info = NULL;
		if (ret < 0 && ret != -EIOCBQUEUED) {
			if (dio_data.reserve)
				btrfs_delalloc_release_space(inode, data_reserved,
					offset, dio_data.reserve, true);
			/*
			 * On error we might have left some ordered extents
			 * without submitting corresponding bios for them, so
			 * clean them up to avoid other tasks waiting on them
			 * forever.
			 */
			if (dio_data.unsubmitted_oe_range_start <
			    dio_data.unsubmitted_oe_range_end)
				__endio_write_update_ordered(inode,
					dio_data.unsubmitted_oe_range_start,
					dio_data.unsubmitted_oe_range_end -
					dio_data.unsubmitted_oe_range_start,
					false);
		} else if (ret >= 0 && (size_t)ret < count)
			/* Short write: return the unused tail reservation. */
			btrfs_delalloc_release_space(inode, data_reserved,
					offset, count - (size_t)ret, true);
		btrfs_delalloc_release_extents(BTRFS_I(inode), count, false);
	}
out:
	if (wakeup)
		inode_dio_end(inode);
	if (relock)
		inode_lock(inode);

	extent_changeset_free(data_reserved);
	return ret;
}
8654
8655#define BTRFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC)
8656
8657static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
8658 __u64 start, __u64 len)
8659{
8660 int ret;
8661
8662 ret = fiemap_check_flags(fieinfo, BTRFS_FIEMAP_FLAGS);
8663 if (ret)
8664 return ret;
8665
8666 return extent_fiemap(inode, fieinfo, start, len);
8667}
8668
8669int btrfs_readpage(struct file *file, struct page *page)
8670{
8671 struct extent_io_tree *tree;
8672 tree = &BTRFS_I(page->mapping->host)->io_tree;
8673 return extent_read_full_page(tree, page, btrfs_get_extent, 0);
8674}
8675
8676static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
8677{
8678 struct inode *inode = page->mapping->host;
8679 int ret;
8680
8681 if (current->flags & PF_MEMALLOC) {
8682 redirty_page_for_writepage(wbc, page);
8683 unlock_page(page);
8684 return 0;
8685 }
8686
8687
8688
8689
8690
8691
8692 if (!igrab(inode)) {
8693 redirty_page_for_writepage(wbc, page);
8694 return AOP_WRITEPAGE_ACTIVATE;
8695 }
8696 ret = extent_write_full_page(page, wbc);
8697 btrfs_add_delayed_iput(inode);
8698 return ret;
8699}
8700
/* The btrfs ->writepages: delegate writeback to the extent IO code. */
static int btrfs_writepages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	return extent_writepages(mapping, wbc);
}
8706
/* The btrfs ->readpages (readahead): delegate to the extent IO code. */
static int
btrfs_readpages(struct file *file, struct address_space *mapping,
		struct list_head *pages, unsigned nr_pages)
{
	return extent_readpages(mapping, pages, nr_pages);
}
8713
8714static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
8715{
8716 int ret = try_release_extent_mapping(page, gfp_flags);
8717 if (ret == 1) {
8718 ClearPagePrivate(page);
8719 set_page_private(page, 0);
8720 put_page(page);
8721 }
8722 return ret;
8723}
8724
8725static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
8726{
8727 if (PageWriteback(page) || PageDirty(page))
8728 return 0;
8729 return __btrfs_releasepage(page, gfp_flags);
8730}
8731
/*
 * The btrfs ->invalidatepage: tear down per-page state when a page is being
 * removed (truncate, hole punch, inode eviction).
 *
 * Walks any ordered extents overlapping the page, marking them truncated
 * and completing them if this page carried their pending-IO bit, then
 * clears the extent state bits and the page's private data.  When the inode
 * is being evicted (I_FREEING) the extent tree is about to be destroyed, so
 * the extent-state locking/clearing steps are skipped.
 */
static void btrfs_invalidatepage(struct page *page, unsigned int offset,
				 unsigned int length)
{
	struct inode *inode = page->mapping->host;
	struct extent_io_tree *tree;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	u64 page_start = page_offset(page);
	u64 page_end = page_start + PAGE_SIZE - 1;
	u64 start;
	u64 end;
	int inode_evicting = inode->i_state & I_FREEING;

	/*
	 * Wait for any in-flight IO first: once we start tearing down the
	 * page's state, the endio handlers must not race with us.
	 */
	wait_on_page_writeback(page);

	tree = &BTRFS_I(inode)->io_tree;
	if (offset) {
		/* Partial invalidation: just try to release the mapping. */
		btrfs_releasepage(page, GFP_NOFS);
		return;
	}

	if (!inode_evicting)
		lock_extent_bits(tree, page_start, page_end, &cached_state);
again:
	start = page_start;
	ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start,
					page_end - start + 1);
	if (ordered) {
		end = min(page_end, ordered->file_offset + ordered->len - 1);
		/*
		 * Clear the delalloc/accounting bits for the part of the
		 * page covered by this ordered extent.
		 */
		if (!inode_evicting)
			clear_extent_bit(tree, start, end,
					 EXTENT_DIRTY | EXTENT_DELALLOC |
					 EXTENT_DELALLOC_NEW |
					 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
					 EXTENT_DEFRAG, 1, 0, &cached_state);
		/*
		 * Private2 set means this page held the ordered extent's
		 * pending IO; account it as truncated and maybe finish it.
		 */
		if (TestClearPagePrivate2(page)) {
			struct btrfs_ordered_inode_tree *tree;
			u64 new_len;

			tree = &BTRFS_I(inode)->ordered_tree;

			spin_lock_irq(&tree->lock);
			set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
			new_len = start - ordered->file_offset;
			if (new_len < ordered->truncated_len)
				ordered->truncated_len = new_len;
			spin_unlock_irq(&tree->lock);

			if (btrfs_dec_test_ordered_pending(inode, &ordered,
							   start,
							   end - start + 1, 1))
				btrfs_finish_ordered_io(ordered);
		}
		btrfs_put_ordered_extent(ordered);
		if (!inode_evicting) {
			cached_state = NULL;
			lock_extent_bits(tree, start, end,
					 &cached_state);
		}

		/* Several ordered extents may cover the page; keep walking. */
		start = end + 1;
		if (start < page_end)
			goto again;
	}

	/*
	 * If the page was dirty, reserved qgroup data for it was never
	 * claimed by an ordered extent; free it now so it is not leaked.
	 */
	if (PageDirty(page))
		btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE);
	if (!inode_evicting) {
		clear_extent_bit(tree, page_start, page_end,
				 EXTENT_LOCKED | EXTENT_DIRTY |
				 EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
				 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
				 &cached_state);

		__btrfs_releasepage(page, GFP_NOFS);
	}

	ClearPageChecked(page);
	if (PagePrivate(page)) {
		/* Drop the page's private state and its reference. */
		ClearPagePrivate(page);
		set_page_private(page, 0);
		put_page(page);
	}
}
8846
8847
8848
8849
8850
8851
8852
8853
8854
8855
8856
8857
8858
8859
8860
8861
/*
 * The btrfs ->page_mkwrite handler: make a mmap'ed page writable.
 *
 * Reserves delalloc space for the page, waits out any ordered extent that
 * overlaps it, marks the range delalloc, and zeroes the tail beyond EOF.
 * On success the page is left locked and VM_FAULT_LOCKED is returned; on
 * failure the reservation is released and a VM fault code is returned.
 */
vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
{
	struct page *page = vmf->page;
	struct inode *inode = file_inode(vmf->vma->vm_file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	struct extent_changeset *data_reserved = NULL;
	char *kaddr;
	unsigned long zero_start;
	loff_t size;
	vm_fault_t ret;
	int ret2;
	int reserved = 0;
	u64 reserved_space;
	u64 page_start;
	u64 page_end;
	u64 end;

	reserved_space = PAGE_SIZE;

	sb_start_pagefault(inode->i_sb);
	page_start = page_offset(page);
	page_end = page_start + PAGE_SIZE - 1;
	end = page_end;

	/*
	 * Reserve space for the full page up front; if the page turns out
	 * to straddle EOF, the excess is given back further down.
	 */
	ret2 = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
					    reserved_space);
	if (!ret2) {
		ret2 = file_update_time(vmf->vma->vm_file);
		reserved = 1;
	}
	if (ret2) {
		ret = vmf_error(ret2);
		if (reserved)
			goto out;
		goto out_noreserve;
	}

	ret = VM_FAULT_NOPAGE;
again:
	lock_page(page);
	size = i_size_read(inode);

	/* The page may have been truncated or remapped while unlocked. */
	if ((page->mapping != inode->i_mapping) ||
	    (page_start >= size)) {
		goto out_unlock;
	}
	wait_on_page_writeback(page);

	lock_extent_bits(io_tree, page_start, page_end, &cached_state);
	set_page_extent_mapped(page);

	/*
	 * An ordered extent overlapping the page means IO is still pending
	 * on it; drop our locks, wait for it to finish and start over.
	 */
	ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start,
			PAGE_SIZE);
	if (ordered) {
		unlock_extent_cached(io_tree, page_start, page_end,
				     &cached_state);
		unlock_page(page);
		btrfs_start_ordered_extent(inode, ordered, 1);
		btrfs_put_ordered_extent(ordered);
		goto again;
	}

	/* Last page of the file: only the part up to EOF needs space. */
	if (page->index == ((size - 1) >> PAGE_SHIFT)) {
		reserved_space = round_up(size - page_start,
					  fs_info->sectorsize);
		if (reserved_space < PAGE_SIZE) {
			end = page_start + reserved_space - 1;
			btrfs_delalloc_release_space(inode, data_reserved,
					page_start, PAGE_SIZE - reserved_space,
					true);
		}
	}

	/*
	 * Clear any stale delalloc/accounting bits so the set_extent_delalloc
	 * below starts from a clean slate and accounting stays correct.
	 */
	clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end,
			  EXTENT_DIRTY | EXTENT_DELALLOC |
			  EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
			  0, 0, &cached_state);

	ret2 = btrfs_set_extent_delalloc(inode, page_start, end, 0,
					&cached_state, 0);
	if (ret2) {
		unlock_extent_cached(io_tree, page_start, page_end,
				     &cached_state);
		ret = VM_FAULT_SIGBUS;
		goto out_unlock;
	}
	ret2 = 0;

	/* Zero the part of the page beyond EOF, if any. */
	if (page_start + PAGE_SIZE > size)
		zero_start = offset_in_page(size);
	else
		zero_start = PAGE_SIZE;

	if (zero_start != PAGE_SIZE) {
		kaddr = kmap(page);
		memset(kaddr + zero_start, 0, PAGE_SIZE - zero_start);
		flush_dcache_page(page);
		kunmap(page);
	}
	ClearPageChecked(page);
	set_page_dirty(page);
	SetPageUptodate(page);

	/* Record the transaction so fsync knows this inode changed. */
	BTRFS_I(inode)->last_trans = fs_info->generation;
	BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
	BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit;

	unlock_extent_cached(io_tree, page_start, page_end, &cached_state);

	if (!ret2) {
		btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, true);
		sb_end_pagefault(inode->i_sb);
		extent_changeset_free(data_reserved);
		return VM_FAULT_LOCKED;
	}

out_unlock:
	unlock_page(page);
out:
	btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, (ret != 0));
	btrfs_delalloc_release_space(inode, data_reserved, page_start,
				     reserved_space, (ret != 0));
out_noreserve:
	sb_end_pagefault(inode->i_sb);
	extent_changeset_free(data_reserved);
	return ret;
}
9013
/*
 * Truncate the inode's items down to the current i_size.
 *
 * The item deletion can need many transactions, so a temporary block rsv is
 * used: each iteration starts a 2-item transaction, migrates the metadata
 * reservation into the temporary rsv, deletes as many items as the rsv
 * allows, and loops on -ENOSPC/-EAGAIN.  A NEED_TRUNCATE_BLOCK result means
 * i_size is not block aligned and the tail block must be zeroed separately.
 *
 * @skip_writeback: when false, first wait for ordered IO beyond the new
 * size so truncation does not race with in-flight writes.
 */
static int btrfs_truncate(struct inode *inode, bool skip_writeback)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_block_rsv *rsv;
	int ret;
	struct btrfs_trans_handle *trans;
	u64 mask = fs_info->sectorsize - 1;
	u64 min_size = btrfs_calc_trunc_metadata_size(fs_info, 1);

	if (!skip_writeback) {
		ret = btrfs_wait_ordered_range(inode, inode->i_size & (~mask),
					       (u64)-1);
		if (ret)
			return ret;
	}

	/*
	 * Temporary reservation the truncate loop draws from; refilled from
	 * trans_block_rsv on every new transaction.
	 */
	rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
	if (!rsv)
		return -ENOMEM;
	rsv->size = min_size;
	rsv->failfast = 1;

	/*
	 * Start with 2 units: 1 for updating the inode, 1 migrated into our
	 * temporary rsv for the item deletion itself.
	 */
	trans = btrfs_start_transaction(root, 2);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out;
	}

	/* Move the reservation over for the deletions. */
	ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv,
				      min_size, false);
	BUG_ON(ret);

	/*
	 * Truncate rewrites the extent items piecemeal, so a later fsync
	 * cannot rely on the log; force a full sync of this inode.
	 */
	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
	trans->block_rsv = rsv;

	while (1) {
		ret = btrfs_truncate_inode_items(trans, root, inode,
						 inode->i_size,
						 BTRFS_EXTENT_DATA_KEY);
		trans->block_rsv = &fs_info->trans_block_rsv;
		if (ret != -ENOSPC && ret != -EAGAIN)
			break;

		/* Ran out of rsv; persist progress and start a fresh trans. */
		ret = btrfs_update_inode(trans, root, inode);
		if (ret)
			break;

		btrfs_end_transaction(trans);
		btrfs_btree_balance_dirty(fs_info);

		trans = btrfs_start_transaction(root, 2);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			trans = NULL;
			break;
		}

		btrfs_block_rsv_release(fs_info, rsv, -1);
		ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv,
					      rsv, min_size, false);
		BUG_ON(ret);
		trans->block_rsv = rsv;
	}

	/*
	 * i_size not aligned to sectorsize: zero the tail of the last block
	 * outside a transaction, then reopen one to update the disk i_size.
	 */
	if (ret == NEED_TRUNCATE_BLOCK) {
		btrfs_end_transaction(trans);
		btrfs_btree_balance_dirty(fs_info);

		ret = btrfs_truncate_block(inode, inode->i_size, 0, 0);
		if (ret)
			goto out;
		trans = btrfs_start_transaction(root, 1);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			goto out;
		}
		btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
	}

	if (trans) {
		int ret2;

		/* Final inode update and transaction close; keep first error. */
		trans->block_rsv = &fs_info->trans_block_rsv;
		ret2 = btrfs_update_inode(trans, root, inode);
		if (ret2 && !ret)
			ret = ret2;

		ret2 = btrfs_end_transaction(trans);
		if (ret2 && !ret)
			ret = ret2;
		btrfs_btree_balance_dirty(fs_info);
	}
out:
	btrfs_free_block_rsv(fs_info, rsv);

	return ret;
}
9158
9159
9160
9161
/*
 * Create the top-level directory inode of a freshly created subvolume and
 * inherit properties from the parent root.  Runs inside @trans.
 * Returns 0 or a negative errno.
 */
int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
			     struct btrfs_root *new_root,
			     struct btrfs_root *parent_root,
			     u64 new_dirid)
{
	struct inode *inode;
	int err;
	u64 index = 0;

	/* The subvolume root directory is its own parent (name ".."). */
	inode = btrfs_new_inode(trans, new_root, NULL, "..", 2,
				new_dirid, new_dirid,
				S_IFDIR | (~current_umask() & S_IRWXUGO),
				&index);
	if (IS_ERR(inode))
		return PTR_ERR(inode);
	inode->i_op = &btrfs_dir_inode_operations;
	inode->i_fop = &btrfs_dir_file_operations;

	set_nlink(inode, 1);
	btrfs_i_size_write(BTRFS_I(inode), 0);
	unlock_new_inode(inode);

	/* Property inheritance failure is logged but does not fail creation. */
	err = btrfs_subvol_inherit_props(trans, new_root, parent_root);
	if (err)
		btrfs_err(new_root->fs_info,
			  "error inheriting subvolume %llu properties: %d",
			  new_root->root_key.objectid, err);

	err = btrfs_update_inode(trans, new_root, inode);

	iput(inode);
	return err;
}
9195
/*
 * ->alloc_inode: allocate a btrfs_inode from the slab and initialize all
 * of its runtime state.  Returns the embedded VFS inode or NULL on OOM.
 */
struct inode *btrfs_alloc_inode(struct super_block *sb)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);
	struct btrfs_inode *ei;
	struct inode *inode;

	ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_KERNEL);
	if (!ei)
		return NULL;

	ei->root = NULL;
	ei->generation = 0;
	ei->last_trans = 0;
	ei->last_sub_trans = 0;
	ei->logged_trans = 0;
	ei->delalloc_bytes = 0;
	ei->new_delalloc_bytes = 0;
	ei->defrag_bytes = 0;
	ei->disk_i_size = 0;
	ei->flags = 0;
	ei->csum_bytes = 0;
	ei->index_cnt = (u64)-1;
	ei->dir_index = 0;
	ei->last_unlink_trans = 0;
	ei->last_log_commit = 0;

	spin_lock_init(&ei->lock);
	ei->outstanding_extents = 0;
	/* The sanity-test dummy fs has no block reservation machinery. */
	if (sb->s_magic != BTRFS_TEST_MAGIC)
		btrfs_init_metadata_block_rsv(fs_info, &ei->block_rsv,
					      BTRFS_BLOCK_RSV_DELALLOC);
	ei->runtime_flags = 0;
	ei->prop_compress = BTRFS_COMPRESS_NONE;
	ei->defrag_compress = BTRFS_COMPRESS_NONE;

	ei->delayed_node = NULL;

	ei->i_otime.tv_sec = 0;
	ei->i_otime.tv_nsec = 0;

	inode = &ei->vfs_inode;
	extent_map_tree_init(&ei->extent_tree);
	extent_io_tree_init(fs_info, &ei->io_tree, IO_TREE_INODE_IO, inode);
	extent_io_tree_init(fs_info, &ei->io_failure_tree,
			    IO_TREE_INODE_IO_FAILURE, inode);
	ei->io_tree.track_uptodate = true;
	ei->io_failure_tree.track_uptodate = true;
	atomic_set(&ei->sync_writers, 0);
	mutex_init(&ei->log_mutex);
	mutex_init(&ei->delalloc_mutex);
	btrfs_ordered_inode_tree_init(&ei->ordered_tree);
	INIT_LIST_HEAD(&ei->delalloc_inodes);
	INIT_LIST_HEAD(&ei->delayed_iput);
	RB_CLEAR_NODE(&ei->rb_node);
	init_rwsem(&ei->dio_sem);

	return inode;
}
9254
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/* Free an inode created by the sanity tests: drop extent maps, free slab. */
void btrfs_test_destroy_inode(struct inode *inode)
{
	btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0);
	kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
}
#endif
9262
/* ->free_inode: return the in-memory inode to the slab cache. */
void btrfs_free_inode(struct inode *inode)
{
	kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
}
9267
/*
 * ->destroy_inode: tear down per-inode state on eviction.  All reserved
 * space and delalloc must already be gone (the WARN_ONs check for leaks);
 * any leftover ordered extents indicate a bug and are logged and removed.
 */
void btrfs_destroy_inode(struct inode *inode)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_ordered_extent *ordered;
	struct btrfs_root *root = BTRFS_I(inode)->root;

	WARN_ON(!hlist_empty(&inode->i_dentry));
	WARN_ON(inode->i_data.nrpages);
	WARN_ON(BTRFS_I(inode)->block_rsv.reserved);
	WARN_ON(BTRFS_I(inode)->block_rsv.size);
	WARN_ON(BTRFS_I(inode)->outstanding_extents);
	WARN_ON(BTRFS_I(inode)->delalloc_bytes);
	WARN_ON(BTRFS_I(inode)->new_delalloc_bytes);
	WARN_ON(BTRFS_I(inode)->csum_bytes);
	WARN_ON(BTRFS_I(inode)->defrag_bytes);

	/*
	 * A NULL root means the inode never got fully set up (e.g. we lost
	 * a race creating it); there is nothing more to clean up.
	 */
	if (!root)
		return;

	while (1) {
		ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
		if (!ordered)
			break;
		else {
			btrfs_err(fs_info,
				  "found ordered extent %llu %llu on inode cleanup",
				  ordered->file_offset, ordered->len);
			btrfs_remove_ordered_extent(inode, ordered);
			/* Once for the lookup reference, once for the tree's. */
			btrfs_put_ordered_extent(ordered);
			btrfs_put_ordered_extent(ordered);
		}
	}
	btrfs_qgroup_check_reserved_leak(inode);
	inode_tree_del(inode);
	btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0);
}
9309
9310int btrfs_drop_inode(struct inode *inode)
9311{
9312 struct btrfs_root *root = BTRFS_I(inode)->root;
9313
9314 if (root == NULL)
9315 return 1;
9316
9317
9318 if (btrfs_root_refs(&root->root_item) == 0)
9319 return 1;
9320 else
9321 return generic_drop_inode(inode);
9322}
9323
/*
 * Slab constructor: runs once per slab object when it is first created,
 * not on every allocation from the cache.
 */
static void init_once(void *foo)
{
	struct btrfs_inode *ei = (struct btrfs_inode *) foo;

	inode_init_once(&ei->vfs_inode);
}
9330
/*
 * Destroy all slab caches created by btrfs_init_cachep().  Safe to call
 * with a partially-initialized set: kmem_cache_destroy(NULL) is a no-op.
 */
void __cold btrfs_destroy_cachep(void)
{
	/*
	 * Make sure all delayed rcu free inodes are flushed before we
	 * destroy cache.
	 */
	rcu_barrier();
	kmem_cache_destroy(btrfs_inode_cachep);
	kmem_cache_destroy(btrfs_trans_handle_cachep);
	kmem_cache_destroy(btrfs_path_cachep);
	kmem_cache_destroy(btrfs_free_space_cachep);
}
9343
/*
 * Create the slab caches btrfs uses (inodes, transaction handles, paths,
 * free-space entries).  On any failure everything created so far is torn
 * down and -ENOMEM is returned.
 */
int __init btrfs_init_cachep(void)
{
	btrfs_inode_cachep = kmem_cache_create("btrfs_inode",
			sizeof(struct btrfs_inode), 0,
			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT,
			init_once);
	if (!btrfs_inode_cachep)
		goto fail;

	btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle",
			sizeof(struct btrfs_trans_handle), 0,
			SLAB_TEMPORARY | SLAB_MEM_SPREAD, NULL);
	if (!btrfs_trans_handle_cachep)
		goto fail;

	btrfs_path_cachep = kmem_cache_create("btrfs_path",
			sizeof(struct btrfs_path), 0,
			SLAB_MEM_SPREAD, NULL);
	if (!btrfs_path_cachep)
		goto fail;

	btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space",
			sizeof(struct btrfs_free_space), 0,
			SLAB_MEM_SPREAD, NULL);
	if (!btrfs_free_space_cachep)
		goto fail;

	return 0;
fail:
	btrfs_destroy_cachep();
	return -ENOMEM;
}
9376
/*
 * ->getattr: fill @stat, adding btrfs specifics: creation time as
 * STATX_BTIME, inode flags mapped to STATX_ATTR_* bits, the subvolume's
 * anonymous device number, and a block count that includes delalloc.
 */
static int btrfs_getattr(const struct path *path, struct kstat *stat,
			 u32 request_mask, unsigned int flags)
{
	u64 delalloc_bytes;
	struct inode *inode = d_inode(path->dentry);
	u32 blocksize = inode->i_sb->s_blocksize;
	u32 bi_flags = BTRFS_I(inode)->flags;

	/* Report the inode's otime (creation time) as btime. */
	stat->result_mask |= STATX_BTIME;
	stat->btime.tv_sec = BTRFS_I(inode)->i_otime.tv_sec;
	stat->btime.tv_nsec = BTRFS_I(inode)->i_otime.tv_nsec;
	if (bi_flags & BTRFS_INODE_APPEND)
		stat->attributes |= STATX_ATTR_APPEND;
	if (bi_flags & BTRFS_INODE_COMPRESS)
		stat->attributes |= STATX_ATTR_COMPRESSED;
	if (bi_flags & BTRFS_INODE_IMMUTABLE)
		stat->attributes |= STATX_ATTR_IMMUTABLE;
	if (bi_flags & BTRFS_INODE_NODUMP)
		stat->attributes |= STATX_ATTR_NODUMP;

	stat->attributes_mask |= (STATX_ATTR_APPEND |
				  STATX_ATTR_COMPRESSED |
				  STATX_ATTR_IMMUTABLE |
				  STATX_ATTR_NODUMP);

	generic_fillattr(inode, stat);
	/* Each subvolume reports its own anonymous device number. */
	stat->dev = BTRFS_I(inode)->root->anon_dev;

	/*
	 * Account not-yet-allocated delalloc bytes in st_blocks so data
	 * still sitting in the page cache shows up in stat(2).
	 */
	spin_lock(&BTRFS_I(inode)->lock);
	delalloc_bytes = BTRFS_I(inode)->new_delalloc_bytes;
	spin_unlock(&BTRFS_I(inode)->lock);
	stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) +
			ALIGN(delalloc_bytes, blocksize)) >> 9;
	return 0;
}
9412
/*
 * Implement RENAME_EXCHANGE: atomically swap two existing directory
 * entries.  Each side may be a regular inode or a subvolume link; for a
 * subvolume side the log can't track the change, so a full transaction
 * commit is forced instead of a log sync.
 */
static int btrfs_rename_exchange(struct inode *old_dir,
				 struct dentry *old_dentry,
				 struct inode *new_dir,
				 struct dentry *new_dentry)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(old_dir->i_sb);
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(old_dir)->root;
	struct btrfs_root *dest = BTRFS_I(new_dir)->root;
	struct inode *new_inode = new_dentry->d_inode;
	struct inode *old_inode = old_dentry->d_inode;
	struct timespec64 ctime = current_time(old_inode);
	struct dentry *parent;
	u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
	u64 new_ino = btrfs_ino(BTRFS_I(new_inode));
	u64 old_idx = 0;
	u64 new_idx = 0;
	u64 root_objectid;
	int ret;
	bool root_log_pinned = false;
	bool dest_log_pinned = false;
	struct btrfs_log_ctx ctx_root;
	struct btrfs_log_ctx ctx_dest;
	bool sync_log_root = false;
	bool sync_log_dest = false;
	bool commit_transaction = false;

	/* we only allow rename subvolume link between subvolumes */
	if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
		return -EXDEV;

	btrfs_init_log_ctx(&ctx_root, old_inode);
	btrfs_init_log_ctx(&ctx_dest, new_inode);

	/* close the race window with snapshot create/destroy ioctl */
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
		down_read(&fs_info->subvol_sem);
	if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
		down_read(&fs_info->subvol_sem);

	/*
	 * Reserve the worst case amount of items: unlinking a normal inode
	 * takes 5 item modifications, so 5 * 2 is 10, plus 2 for the new
	 * links, giving 12 total items.
	 */
	trans = btrfs_start_transaction(root, 12);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_notrans;
	}

	/*
	 * Grab a free directory index in both directories up front, so any
	 * failure here leaves both directories untouched.
	 */
	ret = btrfs_set_inode_index(BTRFS_I(new_dir), &old_idx);
	if (ret)
		goto out_fail;
	ret = btrfs_set_inode_index(BTRFS_I(old_dir), &new_idx);
	if (ret)
		goto out_fail;

	BTRFS_I(old_inode)->dir_index = 0ULL;
	BTRFS_I(new_inode)->dir_index = 0ULL;

	/* Reference the source in the destination directory. */
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
		/* force full log commit if subvolume involved. */
		btrfs_set_log_full_commit(trans);
	} else {
		btrfs_pin_log_trans(root);
		root_log_pinned = true;
		ret = btrfs_insert_inode_ref(trans, dest,
					     new_dentry->d_name.name,
					     new_dentry->d_name.len,
					     old_ino,
					     btrfs_ino(BTRFS_I(new_dir)),
					     old_idx);
		if (ret)
			goto out_fail;
	}

	/* And the destination in the source directory. */
	if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
		/* force full log commit if subvolume involved. */
		btrfs_set_log_full_commit(trans);
	} else {
		btrfs_pin_log_trans(dest);
		dest_log_pinned = true;
		ret = btrfs_insert_inode_ref(trans, root,
					     old_dentry->d_name.name,
					     old_dentry->d_name.len,
					     new_ino,
					     btrfs_ino(BTRFS_I(old_dir)),
					     new_idx);
		if (ret)
			goto out_fail;
	}

	/* Update inode version and ctime/mtime. */
	inode_inc_iversion(old_dir);
	inode_inc_iversion(new_dir);
	inode_inc_iversion(old_inode);
	inode_inc_iversion(new_inode);
	old_dir->i_ctime = old_dir->i_mtime = ctime;
	new_dir->i_ctime = new_dir->i_mtime = ctime;
	old_inode->i_ctime = ctime;
	new_inode->i_ctime = ctime;

	if (old_dentry->d_parent != new_dentry->d_parent) {
		btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
				BTRFS_I(old_inode), 1);
		btrfs_record_unlink_dir(trans, BTRFS_I(new_dir),
				BTRFS_I(new_inode), 1);
	}

	/* src is a subvolume */
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
		root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
		ret = btrfs_unlink_subvol(trans, old_dir, root_objectid,
					  old_dentry->d_name.name,
					  old_dentry->d_name.len);
	} else { /* src is an inode */
		ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),
					   BTRFS_I(old_dentry->d_inode),
					   old_dentry->d_name.name,
					   old_dentry->d_name.len);
		if (!ret)
			ret = btrfs_update_inode(trans, root, old_inode);
	}
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	/* dest is a subvolume */
	if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
		root_objectid = BTRFS_I(new_inode)->root->root_key.objectid;
		ret = btrfs_unlink_subvol(trans, new_dir, root_objectid,
					  new_dentry->d_name.name,
					  new_dentry->d_name.len);
	} else { /* dest is an inode */
		ret = __btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir),
					   BTRFS_I(new_dentry->d_inode),
					   new_dentry->d_name.name,
					   new_dentry->d_name.len);
		if (!ret)
			ret = btrfs_update_inode(trans, dest, new_inode);
	}
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	/* Re-link each inode under the other directory's name. */
	ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode),
			     new_dentry->d_name.name,
			     new_dentry->d_name.len, 0, old_idx);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	ret = btrfs_add_link(trans, BTRFS_I(old_dir), BTRFS_I(new_inode),
			     old_dentry->d_name.name,
			     old_dentry->d_name.len, 0, new_idx);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	if (old_inode->i_nlink == 1)
		BTRFS_I(old_inode)->dir_index = old_idx;
	if (new_inode->i_nlink == 1)
		BTRFS_I(new_inode)->dir_index = new_idx;

	/* Log the new names and find out if a sync/commit is needed. */
	if (root_log_pinned) {
		parent = new_dentry->d_parent;
		ret = btrfs_log_new_name(trans, BTRFS_I(old_inode),
					 BTRFS_I(old_dir), parent,
					 false, &ctx_root);
		if (ret == BTRFS_NEED_LOG_SYNC)
			sync_log_root = true;
		else if (ret == BTRFS_NEED_TRANS_COMMIT)
			commit_transaction = true;
		ret = 0;
		btrfs_end_log_trans(root);
		root_log_pinned = false;
	}
	if (dest_log_pinned) {
		if (!commit_transaction) {
			parent = old_dentry->d_parent;
			ret = btrfs_log_new_name(trans, BTRFS_I(new_inode),
						 BTRFS_I(new_dir), parent,
						 false, &ctx_dest);
			if (ret == BTRFS_NEED_LOG_SYNC)
				sync_log_dest = true;
			else if (ret == BTRFS_NEED_TRANS_COMMIT)
				commit_transaction = true;
			ret = 0;
		}
		btrfs_end_log_trans(dest);
		dest_log_pinned = false;
	}
out_fail:
	/*
	 * If we have pinned a log and an error happened, we unpin tasks
	 * trying to sync the log and force them to fall back to a
	 * transaction commit if the log currently contains any of the inodes
	 * involved in this rename operation (to ensure we do not persist a
	 * log with an inconsistent state for any of these inodes or leading
	 * to any inconsistencies when replayed).  If the transaction was
	 * aborted, the abortion reason is propagated to userspace when
	 * attempting to commit the transaction.  If the log does not contain
	 * any of these inodes, we allow the tasks to sync it.
	 */
	if (ret && (root_log_pinned || dest_log_pinned)) {
		if (btrfs_inode_in_log(BTRFS_I(old_dir), fs_info->generation) ||
		    btrfs_inode_in_log(BTRFS_I(new_dir), fs_info->generation) ||
		    btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
		    (new_inode &&
		     btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation)))
			btrfs_set_log_full_commit(trans);

		if (root_log_pinned) {
			btrfs_end_log_trans(root);
			root_log_pinned = false;
		}
		if (dest_log_pinned) {
			btrfs_end_log_trans(dest);
			dest_log_pinned = false;
		}
	}
	if (!ret && sync_log_root && !commit_transaction) {
		ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root,
				     &ctx_root);
		if (ret)
			commit_transaction = true;
	}
	if (!ret && sync_log_dest && !commit_transaction) {
		ret = btrfs_sync_log(trans, BTRFS_I(new_inode)->root,
				     &ctx_dest);
		if (ret)
			commit_transaction = true;
	}
	if (commit_transaction) {
		ret = btrfs_commit_transaction(trans);
	} else {
		int ret2;

		ret2 = btrfs_end_transaction(trans);
		ret = ret ? ret : ret2;
	}
out_notrans:
	if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
		up_read(&fs_info->subvol_sem);
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
		up_read(&fs_info->subvol_sem);

	return ret;
}
9676
/*
 * Create the whiteout inode (a 0:0 character device, WHITEOUT_MODE) that
 * takes the place of the source entry for RENAME_WHITEOUT.  Runs inside
 * the rename's transaction, which reserved extra items for it.
 */
static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     struct inode *dir,
				     struct dentry *dentry)
{
	int ret;
	struct inode *inode;
	u64 objectid;
	u64 index;

	ret = btrfs_find_free_ino(root, &objectid);
	if (ret)
		return ret;

	inode = btrfs_new_inode(trans, root, dir,
				dentry->d_name.name,
				dentry->d_name.len,
				btrfs_ino(BTRFS_I(dir)),
				objectid,
				S_IFCHR | WHITEOUT_MODE,
				&index);

	if (IS_ERR(inode)) {
		ret = PTR_ERR(inode);
		return ret;
	}

	inode->i_op = &btrfs_special_inode_operations;
	init_special_inode(inode, inode->i_mode,
		WHITEOUT_DEV);

	ret = btrfs_init_inode_security(trans, inode, dir,
				&dentry->d_name);
	if (ret)
		goto out;

	ret = btrfs_add_nondir(trans, BTRFS_I(dir), dentry,
				BTRFS_I(inode), 0, index);
	if (ret)
		goto out;

	ret = btrfs_update_inode(trans, root, inode);
out:
	unlock_new_inode(inode);
	/* On failure drop the link count so the final iput frees the inode. */
	if (ret)
		inode_dec_link_count(inode);
	iput(inode);

	return ret;
}
9727
/*
 * ->rename for plain and RENAME_WHITEOUT renames (RENAME_EXCHANGE is
 * handled by btrfs_rename_exchange()).  The source may be a regular inode
 * or a subvolume link; cross-subvolume renames of anything else get
 * -EXDEV.
 */
static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
			struct inode *new_dir, struct dentry *new_dentry,
			unsigned int flags)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(old_dir->i_sb);
	struct btrfs_trans_handle *trans;
	unsigned int trans_num_items;
	struct btrfs_root *root = BTRFS_I(old_dir)->root;
	struct btrfs_root *dest = BTRFS_I(new_dir)->root;
	struct inode *new_inode = d_inode(new_dentry);
	struct inode *old_inode = d_inode(old_dentry);
	u64 index = 0;
	u64 root_objectid;
	int ret;
	u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
	bool log_pinned = false;
	struct btrfs_log_ctx ctx;
	bool sync_log = false;
	bool commit_transaction = false;

	if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
		return -EPERM;

	/* we only allow rename subvolume link between subvolumes */
	if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
		return -EXDEV;

	if (old_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID ||
	    (new_inode && btrfs_ino(BTRFS_I(new_inode)) == BTRFS_FIRST_FREE_OBJECTID))
		return -ENOTEMPTY;

	if (S_ISDIR(old_inode->i_mode) && new_inode &&
	    new_inode->i_size > BTRFS_EMPTY_DIR_SIZE)
		return -ENOTEMPTY;

	/* check for collisions, even if the name isn't there yet. */
	ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino,
			     new_dentry->d_name.name,
			     new_dentry->d_name.len);

	if (ret) {
		if (ret == -EEXIST) {
			/* we shouldn't get -EEXIST when there is no
			 * existing target (new_inode). */
			if (WARN_ON(!new_inode)) {
				return ret;
			}
		} else {
			/* maybe -EOVERFLOW */
			return ret;
		}
	}
	ret = 0;

	/*
	 * we're using rename to replace one file with another.  Start IO on
	 * it now so we don't add too much work to the end of the transaction
	 */
	if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size)
		filemap_flush(old_inode->i_mapping);

	/* close the racy window with snapshot create/destroy ioctl */
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
		down_read(&fs_info->subvol_sem);

	/*
	 * We want to reserve the absolute worst case amount of items.
	 * Unlinking a normal inode takes 5 item modifications, so 5 * 2 is
	 * 10, plus 1 for the new link: 11 total items.  If this rename has
	 * the whiteout flag we need 5 more units for the new inode (1 inode
	 * item, 1 inode ref, 2 dir items and 1 xattr item when selinux is
	 * enabled).
	 */
	trans_num_items = 11;
	if (flags & RENAME_WHITEOUT)
		trans_num_items += 5;
	trans = btrfs_start_transaction(root, trans_num_items);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_notrans;
	}

	if (dest != root)
		btrfs_record_root_in_trans(trans, dest);

	ret = btrfs_set_inode_index(BTRFS_I(new_dir), &index);
	if (ret)
		goto out_fail;

	BTRFS_I(old_inode)->dir_index = 0ULL;
	if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
		/* force full log commit if subvolume involved. */
		btrfs_set_log_full_commit(trans);
	} else {
		btrfs_pin_log_trans(root);
		log_pinned = true;
		ret = btrfs_insert_inode_ref(trans, dest,
					     new_dentry->d_name.name,
					     new_dentry->d_name.len,
					     old_ino,
					     btrfs_ino(BTRFS_I(new_dir)), index);
		if (ret)
			goto out_fail;
	}

	inode_inc_iversion(old_dir);
	inode_inc_iversion(new_dir);
	inode_inc_iversion(old_inode);
	old_dir->i_ctime = old_dir->i_mtime =
	new_dir->i_ctime = new_dir->i_mtime =
	old_inode->i_ctime = current_time(old_dir);

	if (old_dentry->d_parent != new_dentry->d_parent)
		btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
				BTRFS_I(old_inode), 1);

	/* Remove the source entry from the old directory. */
	if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
		root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
		ret = btrfs_unlink_subvol(trans, old_dir, root_objectid,
					old_dentry->d_name.name,
					old_dentry->d_name.len);
	} else {
		ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),
					BTRFS_I(d_inode(old_dentry)),
					old_dentry->d_name.name,
					old_dentry->d_name.len);
		if (!ret)
			ret = btrfs_update_inode(trans, root, old_inode);
	}
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	/* Remove an existing target, orphaning it if its nlink drops to 0. */
	if (new_inode) {
		inode_inc_iversion(new_inode);
		new_inode->i_ctime = current_time(new_inode);
		if (unlikely(btrfs_ino(BTRFS_I(new_inode)) ==
			     BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
			root_objectid = BTRFS_I(new_inode)->location.objectid;
			ret = btrfs_unlink_subvol(trans, new_dir, root_objectid,
						new_dentry->d_name.name,
						new_dentry->d_name.len);
			BUG_ON(new_inode->i_nlink == 0);
		} else {
			ret = btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir),
						 BTRFS_I(d_inode(new_dentry)),
						 new_dentry->d_name.name,
						 new_dentry->d_name.len);
		}
		if (!ret && new_inode->i_nlink == 0)
			ret = btrfs_orphan_add(trans,
					BTRFS_I(d_inode(new_dentry)));
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out_fail;
		}
	}

	ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode),
			     new_dentry->d_name.name,
			     new_dentry->d_name.len, 0, index);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_fail;
	}

	if (old_inode->i_nlink == 1)
		BTRFS_I(old_inode)->dir_index = index;

	/* Log the new name and find out if a sync/commit is required. */
	if (log_pinned) {
		struct dentry *parent = new_dentry->d_parent;

		btrfs_init_log_ctx(&ctx, old_inode);
		ret = btrfs_log_new_name(trans, BTRFS_I(old_inode),
					 BTRFS_I(old_dir), parent,
					 false, &ctx);
		if (ret == BTRFS_NEED_LOG_SYNC)
			sync_log = true;
		else if (ret == BTRFS_NEED_TRANS_COMMIT)
			commit_transaction = true;
		ret = 0;
		btrfs_end_log_trans(root);
		log_pinned = false;
	}

	if (flags & RENAME_WHITEOUT) {
		ret = btrfs_whiteout_for_rename(trans, root, old_dir,
						old_dentry);

		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out_fail;
		}
	}
out_fail:
	/*
	 * If we have pinned the log and an error happened, we unpin tasks
	 * trying to sync the log and force them to fall back to a
	 * transaction commit if the log currently contains any of the inodes
	 * involved in this rename operation (to ensure we do not persist a
	 * log with an inconsistent state for any of these inodes or leading
	 * to any inconsistencies when replayed).  If the transaction was
	 * aborted, the abortion reason is propagated to userspace when
	 * attempting to commit the transaction.  If the log does not contain
	 * any of these inodes, we allow the tasks to sync it.
	 */
	if (ret && log_pinned) {
		if (btrfs_inode_in_log(BTRFS_I(old_dir), fs_info->generation) ||
		    btrfs_inode_in_log(BTRFS_I(new_dir), fs_info->generation) ||
		    btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
		    (new_inode &&
		     btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation)))
			btrfs_set_log_full_commit(trans);

		btrfs_end_log_trans(root);
		log_pinned = false;
	}
	if (!ret && sync_log) {
		ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root, &ctx);
		if (ret)
			commit_transaction = true;
	}
	if (commit_transaction) {
		ret = btrfs_commit_transaction(trans);
	} else {
		int ret2;

		ret2 = btrfs_end_transaction(trans);
		ret = ret ? ret : ret2;
	}
out_notrans:
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
		up_read(&fs_info->subvol_sem);

	return ret;
}
9968
9969static int btrfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
9970 struct inode *new_dir, struct dentry *new_dentry,
9971 unsigned int flags)
9972{
9973 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
9974 return -EINVAL;
9975
9976 if (flags & RENAME_EXCHANGE)
9977 return btrfs_rename_exchange(old_dir, old_dentry, new_dir,
9978 new_dentry);
9979
9980 return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
9981}
9982
/* Work item used to flush one inode's delalloc from a worker thread. */
struct btrfs_delalloc_work {
	struct inode *inode;		/* inode to flush; owns a reference */
	struct completion completion;	/* signalled when the flush is done */
	struct list_head list;		/* entry in the submitter's local list */
	struct btrfs_work work;		/* queued on fs_info->flush_workers */
};
9989
/*
 * Worker callback: flush the inode's dirty pages, drop the inode
 * reference owned by the work item, and signal the submitter.
 */
static void btrfs_run_delalloc_work(struct btrfs_work *work)
{
	struct btrfs_delalloc_work *delalloc_work;
	struct inode *inode;

	delalloc_work = container_of(work, struct btrfs_delalloc_work,
				     work);
	inode = delalloc_work->inode;
	filemap_flush(inode->i_mapping);
	/* Async (compressed) extents can redirty pages; flush once more. */
	if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
				&BTRFS_I(inode)->runtime_flags))
		filemap_flush(inode->i_mapping);

	iput(inode);
	complete(&delalloc_work->completion);
}
10006
/*
 * Allocate and initialize a delalloc flush work item for @inode.  The
 * caller's inode reference is transferred to the work item and released
 * by btrfs_run_delalloc_work().  Returns NULL on allocation failure.
 */
static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode)
{
	struct btrfs_delalloc_work *work;

	work = kmalloc(sizeof(*work), GFP_NOFS);
	if (!work)
		return NULL;

	init_completion(&work->completion);
	INIT_LIST_HEAD(&work->list);
	work->inode = inode;
	btrfs_init_work(&work->work, btrfs_flush_delalloc_helper,
			btrfs_run_delalloc_work, NULL, NULL);

	return work;
}
10023
10024
10025
10026
10027
/*
 * Queue flush work for up to @nr inodes with pending delalloc on @root
 * (@nr == -1 means all of them), then wait for the queued work to
 * complete.  When @snapshot is set, flag each inode for a snapshot-aware
 * flush.  Returns the number of inodes started or a negative errno.
 */
static int start_delalloc_inodes(struct btrfs_root *root, int nr, bool snapshot)
{
	struct btrfs_inode *binode;
	struct inode *inode;
	struct btrfs_delalloc_work *work, *next;
	struct list_head works;
	struct list_head splice;
	int ret = 0;

	INIT_LIST_HEAD(&works);
	INIT_LIST_HEAD(&splice);

	mutex_lock(&root->delalloc_mutex);
	spin_lock(&root->delalloc_lock);
	list_splice_init(&root->delalloc_inodes, &splice);
	while (!list_empty(&splice)) {
		binode = list_entry(splice.next, struct btrfs_inode,
				    delalloc_inodes);

		/* Move it back to the root's list before dropping the lock. */
		list_move_tail(&binode->delalloc_inodes,
			       &root->delalloc_inodes);
		inode = igrab(&binode->vfs_inode);
		if (!inode) {
			/* Inode is going away; skip it. */
			cond_resched_lock(&root->delalloc_lock);
			continue;
		}
		spin_unlock(&root->delalloc_lock);

		if (snapshot)
			set_bit(BTRFS_INODE_SNAPSHOT_FLUSH,
				&binode->runtime_flags);
		work = btrfs_alloc_delalloc_work(inode);
		if (!work) {
			iput(inode);
			ret = -ENOMEM;
			goto out;
		}
		list_add_tail(&work->list, &works);
		btrfs_queue_work(root->fs_info->flush_workers,
				 &work->work);
		ret++;
		if (nr != -1 && ret >= nr)
			goto out;
		cond_resched();
		spin_lock(&root->delalloc_lock);
	}
	spin_unlock(&root->delalloc_lock);

out:
	/* Wait for every queued flush before returning. */
	list_for_each_entry_safe(work, next, &works, list) {
		list_del_init(&work->list);
		wait_for_completion(&work->completion);
		kfree(work);
	}

	/* Return any inodes we did not get to back to the root's list. */
	if (!list_empty(&splice)) {
		spin_lock(&root->delalloc_lock);
		list_splice_tail(&splice, &root->delalloc_inodes);
		spin_unlock(&root->delalloc_lock);
	}
	mutex_unlock(&root->delalloc_mutex);
	return ret;
}
10091
10092int btrfs_start_delalloc_snapshot(struct btrfs_root *root)
10093{
10094 struct btrfs_fs_info *fs_info = root->fs_info;
10095 int ret;
10096
10097 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
10098 return -EROFS;
10099
10100 ret = start_delalloc_inodes(root, -1, true);
10101 if (ret > 0)
10102 ret = 0;
10103 return ret;
10104}
10105
/*
 * Flush delalloc for up to @nr inodes across every root that currently
 * has delalloc (@nr == -1 means no limit).  Returns 0 on success or a
 * negative errno (-EROFS if the fs is in an error state).
 */
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int nr)
{
	struct btrfs_root *root;
	struct list_head splice;
	int ret;

	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
		return -EROFS;

	INIT_LIST_HEAD(&splice);

	mutex_lock(&fs_info->delalloc_root_mutex);
	spin_lock(&fs_info->delalloc_root_lock);
	list_splice_init(&fs_info->delalloc_roots, &splice);
	while (!list_empty(&splice) && nr) {
		root = list_first_entry(&splice, struct btrfs_root,
					delalloc_root);
		/* Hold the root so it can't go away while we flush it. */
		root = btrfs_grab_fs_root(root);
		BUG_ON(!root);
		list_move_tail(&root->delalloc_root,
			       &fs_info->delalloc_roots);
		spin_unlock(&fs_info->delalloc_root_lock);

		ret = start_delalloc_inodes(root, nr, false);
		btrfs_put_fs_root(root);
		if (ret < 0)
			goto out;

		/* Subtract what this root already started from the budget. */
		if (nr != -1) {
			nr -= ret;
			WARN_ON(nr < 0);
		}
		spin_lock(&fs_info->delalloc_root_lock);
	}
	spin_unlock(&fs_info->delalloc_root_lock);

	ret = 0;
out:
	/* Return any roots we did not reach to the global list. */
	if (!list_empty(&splice)) {
		spin_lock(&fs_info->delalloc_root_lock);
		list_splice_tail(&splice, &fs_info->delalloc_roots);
		spin_unlock(&fs_info->delalloc_root_lock);
	}
	mutex_unlock(&fs_info->delalloc_root_mutex);
	return ret;
}
10152
/*
 * ->symlink: create a symlink whose target string is stored as an inline
 * file extent.  Targets longer than the maximum inline data size get
 * -ENAMETOOLONG.
 */
static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
			 const char *symname)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct inode *inode = NULL;
	int err;
	u64 objectid;
	u64 index = 0;
	int name_len;
	int datasize;
	unsigned long ptr;
	struct btrfs_file_extent_item *ei;
	struct extent_buffer *leaf;

	name_len = strlen(symname);
	if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(fs_info))
		return -ENAMETOOLONG;

	/*
	 * 2 items for inode item and ref
	 * 2 items for dir items
	 * 1 item for updating parent inode item
	 * 1 item for the inline extent item
	 * 1 item for xattr if selinux is on
	 */
	trans = btrfs_start_transaction(root, 7);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	err = btrfs_find_free_ino(root, &objectid);
	if (err)
		goto out_unlock;

	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
				dentry->d_name.len, btrfs_ino(BTRFS_I(dir)),
				objectid, S_IFLNK|S_IRWXUGO, &index);
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
		inode = NULL;
		goto out_unlock;
	}

	/*
	 * If the active LSM wants to access the inode during
	 * d_instantiate it needs these. Smack checks to see
	 * if the filesystem supports xattrs by looking at the
	 * ops vector.
	 */
	inode->i_fop = &btrfs_file_operations;
	inode->i_op = &btrfs_file_inode_operations;
	inode->i_mapping->a_ops = &btrfs_aops;
	BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;

	err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
	if (err)
		goto out_unlock;

	/* Insert the target string as an inline file extent at offset 0. */
	path = btrfs_alloc_path();
	if (!path) {
		err = -ENOMEM;
		goto out_unlock;
	}
	key.objectid = btrfs_ino(BTRFS_I(inode));
	key.offset = 0;
	key.type = BTRFS_EXTENT_DATA_KEY;
	datasize = btrfs_file_extent_calc_inline_size(name_len);
	err = btrfs_insert_empty_item(trans, root, path, &key,
				      datasize);
	if (err) {
		btrfs_free_path(path);
		goto out_unlock;
	}
	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);
	btrfs_set_file_extent_generation(leaf, ei, trans->transid);
	btrfs_set_file_extent_type(leaf, ei,
				   BTRFS_FILE_EXTENT_INLINE);
	btrfs_set_file_extent_encryption(leaf, ei, 0);
	btrfs_set_file_extent_compression(leaf, ei, 0);
	btrfs_set_file_extent_other_encoding(leaf, ei, 0);
	btrfs_set_file_extent_ram_bytes(leaf, ei, name_len);

	ptr = btrfs_file_extent_inline_start(ei);
	write_extent_buffer(leaf, symname, ptr, name_len);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_free_path(path);

	/* Data is in place: switch the inode over to symlink semantics. */
	inode->i_op = &btrfs_symlink_inode_operations;
	inode_nohighmem(inode);
	inode_set_bytes(inode, name_len);
	btrfs_i_size_write(BTRFS_I(inode), name_len);
	err = btrfs_update_inode(trans, root, inode);
	/*
	 * Last step, add directory indexes for our symlink inode.  This is
	 * the last step to avoid races with readdir calls.
	 */
	if (!err)
		err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry,
				BTRFS_I(inode), 0, index);
	if (err)
		goto out_unlock;

	d_instantiate_new(dentry, inode);

out_unlock:
	btrfs_end_transaction(trans);
	if (err && inode) {
		inode_dec_link_count(inode);
		discard_new_inode(inode);
	}
	btrfs_btree_balance_dirty(fs_info);
	return err;
}
10272
/*
 * Preallocate extents for the file range [start, start + num_bytes).
 *
 * The range is filled in chunks of at most SZ_256M and at least @min_size
 * bytes.  If @trans is NULL, a transaction is started (and ended) around
 * each chunk; otherwise the caller's transaction handle is used throughout.
 *
 * Unless FALLOC_FL_KEEP_SIZE is set in @mode, i_size is advanced towards
 * @actual_len as chunks are inserted.  *@alloc_hint is updated to the end
 * of the last allocated extent.  On exit, any data space still reserved
 * for the unallocated tail of the range is released.
 *
 * Returns 0 on success or a negative errno on failure (the range may have
 * been partially preallocated in that case).
 */
static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
				       u64 start, u64 num_bytes, u64 min_size,
				       loff_t actual_len, u64 *alloc_hint,
				       struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	struct extent_map *em;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_key ins;
	u64 cur_offset = start;
	u64 i_size;
	u64 cur_bytes;
	u64 last_alloc = (u64)-1;	/* size of the last successful reservation */
	int ret = 0;
	bool own_trans = true;
	u64 end = start + num_bytes - 1;

	if (trans)
		own_trans = false;
	while (num_bytes > 0) {
		if (own_trans) {
			trans = btrfs_start_transaction(root, 3);
			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
				break;
			}
		}

		cur_bytes = min_t(u64, num_bytes, SZ_256M);
		cur_bytes = max(cur_bytes, min_size);

		/*
		 * Never ask for more than the previous iteration managed to
		 * reserve: if btrfs_reserve_extent() handed back a shorter
		 * extent last time, a larger request is unlikely to succeed
		 * now.
		 */
		cur_bytes = min(cur_bytes, last_alloc);
		ret = btrfs_reserve_extent(root, cur_bytes, cur_bytes,
				min_size, 0, *alloc_hint, &ins, 1, 0);
		if (ret) {
			if (own_trans)
				btrfs_end_transaction(trans);
			break;
		}
		btrfs_dec_block_group_reservations(fs_info, ins.objectid);

		last_alloc = ins.offset;
		ret = insert_reserved_file_extent(trans, inode,
						  cur_offset, ins.objectid,
						  ins.offset, ins.offset,
						  ins.offset, 0, 0, 0,
						  BTRFS_FILE_EXTENT_PREALLOC);
		if (ret) {
			/* Undo the reservation; the item was never inserted. */
			btrfs_free_reserved_extent(fs_info, ins.objectid,
						   ins.offset, 0);
			btrfs_abort_transaction(trans, ret);
			if (own_trans)
				btrfs_end_transaction(trans);
			break;
		}

		btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset,
					cur_offset + ins.offset -1, 0);

		em = alloc_extent_map();
		if (!em) {
			/*
			 * No cached mapping for the new extent: flag the
			 * inode so a later fsync does a full sync instead of
			 * relying on the extent map tree.
			 */
			set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
				&BTRFS_I(inode)->runtime_flags);
			goto next;
		}

		em->start = cur_offset;
		em->orig_start = cur_offset;
		em->len = ins.offset;
		em->block_start = ins.objectid;
		em->block_len = ins.offset;
		em->orig_block_len = ins.offset;
		em->ram_bytes = ins.offset;
		em->bdev = fs_info->fs_devices->latest_bdev;
		set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
		em->generation = trans->transid;

		/* Retry until any stale overlapping mappings are dropped. */
		while (1) {
			write_lock(&em_tree->lock);
			ret = add_extent_mapping(em_tree, em, 1);
			write_unlock(&em_tree->lock);
			if (ret != -EEXIST)
				break;
			btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset,
						cur_offset + ins.offset - 1,
						0);
		}
		free_extent_map(em);
next:
		num_bytes -= ins.offset;
		cur_offset += ins.offset;
		*alloc_hint = ins.objectid + ins.offset;

		inode_inc_iversion(inode);
		inode->i_ctime = current_time(inode);
		BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
		if (!(mode & FALLOC_FL_KEEP_SIZE) &&
		    (actual_len > inode->i_size) &&
		    (cur_offset > inode->i_size)) {
			/* Grow i_size, but never past the requested length. */
			if (cur_offset > actual_len)
				i_size = actual_len;
			else
				i_size = cur_offset;
			i_size_write(inode, i_size);
			btrfs_ordered_update_i_size(inode, i_size, NULL);
		}

		ret = btrfs_update_inode(trans, root, inode);

		if (ret) {
			btrfs_abort_transaction(trans, ret);
			if (own_trans)
				btrfs_end_transaction(trans);
			break;
		}

		if (own_trans)
			btrfs_end_transaction(trans);
	}
	/* Release the data space reserved for the part we never allocated. */
	if (cur_offset < end)
		btrfs_free_reserved_data_space(inode, NULL, cur_offset,
			end - cur_offset + 1);
	return ret;
}
10403
/*
 * Preallocate a file range, starting a transaction per allocated chunk
 * internally (see __btrfs_prealloc_file_range() with a NULL trans).
 */
int btrfs_prealloc_file_range(struct inode *inode, int mode,
			      u64 start, u64 num_bytes, u64 min_size,
			      loff_t actual_len, u64 *alloc_hint)
{
	return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
					   min_size, actual_len, alloc_hint,
					   NULL);
}
10412
/*
 * Preallocate a file range within the caller's already-running transaction
 * @trans (no per-chunk transactions are started).
 */
int btrfs_prealloc_file_range_trans(struct inode *inode,
				    struct btrfs_trans_handle *trans, int mode,
				    u64 start, u64 num_bytes, u64 min_size,
				    loff_t actual_len, u64 *alloc_hint)
{
	return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
					   min_size, actual_len, alloc_hint, trans);
}
10421
/* ->set_page_dirty callback: btrfs pages carry no buffer heads. */
static int btrfs_set_page_dirty(struct page *page)
{
	return __set_page_dirty_nobuffers(page);
}
10426
10427static int btrfs_permission(struct inode *inode, int mask)
10428{
10429 struct btrfs_root *root = BTRFS_I(inode)->root;
10430 umode_t mode = inode->i_mode;
10431
10432 if (mask & MAY_WRITE &&
10433 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
10434 if (btrfs_root_readonly(root))
10435 return -EROFS;
10436 if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY)
10437 return -EACCES;
10438 }
10439 return generic_permission(inode, mask);
10440}
10441
/*
 * ->tmpfile callback: create an unlinked regular file in @dir.
 *
 * The new inode gets an orphan item so it is cleaned up on a crash or if
 * it is never linked into the namespace.  On error the half-created inode
 * is discarded.
 */
static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct inode *inode = NULL;
	u64 objectid;
	u64 index;
	int ret = 0;

	/*
	 * 5 units reserved: new inode item plus the orphan item and
	 * related updates below — NOTE(review): exact breakdown not
	 * visible here, confirm against btrfs_start_transaction users.
	 */
	trans = btrfs_start_transaction(root, 5);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	ret = btrfs_find_free_ino(root, &objectid);
	if (ret)
		goto out;

	inode = btrfs_new_inode(trans, root, dir, NULL, 0,
			btrfs_ino(BTRFS_I(dir)), objectid, mode, &index);
	if (IS_ERR(inode)) {
		ret = PTR_ERR(inode);
		inode = NULL;
		goto out;
	}

	inode->i_fop = &btrfs_file_operations;
	inode->i_op = &btrfs_file_inode_operations;

	inode->i_mapping->a_ops = &btrfs_aops;
	BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;

	ret = btrfs_init_inode_security(trans, inode, dir, NULL);
	if (ret)
		goto out;

	ret = btrfs_update_inode(trans, root, inode);
	if (ret)
		goto out;
	/* Orphan item keeps the unlinked inode reclaimable after a crash. */
	ret = btrfs_orphan_add(trans, BTRFS_I(inode));
	if (ret)
		goto out;

	/*
	 * Present the inode to the VFS with a link count of 1 even though
	 * no directory entry exists — presumably to satisfy d_tmpfile();
	 * NOTE(review): confirm against btrfs_new_inode()'s initial nlink.
	 */
	set_nlink(inode, 1);
	d_tmpfile(dentry, inode);
	unlock_new_inode(inode);
	mark_inode_dirty(inode);
out:
	btrfs_end_transaction(trans);
	if (ret && inode)
		discard_new_inode(inode);
	btrfs_btree_balance_dirty(fs_info);
	return ret;
}
10506
10507void btrfs_set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
10508{
10509 struct inode *inode = tree->private_data;
10510 unsigned long index = start >> PAGE_SHIFT;
10511 unsigned long end_index = end >> PAGE_SHIFT;
10512 struct page *page;
10513
10514 while (index <= end_index) {
10515 page = find_get_page(inode->i_mapping, index);
10516 ASSERT(page);
10517 set_page_writeback(page);
10518 put_page(page);
10519 index++;
10520 }
10521}
10522
10523#ifdef CONFIG_SWAP
10524
10525
10526
10527
10528
10529static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr,
10530 bool is_block_group)
10531{
10532 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
10533 struct btrfs_swapfile_pin *sp, *entry;
10534 struct rb_node **p;
10535 struct rb_node *parent = NULL;
10536
10537 sp = kmalloc(sizeof(*sp), GFP_NOFS);
10538 if (!sp)
10539 return -ENOMEM;
10540 sp->ptr = ptr;
10541 sp->inode = inode;
10542 sp->is_block_group = is_block_group;
10543
10544 spin_lock(&fs_info->swapfile_pins_lock);
10545 p = &fs_info->swapfile_pins.rb_node;
10546 while (*p) {
10547 parent = *p;
10548 entry = rb_entry(parent, struct btrfs_swapfile_pin, node);
10549 if (sp->ptr < entry->ptr ||
10550 (sp->ptr == entry->ptr && sp->inode < entry->inode)) {
10551 p = &(*p)->rb_left;
10552 } else if (sp->ptr > entry->ptr ||
10553 (sp->ptr == entry->ptr && sp->inode > entry->inode)) {
10554 p = &(*p)->rb_right;
10555 } else {
10556 spin_unlock(&fs_info->swapfile_pins_lock);
10557 kfree(sp);
10558 return 1;
10559 }
10560 }
10561 rb_link_node(&sp->node, parent, p);
10562 rb_insert_color(&sp->node, &fs_info->swapfile_pins);
10563 spin_unlock(&fs_info->swapfile_pins_lock);
10564 return 0;
10565}
10566
10567
10568static void btrfs_free_swapfile_pins(struct inode *inode)
10569{
10570 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
10571 struct btrfs_swapfile_pin *sp;
10572 struct rb_node *node, *next;
10573
10574 spin_lock(&fs_info->swapfile_pins_lock);
10575 node = rb_first(&fs_info->swapfile_pins);
10576 while (node) {
10577 next = rb_next(node);
10578 sp = rb_entry(node, struct btrfs_swapfile_pin, node);
10579 if (sp->inode == inode) {
10580 rb_erase(&sp->node, &fs_info->swapfile_pins);
10581 if (sp->is_block_group)
10582 btrfs_put_block_group(sp->ptr);
10583 kfree(sp);
10584 }
10585 node = next;
10586 }
10587 spin_unlock(&fs_info->swapfile_pins_lock);
10588}
10589
/* State accumulated while walking a swapfile's extents in
 * btrfs_swap_activate() / btrfs_add_swap_extent(). */
struct btrfs_swap_info {
	u64 start;		/* file offset of the current run */
	u64 block_start;	/* physical start of the current run, in bytes */
	u64 block_len;		/* length of the current run, in bytes */
	u64 lowest_ppage;	/* lowest physical page reported so far */
	u64 highest_ppage;	/* highest physical page reported so far */
	unsigned long nr_pages;	/* total pages handed to add_swap_extent() */
	int nr_extents;		/* total swap extents created */
};
10599
10600static int btrfs_add_swap_extent(struct swap_info_struct *sis,
10601 struct btrfs_swap_info *bsi)
10602{
10603 unsigned long nr_pages;
10604 u64 first_ppage, first_ppage_reported, next_ppage;
10605 int ret;
10606
10607 first_ppage = ALIGN(bsi->block_start, PAGE_SIZE) >> PAGE_SHIFT;
10608 next_ppage = ALIGN_DOWN(bsi->block_start + bsi->block_len,
10609 PAGE_SIZE) >> PAGE_SHIFT;
10610
10611 if (first_ppage >= next_ppage)
10612 return 0;
10613 nr_pages = next_ppage - first_ppage;
10614
10615 first_ppage_reported = first_ppage;
10616 if (bsi->start == 0)
10617 first_ppage_reported++;
10618 if (bsi->lowest_ppage > first_ppage_reported)
10619 bsi->lowest_ppage = first_ppage_reported;
10620 if (bsi->highest_ppage < (next_ppage - 1))
10621 bsi->highest_ppage = next_ppage - 1;
10622
10623 ret = add_swap_extent(sis, bsi->nr_pages, nr_pages, first_ppage);
10624 if (ret < 0)
10625 return ret;
10626 bsi->nr_extents += ret;
10627 bsi->nr_pages += nr_pages;
10628 return 0;
10629}
10630
/* ->swap_deactivate: drop the pins taken at activation time and release
 * the per-root swapfile count. */
static void btrfs_swap_deactivate(struct file *file)
{
	struct inode *inode = file_inode(file);

	btrfs_free_swapfile_pins(inode);
	atomic_dec(&BTRFS_I(inode)->root->nr_swapfiles);
}
10638
/*
 * ->swap_activate: validate a btrfs file for use as a swapfile and build
 * the swap extent list.
 *
 * The file must be NOCOW, uncompressed and unchecksummed, with no holes or
 * inline extents, live entirely in single-profile block groups, and sit on
 * a single device.  Every device and block group touched is pinned via
 * btrfs_add_swapfile_pin().  Returns the number of swap extents created,
 * or a negative errno (undoing the pins via btrfs_swap_deactivate()).
 */
static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
			       sector_t *span)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_state *cached_state = NULL;
	struct extent_map *em = NULL;
	struct btrfs_device *device = NULL;
	struct btrfs_swap_info bsi = {
		.lowest_ppage = (sector_t)-1ULL,
	};
	int ret = 0;
	u64 isize;
	u64 start;

	/* Flush any in-flight ordered extents before inspecting the file. */
	ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
	if (ret)
		return ret;

	/* Reject inode flags incompatible with direct physical swap I/O. */
	if (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS) {
		btrfs_warn(fs_info, "swapfile must not be compressed");
		return -EINVAL;
	}
	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)) {
		btrfs_warn(fs_info, "swapfile must not be copy-on-write");
		return -EINVAL;
	}
	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
		btrfs_warn(fs_info, "swapfile must not be checksummed");
		return -EINVAL;
	}

	/*
	 * Take the filesystem's exclusive-operation bit so activation does
	 * not race with another exclusive operation; NOTE(review): which
	 * operations this excludes (balance/device ops?) is not visible
	 * here — confirm against other BTRFS_FS_EXCL_OP users.
	 */
	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
		btrfs_warn(fs_info,
	   "cannot activate swapfile while exclusive operation is running");
		return -EBUSY;
	}

	/*
	 * Count this root as holding an active swapfile; undone in
	 * btrfs_swap_deactivate().
	 */
	atomic_inc(&BTRFS_I(inode)->root->nr_swapfiles);

	isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);

	lock_extent_bits(io_tree, 0, isize - 1, &cached_state);
	start = 0;
	while (start < isize) {
		u64 logical_block_start, physical_block_start;
		struct btrfs_block_group_cache *bg;
		u64 len = isize - start;

		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0);
		if (IS_ERR(em)) {
			ret = PTR_ERR(em);
			goto out;
		}

		if (em->block_start == EXTENT_MAP_HOLE) {
			btrfs_warn(fs_info, "swapfile must not have holes");
			ret = -EINVAL;
			goto out;
		}
		if (em->block_start == EXTENT_MAP_INLINE) {
			/*
			 * Inline extents have no physical location on disk,
			 * so they cannot be swapped to directly.
			 */
			btrfs_warn(fs_info, "swapfile must not be inline");
			ret = -EINVAL;
			goto out;
		}
		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
			btrfs_warn(fs_info, "swapfile must not be compressed");
			ret = -EINVAL;
			goto out;
		}

		logical_block_start = em->block_start + (start - em->start);
		len = min(len, em->len - (start - em->start));
		free_extent_map(em);
		em = NULL;

		/* The extent must be writable in place (NOCOW). */
		ret = can_nocow_extent(inode, start, &len, NULL, NULL, NULL);
		if (ret < 0) {
			goto out;
		} else if (ret) {
			ret = 0;
		} else {
			btrfs_warn(fs_info,
				   "swapfile must not be copy-on-write");
			ret = -EINVAL;
			goto out;
		}

		/* Map the logical range to a chunk to find the device. */
		em = btrfs_get_chunk_map(fs_info, logical_block_start, len);
		if (IS_ERR(em)) {
			ret = PTR_ERR(em);
			goto out;
		}

		if (em->map_lookup->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
			btrfs_warn(fs_info,
				   "swapfile must have single data profile");
			ret = -EINVAL;
			goto out;
		}

		if (device == NULL) {
			/* First extent: pin the device it lives on. */
			device = em->map_lookup->stripes[0].dev;
			ret = btrfs_add_swapfile_pin(inode, device, false);
			if (ret == 1)
				ret = 0;
			else if (ret)
				goto out;
		} else if (device != em->map_lookup->stripes[0].dev) {
			btrfs_warn(fs_info, "swapfile must be on one device");
			ret = -EINVAL;
			goto out;
		}

		physical_block_start = (em->map_lookup->stripes[0].physical +
					(logical_block_start - em->start));
		len = min(len, em->len - (logical_block_start - em->start));
		free_extent_map(em);
		em = NULL;

		/* Pin the containing block group as well. */
		bg = btrfs_lookup_block_group(fs_info, logical_block_start);
		if (!bg) {
			btrfs_warn(fs_info,
			   "could not find block group containing swapfile");
			ret = -EINVAL;
			goto out;
		}

		ret = btrfs_add_swapfile_pin(inode, bg, true);
		if (ret) {
			btrfs_put_block_group(bg);
			if (ret == 1)
				ret = 0;
			else
				goto out;
		}

		/* Merge physically contiguous runs; flush on discontinuity. */
		if (bsi.block_len &&
		    bsi.block_start + bsi.block_len == physical_block_start) {
			bsi.block_len += len;
		} else {
			if (bsi.block_len) {
				ret = btrfs_add_swap_extent(sis, &bsi);
				if (ret)
					goto out;
			}
			bsi.start = start;
			bsi.block_start = physical_block_start;
			bsi.block_len = len;
		}

		start += len;
	}

	/* Flush the final pending run. */
	if (bsi.block_len)
		ret = btrfs_add_swap_extent(sis, &bsi);

out:
	if (!IS_ERR_OR_NULL(em))
		free_extent_map(em);

	unlock_extent_cached(io_tree, 0, isize - 1, &cached_state);

	if (ret)
		btrfs_swap_deactivate(file);

	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);

	if (ret)
		return ret;

	if (device)
		sis->bdev = device->bdev;
	*span = bsi.highest_ppage - bsi.lowest_ppage + 1;
	sis->max = bsi.nr_pages;
	sis->pages = bsi.nr_pages - 1;
	sis->highest_bit = bsi.nr_pages - 1;
	return bsi.nr_extents;
}
10848#else
/* CONFIG_SWAP=n stub: nothing to tear down. */
static void btrfs_swap_deactivate(struct file *file)
{
}
10852
/* CONFIG_SWAP=n stub: swapfiles cannot be activated. */
static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
			       sector_t *span)
{
	return -EOPNOTSUPP;
}
10858#endif
10859
/* Inode operations for writable directories. */
static const struct inode_operations btrfs_dir_inode_operations = {
	.getattr	= btrfs_getattr,
	.lookup		= btrfs_lookup,
	.create		= btrfs_create,
	.unlink		= btrfs_unlink,
	.link		= btrfs_link,
	.mkdir		= btrfs_mkdir,
	.rmdir		= btrfs_rmdir,
	.rename		= btrfs_rename2,
	.symlink	= btrfs_symlink,
	.setattr	= btrfs_setattr,
	.mknod		= btrfs_mknod,
	.listxattr	= btrfs_listxattr,
	.permission	= btrfs_permission,
	.get_acl	= btrfs_get_acl,
	.set_acl	= btrfs_set_acl,
	.update_time	= btrfs_update_time,
	.tmpfile	= btrfs_tmpfile,
};
/* Inode operations for read-only directories (lookup only, no mutation). */
static const struct inode_operations btrfs_dir_ro_inode_operations = {
	.lookup		= btrfs_lookup,
	.permission	= btrfs_permission,
	.update_time	= btrfs_update_time,
};
10884
/* File operations for open directory handles (readdir, ioctl, fsync). */
static const struct file_operations btrfs_dir_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= generic_read_dir,
	.iterate_shared	= btrfs_real_readdir,
	.open		= btrfs_opendir,
	.unlocked_ioctl	= btrfs_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= btrfs_compat_ioctl,
#endif
	.release        = btrfs_release_file,
	.fsync		= btrfs_sync_file,
};
10897
/* Hooks used by the extent_io code for btrfs-specific bio submission and
 * read-completion handling. */
static const struct extent_io_ops btrfs_extent_io_ops = {
	.submit_bio_hook = btrfs_submit_bio_hook,
	.readpage_end_io_hook = btrfs_readpage_end_io_hook,
};
10903
10904
10905
10906
10907
10908
10909
10910
10911
10912
10913
10914
10915
/* Address space operations for btrfs data inodes. */
static const struct address_space_operations btrfs_aops = {
	.readpage	= btrfs_readpage,
	.writepage	= btrfs_writepage,
	.writepages	= btrfs_writepages,
	.readpages	= btrfs_readpages,
	.direct_IO	= btrfs_direct_IO,
	.invalidatepage = btrfs_invalidatepage,
	.releasepage	= btrfs_releasepage,
	.set_page_dirty	= btrfs_set_page_dirty,
	.error_remove_page = generic_error_remove_page,
	.swap_activate	= btrfs_swap_activate,
	.swap_deactivate = btrfs_swap_deactivate,
};
10929
/* Inode operations for regular files. */
static const struct inode_operations btrfs_file_inode_operations = {
	.getattr	= btrfs_getattr,
	.setattr	= btrfs_setattr,
	.listxattr	= btrfs_listxattr,
	.permission	= btrfs_permission,
	.fiemap		= btrfs_fiemap,
	.get_acl	= btrfs_get_acl,
	.set_acl	= btrfs_set_acl,
	.update_time	= btrfs_update_time,
};
/* Inode operations for special files (devices, FIFOs, sockets). */
static const struct inode_operations btrfs_special_inode_operations = {
	.getattr	= btrfs_getattr,
	.setattr	= btrfs_setattr,
	.permission	= btrfs_permission,
	.listxattr	= btrfs_listxattr,
	.get_acl	= btrfs_get_acl,
	.set_acl	= btrfs_set_acl,
	.update_time	= btrfs_update_time,
};
/* Inode operations for symlinks; targets are read via the page cache. */
static const struct inode_operations btrfs_symlink_inode_operations = {
	.get_link	= page_get_link,
	.getattr	= btrfs_getattr,
	.setattr	= btrfs_setattr,
	.permission	= btrfs_permission,
	.listxattr	= btrfs_listxattr,
	.update_time	= btrfs_update_time,
};
10957
/* Dentry operations: btrfs decides when negative dentries may be kept. */
const struct dentry_operations btrfs_dentry_operations = {
	.d_delete	= btrfs_dentry_delete,
};
10961