1
2
3
4
5
6#include <linux/kernel.h>
7#include <linux/bio.h>
8#include <linux/buffer_head.h>
9#include <linux/file.h>
10#include <linux/fs.h>
11#include <linux/pagemap.h>
12#include <linux/highmem.h>
13#include <linux/time.h>
14#include <linux/init.h>
15#include <linux/string.h>
16#include <linux/backing-dev.h>
17#include <linux/writeback.h>
18#include <linux/compat.h>
19#include <linux/xattr.h>
20#include <linux/posix_acl.h>
21#include <linux/falloc.h>
22#include <linux/slab.h>
23#include <linux/ratelimit.h>
24#include <linux/btrfs.h>
25#include <linux/blkdev.h>
26#include <linux/posix_acl_xattr.h>
27#include <linux/uio.h>
28#include <linux/magic.h>
29#include <linux/iversion.h>
30#include <linux/swap.h>
31#include <linux/sched/mm.h>
32#include <asm/unaligned.h>
33#include "ctree.h"
34#include "disk-io.h"
35#include "transaction.h"
36#include "btrfs_inode.h"
37#include "print-tree.h"
38#include "ordered-data.h"
39#include "xattr.h"
40#include "tree-log.h"
41#include "volumes.h"
42#include "compression.h"
43#include "locking.h"
44#include "free-space-cache.h"
45#include "inode-map.h"
46#include "backref.h"
47#include "props.h"
48#include "qgroup.h"
49#include "dedupe.h"
50#include "delalloc-space.h"
51
/*
 * Pair of pointers bundled into a single argument: a key and a root.
 * NOTE(review): the consumers of this struct are outside this chunk;
 * presumably it is the lookup argument for inode get/lookup callbacks.
 */
struct btrfs_iget_args {
	struct btrfs_key *location;
	struct btrfs_root *root;
};
56
57struct btrfs_dio_data {
58 u64 reserve;
59 u64 unsubmitted_oe_range_start;
60 u64 unsubmitted_oe_range_end;
61 int overwrite;
62};
63
64static const struct inode_operations btrfs_dir_inode_operations;
65static const struct inode_operations btrfs_symlink_inode_operations;
66static const struct inode_operations btrfs_dir_ro_inode_operations;
67static const struct inode_operations btrfs_special_inode_operations;
68static const struct inode_operations btrfs_file_inode_operations;
69static const struct address_space_operations btrfs_aops;
70static const struct file_operations btrfs_dir_file_operations;
71static const struct extent_io_ops btrfs_extent_io_ops;
72
/*
 * Slab cache pointers. Only the inode cache is private to this file; the
 * other three have external linkage.
 */
static struct kmem_cache	*btrfs_inode_cachep;
struct kmem_cache		*btrfs_trans_handle_cachep;
struct kmem_cache		*btrfs_path_cachep;
struct kmem_cache		*btrfs_free_space_cachep;
77
78static int btrfs_setsize(struct inode *inode, struct iattr *attr);
79static int btrfs_truncate(struct inode *inode, bool skip_writeback);
80static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
81static noinline int cow_file_range(struct inode *inode,
82 struct page *locked_page,
83 u64 start, u64 end, u64 delalloc_end,
84 int *page_started, unsigned long *nr_written,
85 int unlock, struct btrfs_dedupe_hash *hash);
86static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
87 u64 orig_start, u64 block_start,
88 u64 block_len, u64 orig_block_len,
89 u64 ram_bytes, int compress_type,
90 int type);
91
92static void __endio_write_update_ordered(struct inode *inode,
93 const u64 offset, const u64 bytes,
94 const bool uptodate);
95
96
97
98
99
100
101
102
103
104
105
106static inline void btrfs_cleanup_ordered_extents(struct inode *inode,
107 struct page *locked_page,
108 u64 offset, u64 bytes)
109{
110 unsigned long index = offset >> PAGE_SHIFT;
111 unsigned long end_index = (offset + bytes - 1) >> PAGE_SHIFT;
112 u64 page_start = page_offset(locked_page);
113 u64 page_end = page_start + PAGE_SIZE - 1;
114
115 struct page *page;
116
117 while (index <= end_index) {
118 page = find_get_page(inode->i_mapping, index);
119 index++;
120 if (!page)
121 continue;
122 ClearPagePrivate2(page);
123 put_page(page);
124 }
125
126
127
128
129
130
131 if (page_start >= offset && page_end <= (offset + bytes - 1)) {
132 offset += PAGE_SIZE;
133 bytes -= PAGE_SIZE;
134 }
135
136 return __endio_write_update_ordered(inode, offset, bytes, false);
137}
138
/* Defined later in this file; declared here for earlier users. */
static int btrfs_dirty_inode(struct inode *inode);
140
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/*
 * Sanity-test hook: point the inode's io_tree at this file's extent io ops.
 * Only built when CONFIG_BTRFS_FS_RUN_SANITY_TESTS is enabled.
 */
void btrfs_test_inode_set_ops(struct inode *inode)
{
	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;

	tree->ops = &btrfs_extent_io_ops;
}
#endif
147
/*
 * Initialise ACLs and then security xattrs for a new inode.
 *
 * The security xattr step is skipped when ACL initialisation fails.
 *
 * Returns 0 on success, or the first non-zero value returned by
 * btrfs_init_acl() / btrfs_xattr_security_init().
 */
static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
				     struct inode *inode, struct inode *dir,
				     const struct qstr *qstr)
{
	int ret = btrfs_init_acl(trans, inode, dir);

	if (ret)
		return ret;

	return btrfs_xattr_security_init(trans, inode, dir, qstr);
}
159
160
161
162
163
164
165static int insert_inline_extent(struct btrfs_trans_handle *trans,
166 struct btrfs_path *path, int extent_inserted,
167 struct btrfs_root *root, struct inode *inode,
168 u64 start, size_t size, size_t compressed_size,
169 int compress_type,
170 struct page **compressed_pages)
171{
172 struct extent_buffer *leaf;
173 struct page *page = NULL;
174 char *kaddr;
175 unsigned long ptr;
176 struct btrfs_file_extent_item *ei;
177 int ret;
178 size_t cur_size = size;
179 unsigned long offset;
180
181 if (compressed_size && compressed_pages)
182 cur_size = compressed_size;
183
184 inode_add_bytes(inode, size);
185
186 if (!extent_inserted) {
187 struct btrfs_key key;
188 size_t datasize;
189
190 key.objectid = btrfs_ino(BTRFS_I(inode));
191 key.offset = start;
192 key.type = BTRFS_EXTENT_DATA_KEY;
193
194 datasize = btrfs_file_extent_calc_inline_size(cur_size);
195 path->leave_spinning = 1;
196 ret = btrfs_insert_empty_item(trans, root, path, &key,
197 datasize);
198 if (ret)
199 goto fail;
200 }
201 leaf = path->nodes[0];
202 ei = btrfs_item_ptr(leaf, path->slots[0],
203 struct btrfs_file_extent_item);
204 btrfs_set_file_extent_generation(leaf, ei, trans->transid);
205 btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE);
206 btrfs_set_file_extent_encryption(leaf, ei, 0);
207 btrfs_set_file_extent_other_encoding(leaf, ei, 0);
208 btrfs_set_file_extent_ram_bytes(leaf, ei, size);
209 ptr = btrfs_file_extent_inline_start(ei);
210
211 if (compress_type != BTRFS_COMPRESS_NONE) {
212 struct page *cpage;
213 int i = 0;
214 while (compressed_size > 0) {
215 cpage = compressed_pages[i];
216 cur_size = min_t(unsigned long, compressed_size,
217 PAGE_SIZE);
218
219 kaddr = kmap_atomic(cpage);
220 write_extent_buffer(leaf, kaddr, ptr, cur_size);
221 kunmap_atomic(kaddr);
222
223 i++;
224 ptr += cur_size;
225 compressed_size -= cur_size;
226 }
227 btrfs_set_file_extent_compression(leaf, ei,
228 compress_type);
229 } else {
230 page = find_get_page(inode->i_mapping,
231 start >> PAGE_SHIFT);
232 btrfs_set_file_extent_compression(leaf, ei, 0);
233 kaddr = kmap_atomic(page);
234 offset = offset_in_page(start);
235 write_extent_buffer(leaf, kaddr + offset, ptr, size);
236 kunmap_atomic(kaddr);
237 put_page(page);
238 }
239 btrfs_mark_buffer_dirty(leaf);
240 btrfs_release_path(path);
241
242
243
244
245
246
247
248
249
250
251 BTRFS_I(inode)->disk_i_size = inode->i_size;
252 ret = btrfs_update_inode(trans, root, inode);
253
254fail:
255 return ret;
256}
257
258
259
260
261
262
263
264static noinline int cow_file_range_inline(struct inode *inode, u64 start,
265 u64 end, size_t compressed_size,
266 int compress_type,
267 struct page **compressed_pages)
268{
269 struct btrfs_root *root = BTRFS_I(inode)->root;
270 struct btrfs_fs_info *fs_info = root->fs_info;
271 struct btrfs_trans_handle *trans;
272 u64 isize = i_size_read(inode);
273 u64 actual_end = min(end + 1, isize);
274 u64 inline_len = actual_end - start;
275 u64 aligned_end = ALIGN(end, fs_info->sectorsize);
276 u64 data_len = inline_len;
277 int ret;
278 struct btrfs_path *path;
279 int extent_inserted = 0;
280 u32 extent_item_size;
281
282 if (compressed_size)
283 data_len = compressed_size;
284
285 if (start > 0 ||
286 actual_end > fs_info->sectorsize ||
287 data_len > BTRFS_MAX_INLINE_DATA_SIZE(fs_info) ||
288 (!compressed_size &&
289 (actual_end & (fs_info->sectorsize - 1)) == 0) ||
290 end + 1 < isize ||
291 data_len > fs_info->max_inline) {
292 return 1;
293 }
294
295 path = btrfs_alloc_path();
296 if (!path)
297 return -ENOMEM;
298
299 trans = btrfs_join_transaction(root);
300 if (IS_ERR(trans)) {
301 btrfs_free_path(path);
302 return PTR_ERR(trans);
303 }
304 trans->block_rsv = &BTRFS_I(inode)->block_rsv;
305
306 if (compressed_size && compressed_pages)
307 extent_item_size = btrfs_file_extent_calc_inline_size(
308 compressed_size);
309 else
310 extent_item_size = btrfs_file_extent_calc_inline_size(
311 inline_len);
312
313 ret = __btrfs_drop_extents(trans, root, inode, path,
314 start, aligned_end, NULL,
315 1, 1, extent_item_size, &extent_inserted);
316 if (ret) {
317 btrfs_abort_transaction(trans, ret);
318 goto out;
319 }
320
321 if (isize > actual_end)
322 inline_len = min_t(u64, isize, actual_end);
323 ret = insert_inline_extent(trans, path, extent_inserted,
324 root, inode, start,
325 inline_len, compressed_size,
326 compress_type, compressed_pages);
327 if (ret && ret != -ENOSPC) {
328 btrfs_abort_transaction(trans, ret);
329 goto out;
330 } else if (ret == -ENOSPC) {
331 ret = 1;
332 goto out;
333 }
334
335 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
336 btrfs_drop_extent_cache(BTRFS_I(inode), start, aligned_end - 1, 0);
337out:
338
339
340
341
342
343
344 btrfs_qgroup_free_data(inode, NULL, 0, PAGE_SIZE);
345 btrfs_free_path(path);
346 btrfs_end_transaction(trans);
347 return ret;
348}
349
350struct async_extent {
351 u64 start;
352 u64 ram_size;
353 u64 compressed_size;
354 struct page **pages;
355 unsigned long nr_pages;
356 int compress_type;
357 struct list_head list;
358};
359
360struct async_chunk {
361 struct inode *inode;
362 struct page *locked_page;
363 u64 start;
364 u64 end;
365 unsigned int write_flags;
366 struct list_head extents;
367 struct btrfs_work work;
368 atomic_t *pending;
369};
370
371struct async_cow {
372
373 atomic_t num_chunks;
374 struct async_chunk chunks[];
375};
376
377static noinline int add_async_extent(struct async_chunk *cow,
378 u64 start, u64 ram_size,
379 u64 compressed_size,
380 struct page **pages,
381 unsigned long nr_pages,
382 int compress_type)
383{
384 struct async_extent *async_extent;
385
386 async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS);
387 BUG_ON(!async_extent);
388 async_extent->start = start;
389 async_extent->ram_size = ram_size;
390 async_extent->compressed_size = compressed_size;
391 async_extent->pages = pages;
392 async_extent->nr_pages = nr_pages;
393 async_extent->compress_type = compress_type;
394 list_add_tail(&async_extent->list, &cow->extents);
395 return 0;
396}
397
398
399
400
401static inline bool inode_can_compress(struct inode *inode)
402{
403 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW ||
404 BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
405 return false;
406 return true;
407}
408
409
410
411
412
413static inline int inode_need_compress(struct inode *inode, u64 start, u64 end)
414{
415 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
416
417 if (!inode_can_compress(inode)) {
418 WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
419 KERN_ERR "BTRFS: unexpected compression for ino %llu\n",
420 btrfs_ino(BTRFS_I(inode)));
421 return 0;
422 }
423
424 if (btrfs_test_opt(fs_info, FORCE_COMPRESS))
425 return 1;
426
427 if (BTRFS_I(inode)->defrag_compress)
428 return 1;
429
430 if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS)
431 return 0;
432 if (btrfs_test_opt(fs_info, COMPRESS) ||
433 BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS ||
434 BTRFS_I(inode)->prop_compress)
435 return btrfs_compress_heuristic(inode, start, end);
436 return 0;
437}
438
439static inline void inode_should_defrag(struct btrfs_inode *inode,
440 u64 start, u64 end, u64 num_bytes, u64 small_write)
441{
442
443 if (num_bytes < small_write &&
444 (start > 0 || end + 1 < inode->disk_i_size))
445 btrfs_add_inode_defrag(NULL, inode);
446}
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465static noinline void compress_file_range(struct async_chunk *async_chunk,
466 int *num_added)
467{
468 struct inode *inode = async_chunk->inode;
469 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
470 u64 blocksize = fs_info->sectorsize;
471 u64 start = async_chunk->start;
472 u64 end = async_chunk->end;
473 u64 actual_end;
474 int ret = 0;
475 struct page **pages = NULL;
476 unsigned long nr_pages;
477 unsigned long total_compressed = 0;
478 unsigned long total_in = 0;
479 int i;
480 int will_compress;
481 int compress_type = fs_info->compress_type;
482 int redirty = 0;
483
484 inode_should_defrag(BTRFS_I(inode), start, end, end - start + 1,
485 SZ_16K);
486
487 actual_end = min_t(u64, i_size_read(inode), end + 1);
488again:
489 will_compress = 0;
490 nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1;
491 BUILD_BUG_ON((BTRFS_MAX_COMPRESSED % PAGE_SIZE) != 0);
492 nr_pages = min_t(unsigned long, nr_pages,
493 BTRFS_MAX_COMPRESSED / PAGE_SIZE);
494
495
496
497
498
499
500
501
502
503
504
505 if (actual_end <= start)
506 goto cleanup_and_bail_uncompressed;
507
508 total_compressed = actual_end - start;
509
510
511
512
513
514 if (total_compressed <= blocksize &&
515 (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
516 goto cleanup_and_bail_uncompressed;
517
518 total_compressed = min_t(unsigned long, total_compressed,
519 BTRFS_MAX_UNCOMPRESSED);
520 total_in = 0;
521 ret = 0;
522
523
524
525
526
527
528 if (inode_need_compress(inode, start, end)) {
529 WARN_ON(pages);
530 pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
531 if (!pages) {
532
533 nr_pages = 0;
534 goto cont;
535 }
536
537 if (BTRFS_I(inode)->defrag_compress)
538 compress_type = BTRFS_I(inode)->defrag_compress;
539 else if (BTRFS_I(inode)->prop_compress)
540 compress_type = BTRFS_I(inode)->prop_compress;
541
542
543
544
545
546
547
548
549
550
551
552
553
554 if (!redirty) {
555 extent_range_clear_dirty_for_io(inode, start, end);
556 redirty = 1;
557 }
558
559
560 ret = btrfs_compress_pages(
561 compress_type | (fs_info->compress_level << 4),
562 inode->i_mapping, start,
563 pages,
564 &nr_pages,
565 &total_in,
566 &total_compressed);
567
568 if (!ret) {
569 unsigned long offset = offset_in_page(total_compressed);
570 struct page *page = pages[nr_pages - 1];
571 char *kaddr;
572
573
574
575
576 if (offset) {
577 kaddr = kmap_atomic(page);
578 memset(kaddr + offset, 0,
579 PAGE_SIZE - offset);
580 kunmap_atomic(kaddr);
581 }
582 will_compress = 1;
583 }
584 }
585cont:
586 if (start == 0) {
587
588 if (ret || total_in < actual_end) {
589
590
591
592 ret = cow_file_range_inline(inode, start, end, 0,
593 BTRFS_COMPRESS_NONE, NULL);
594 } else {
595
596 ret = cow_file_range_inline(inode, start, end,
597 total_compressed,
598 compress_type, pages);
599 }
600 if (ret <= 0) {
601 unsigned long clear_flags = EXTENT_DELALLOC |
602 EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
603 EXTENT_DO_ACCOUNTING;
604 unsigned long page_error_op;
605
606 page_error_op = ret < 0 ? PAGE_SET_ERROR : 0;
607
608
609
610
611
612
613
614
615
616
617
618 extent_clear_unlock_delalloc(inode, start, end, end,
619 NULL, clear_flags,
620 PAGE_UNLOCK |
621 PAGE_CLEAR_DIRTY |
622 PAGE_SET_WRITEBACK |
623 page_error_op |
624 PAGE_END_WRITEBACK);
625 goto free_pages_out;
626 }
627 }
628
629 if (will_compress) {
630
631
632
633
634
635 total_compressed = ALIGN(total_compressed, blocksize);
636
637
638
639
640
641
642 total_in = ALIGN(total_in, PAGE_SIZE);
643 if (total_compressed + blocksize <= total_in) {
644 *num_added += 1;
645
646
647
648
649
650
651 add_async_extent(async_chunk, start, total_in,
652 total_compressed, pages, nr_pages,
653 compress_type);
654
655 if (start + total_in < end) {
656 start += total_in;
657 pages = NULL;
658 cond_resched();
659 goto again;
660 }
661 return;
662 }
663 }
664 if (pages) {
665
666
667
668
669 for (i = 0; i < nr_pages; i++) {
670 WARN_ON(pages[i]->mapping);
671 put_page(pages[i]);
672 }
673 kfree(pages);
674 pages = NULL;
675 total_compressed = 0;
676 nr_pages = 0;
677
678
679 if (!btrfs_test_opt(fs_info, FORCE_COMPRESS) &&
680 !(BTRFS_I(inode)->prop_compress)) {
681 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
682 }
683 }
684cleanup_and_bail_uncompressed:
685
686
687
688
689
690
691 if (page_offset(async_chunk->locked_page) >= start &&
692 page_offset(async_chunk->locked_page) <= end)
693 __set_page_dirty_nobuffers(async_chunk->locked_page);
694
695
696 if (redirty)
697 extent_range_redirty_for_io(inode, start, end);
698 add_async_extent(async_chunk, start, end - start + 1, 0, NULL, 0,
699 BTRFS_COMPRESS_NONE);
700 *num_added += 1;
701
702 return;
703
704free_pages_out:
705 for (i = 0; i < nr_pages; i++) {
706 WARN_ON(pages[i]->mapping);
707 put_page(pages[i]);
708 }
709 kfree(pages);
710}
711
712static void free_async_extent_pages(struct async_extent *async_extent)
713{
714 int i;
715
716 if (!async_extent->pages)
717 return;
718
719 for (i = 0; i < async_extent->nr_pages; i++) {
720 WARN_ON(async_extent->pages[i]->mapping);
721 put_page(async_extent->pages[i]);
722 }
723 kfree(async_extent->pages);
724 async_extent->nr_pages = 0;
725 async_extent->pages = NULL;
726}
727
728
729
730
731
732
733
734static noinline void submit_compressed_extents(struct async_chunk *async_chunk)
735{
736 struct inode *inode = async_chunk->inode;
737 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
738 struct async_extent *async_extent;
739 u64 alloc_hint = 0;
740 struct btrfs_key ins;
741 struct extent_map *em;
742 struct btrfs_root *root = BTRFS_I(inode)->root;
743 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
744 int ret = 0;
745
746again:
747 while (!list_empty(&async_chunk->extents)) {
748 async_extent = list_entry(async_chunk->extents.next,
749 struct async_extent, list);
750 list_del(&async_extent->list);
751
752retry:
753 lock_extent(io_tree, async_extent->start,
754 async_extent->start + async_extent->ram_size - 1);
755
756 if (!async_extent->pages) {
757 int page_started = 0;
758 unsigned long nr_written = 0;
759
760
761 ret = cow_file_range(inode, async_chunk->locked_page,
762 async_extent->start,
763 async_extent->start +
764 async_extent->ram_size - 1,
765 async_extent->start +
766 async_extent->ram_size - 1,
767 &page_started, &nr_written, 0,
768 NULL);
769
770
771
772
773
774
775
776
777
778 if (!page_started && !ret)
779 extent_write_locked_range(inode,
780 async_extent->start,
781 async_extent->start +
782 async_extent->ram_size - 1,
783 WB_SYNC_ALL);
784 else if (ret)
785 unlock_page(async_chunk->locked_page);
786 kfree(async_extent);
787 cond_resched();
788 continue;
789 }
790
791 ret = btrfs_reserve_extent(root, async_extent->ram_size,
792 async_extent->compressed_size,
793 async_extent->compressed_size,
794 0, alloc_hint, &ins, 1, 1);
795 if (ret) {
796 free_async_extent_pages(async_extent);
797
798 if (ret == -ENOSPC) {
799 unlock_extent(io_tree, async_extent->start,
800 async_extent->start +
801 async_extent->ram_size - 1);
802
803
804
805
806
807
808
809 extent_range_redirty_for_io(inode,
810 async_extent->start,
811 async_extent->start +
812 async_extent->ram_size - 1);
813
814 goto retry;
815 }
816 goto out_free;
817 }
818
819
820
821
822 em = create_io_em(inode, async_extent->start,
823 async_extent->ram_size,
824 async_extent->start,
825 ins.objectid,
826 ins.offset,
827 ins.offset,
828 async_extent->ram_size,
829 async_extent->compress_type,
830 BTRFS_ORDERED_COMPRESSED);
831 if (IS_ERR(em))
832
833 goto out_free_reserve;
834 free_extent_map(em);
835
836 ret = btrfs_add_ordered_extent_compress(inode,
837 async_extent->start,
838 ins.objectid,
839 async_extent->ram_size,
840 ins.offset,
841 BTRFS_ORDERED_COMPRESSED,
842 async_extent->compress_type);
843 if (ret) {
844 btrfs_drop_extent_cache(BTRFS_I(inode),
845 async_extent->start,
846 async_extent->start +
847 async_extent->ram_size - 1, 0);
848 goto out_free_reserve;
849 }
850 btrfs_dec_block_group_reservations(fs_info, ins.objectid);
851
852
853
854
855 extent_clear_unlock_delalloc(inode, async_extent->start,
856 async_extent->start +
857 async_extent->ram_size - 1,
858 async_extent->start +
859 async_extent->ram_size - 1,
860 NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
861 PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
862 PAGE_SET_WRITEBACK);
863 if (btrfs_submit_compressed_write(inode,
864 async_extent->start,
865 async_extent->ram_size,
866 ins.objectid,
867 ins.offset, async_extent->pages,
868 async_extent->nr_pages,
869 async_chunk->write_flags)) {
870 struct page *p = async_extent->pages[0];
871 const u64 start = async_extent->start;
872 const u64 end = start + async_extent->ram_size - 1;
873
874 p->mapping = inode->i_mapping;
875 btrfs_writepage_endio_finish_ordered(p, start, end, 0);
876
877 p->mapping = NULL;
878 extent_clear_unlock_delalloc(inode, start, end, end,
879 NULL, 0,
880 PAGE_END_WRITEBACK |
881 PAGE_SET_ERROR);
882 free_async_extent_pages(async_extent);
883 }
884 alloc_hint = ins.objectid + ins.offset;
885 kfree(async_extent);
886 cond_resched();
887 }
888 return;
889out_free_reserve:
890 btrfs_dec_block_group_reservations(fs_info, ins.objectid);
891 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
892out_free:
893 extent_clear_unlock_delalloc(inode, async_extent->start,
894 async_extent->start +
895 async_extent->ram_size - 1,
896 async_extent->start +
897 async_extent->ram_size - 1,
898 NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
899 EXTENT_DELALLOC_NEW |
900 EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
901 PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
902 PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK |
903 PAGE_SET_ERROR);
904 free_async_extent_pages(async_extent);
905 kfree(async_extent);
906 goto again;
907}
908
909static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
910 u64 num_bytes)
911{
912 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
913 struct extent_map *em;
914 u64 alloc_hint = 0;
915
916 read_lock(&em_tree->lock);
917 em = search_extent_mapping(em_tree, start, num_bytes);
918 if (em) {
919
920
921
922
923
924 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
925 free_extent_map(em);
926 em = search_extent_mapping(em_tree, 0, 0);
927 if (em && em->block_start < EXTENT_MAP_LAST_BYTE)
928 alloc_hint = em->block_start;
929 if (em)
930 free_extent_map(em);
931 } else {
932 alloc_hint = em->block_start;
933 free_extent_map(em);
934 }
935 }
936 read_unlock(&em_tree->lock);
937
938 return alloc_hint;
939}
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954static noinline int cow_file_range(struct inode *inode,
955 struct page *locked_page,
956 u64 start, u64 end, u64 delalloc_end,
957 int *page_started, unsigned long *nr_written,
958 int unlock, struct btrfs_dedupe_hash *hash)
959{
960 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
961 struct btrfs_root *root = BTRFS_I(inode)->root;
962 u64 alloc_hint = 0;
963 u64 num_bytes;
964 unsigned long ram_size;
965 u64 cur_alloc_size = 0;
966 u64 blocksize = fs_info->sectorsize;
967 struct btrfs_key ins;
968 struct extent_map *em;
969 unsigned clear_bits;
970 unsigned long page_ops;
971 bool extent_reserved = false;
972 int ret = 0;
973
974 if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
975 WARN_ON_ONCE(1);
976 ret = -EINVAL;
977 goto out_unlock;
978 }
979
980 num_bytes = ALIGN(end - start + 1, blocksize);
981 num_bytes = max(blocksize, num_bytes);
982 ASSERT(num_bytes <= btrfs_super_total_bytes(fs_info->super_copy));
983
984 inode_should_defrag(BTRFS_I(inode), start, end, num_bytes, SZ_64K);
985
986 if (start == 0) {
987
988 ret = cow_file_range_inline(inode, start, end, 0,
989 BTRFS_COMPRESS_NONE, NULL);
990 if (ret == 0) {
991
992
993
994
995
996
997 extent_clear_unlock_delalloc(inode, start, end,
998 delalloc_end, NULL,
999 EXTENT_LOCKED | EXTENT_DELALLOC |
1000 EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
1001 EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
1002 PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
1003 PAGE_END_WRITEBACK);
1004 *nr_written = *nr_written +
1005 (end - start + PAGE_SIZE) / PAGE_SIZE;
1006 *page_started = 1;
1007 goto out;
1008 } else if (ret < 0) {
1009 goto out_unlock;
1010 }
1011 }
1012
1013 alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
1014 btrfs_drop_extent_cache(BTRFS_I(inode), start,
1015 start + num_bytes - 1, 0);
1016
1017 while (num_bytes > 0) {
1018 cur_alloc_size = num_bytes;
1019 ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
1020 fs_info->sectorsize, 0, alloc_hint,
1021 &ins, 1, 1);
1022 if (ret < 0)
1023 goto out_unlock;
1024 cur_alloc_size = ins.offset;
1025 extent_reserved = true;
1026
1027 ram_size = ins.offset;
1028 em = create_io_em(inode, start, ins.offset,
1029 start,
1030 ins.objectid,
1031 ins.offset,
1032 ins.offset,
1033 ram_size,
1034 BTRFS_COMPRESS_NONE,
1035 BTRFS_ORDERED_REGULAR );
1036 if (IS_ERR(em)) {
1037 ret = PTR_ERR(em);
1038 goto out_reserve;
1039 }
1040 free_extent_map(em);
1041
1042 ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
1043 ram_size, cur_alloc_size, 0);
1044 if (ret)
1045 goto out_drop_extent_cache;
1046
1047 if (root->root_key.objectid ==
1048 BTRFS_DATA_RELOC_TREE_OBJECTID) {
1049 ret = btrfs_reloc_clone_csums(inode, start,
1050 cur_alloc_size);
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062 if (ret)
1063 btrfs_drop_extent_cache(BTRFS_I(inode), start,
1064 start + ram_size - 1, 0);
1065 }
1066
1067 btrfs_dec_block_group_reservations(fs_info, ins.objectid);
1068
1069
1070
1071
1072
1073
1074
1075
1076 page_ops = unlock ? PAGE_UNLOCK : 0;
1077 page_ops |= PAGE_SET_PRIVATE2;
1078
1079 extent_clear_unlock_delalloc(inode, start,
1080 start + ram_size - 1,
1081 delalloc_end, locked_page,
1082 EXTENT_LOCKED | EXTENT_DELALLOC,
1083 page_ops);
1084 if (num_bytes < cur_alloc_size)
1085 num_bytes = 0;
1086 else
1087 num_bytes -= cur_alloc_size;
1088 alloc_hint = ins.objectid + ins.offset;
1089 start += cur_alloc_size;
1090 extent_reserved = false;
1091
1092
1093
1094
1095
1096
1097 if (ret)
1098 goto out_unlock;
1099 }
1100out:
1101 return ret;
1102
1103out_drop_extent_cache:
1104 btrfs_drop_extent_cache(BTRFS_I(inode), start, start + ram_size - 1, 0);
1105out_reserve:
1106 btrfs_dec_block_group_reservations(fs_info, ins.objectid);
1107 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
1108out_unlock:
1109 clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
1110 EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV;
1111 page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
1112 PAGE_END_WRITEBACK;
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123 if (extent_reserved) {
1124 extent_clear_unlock_delalloc(inode, start,
1125 start + cur_alloc_size,
1126 start + cur_alloc_size,
1127 locked_page,
1128 clear_bits,
1129 page_ops);
1130 start += cur_alloc_size;
1131 if (start >= end)
1132 goto out;
1133 }
1134 extent_clear_unlock_delalloc(inode, start, end, delalloc_end,
1135 locked_page,
1136 clear_bits | EXTENT_CLEAR_DATA_RESV,
1137 page_ops);
1138 goto out;
1139}
1140
1141
1142
1143
1144static noinline void async_cow_start(struct btrfs_work *work)
1145{
1146 struct async_chunk *async_chunk;
1147 int num_added = 0;
1148
1149 async_chunk = container_of(work, struct async_chunk, work);
1150
1151 compress_file_range(async_chunk, &num_added);
1152 if (num_added == 0) {
1153 btrfs_add_delayed_iput(async_chunk->inode);
1154 async_chunk->inode = NULL;
1155 }
1156}
1157
1158
1159
1160
1161static noinline void async_cow_submit(struct btrfs_work *work)
1162{
1163 struct async_chunk *async_chunk = container_of(work, struct async_chunk,
1164 work);
1165 struct btrfs_fs_info *fs_info = btrfs_work_owner(work);
1166 unsigned long nr_pages;
1167
1168 nr_pages = (async_chunk->end - async_chunk->start + PAGE_SIZE) >>
1169 PAGE_SHIFT;
1170
1171
1172 if (atomic_sub_return(nr_pages, &fs_info->async_delalloc_pages) <
1173 5 * SZ_1M)
1174 cond_wake_up_nomb(&fs_info->async_submit_wait);
1175
1176
1177
1178
1179
1180
1181
1182 if (async_chunk->inode)
1183 submit_compressed_extents(async_chunk);
1184}
1185
1186static noinline void async_cow_free(struct btrfs_work *work)
1187{
1188 struct async_chunk *async_chunk;
1189
1190 async_chunk = container_of(work, struct async_chunk, work);
1191 if (async_chunk->inode)
1192 btrfs_add_delayed_iput(async_chunk->inode);
1193
1194
1195
1196
1197 if (atomic_dec_and_test(async_chunk->pending))
1198 kvfree(async_chunk->pending);
1199}
1200
1201static int cow_file_range_async(struct inode *inode, struct page *locked_page,
1202 u64 start, u64 end, int *page_started,
1203 unsigned long *nr_written,
1204 unsigned int write_flags)
1205{
1206 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
1207 struct async_cow *ctx;
1208 struct async_chunk *async_chunk;
1209 unsigned long nr_pages;
1210 u64 cur_end;
1211 u64 num_chunks = DIV_ROUND_UP(end - start, SZ_512K);
1212 int i;
1213 bool should_compress;
1214 unsigned nofs_flag;
1215
1216 unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
1217
1218 if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
1219 !btrfs_test_opt(fs_info, FORCE_COMPRESS)) {
1220 num_chunks = 1;
1221 should_compress = false;
1222 } else {
1223 should_compress = true;
1224 }
1225
1226 nofs_flag = memalloc_nofs_save();
1227 ctx = kvmalloc(struct_size(ctx, chunks, num_chunks), GFP_KERNEL);
1228 memalloc_nofs_restore(nofs_flag);
1229
1230 if (!ctx) {
1231 unsigned clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC |
1232 EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
1233 EXTENT_DO_ACCOUNTING;
1234 unsigned long page_ops = PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
1235 PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK |
1236 PAGE_SET_ERROR;
1237
1238 extent_clear_unlock_delalloc(inode, start, end, 0, locked_page,
1239 clear_bits, page_ops);
1240 return -ENOMEM;
1241 }
1242
1243 async_chunk = ctx->chunks;
1244 atomic_set(&ctx->num_chunks, num_chunks);
1245
1246 for (i = 0; i < num_chunks; i++) {
1247 if (should_compress)
1248 cur_end = min(end, start + SZ_512K - 1);
1249 else
1250 cur_end = end;
1251
1252
1253
1254
1255
1256 ihold(inode);
1257 async_chunk[i].pending = &ctx->num_chunks;
1258 async_chunk[i].inode = inode;
1259 async_chunk[i].start = start;
1260 async_chunk[i].end = cur_end;
1261 async_chunk[i].locked_page = locked_page;
1262 async_chunk[i].write_flags = write_flags;
1263 INIT_LIST_HEAD(&async_chunk[i].extents);
1264
1265 btrfs_init_work(&async_chunk[i].work,
1266 btrfs_delalloc_helper,
1267 async_cow_start, async_cow_submit,
1268 async_cow_free);
1269
1270 nr_pages = DIV_ROUND_UP(cur_end - start, PAGE_SIZE);
1271 atomic_add(nr_pages, &fs_info->async_delalloc_pages);
1272
1273 btrfs_queue_work(fs_info->delalloc_workers, &async_chunk[i].work);
1274
1275 *nr_written += nr_pages;
1276 start = cur_end + 1;
1277 }
1278 *page_started = 1;
1279 return 0;
1280}
1281
1282static noinline int csum_exist_in_range(struct btrfs_fs_info *fs_info,
1283 u64 bytenr, u64 num_bytes)
1284{
1285 int ret;
1286 struct btrfs_ordered_sum *sums;
1287 LIST_HEAD(list);
1288
1289 ret = btrfs_lookup_csums_range(fs_info->csum_root, bytenr,
1290 bytenr + num_bytes - 1, &list, 0);
1291 if (ret == 0 && list_empty(&list))
1292 return 0;
1293
1294 while (!list_empty(&list)) {
1295 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
1296 list_del(&sums->list);
1297 kfree(sums);
1298 }
1299 if (ret < 0)
1300 return ret;
1301 return 1;
1302}
1303
1304
1305
1306
1307
1308
1309
1310
/*
 * Write back the delalloc range [start, end] of @inode, writing in place
 * (NOCOW) into existing file extents where that is allowed and falling back
 * to cow_file_range() for every sub-range where it is not.
 *
 * The function walks the inode's file extent items starting at @start.  For
 * each REG/PREALLOC extent it runs a series of checks (see the body); if all
 * of them pass an ordered extent is created directly on top of the existing
 * disk extent.  Ranges that fail a check, holes and inline extents are
 * accumulated starting at cow_start and handed to cow_file_range().
 *
 * @locked_page:  passed through to cow_file_range() and
 *                extent_clear_unlock_delalloc()
 * @page_started: passed through to cow_file_range()
 * @force:        when zero, regular (non-prealloc) extents are always COWed
 * @nr_written:   passed through to cow_file_range()
 *
 * Returns 0 on success or the error of the step that failed.  On error the
 * part of the range from cur_offset to @end is cleared and unlocked at the
 * "error" label.
 */
static noinline int run_delalloc_nocow(struct inode *inode,
				       struct page *locked_page,
			      u64 start, u64 end, int *page_started, int force,
			      unsigned long *nr_written)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_buffer *leaf;
	struct btrfs_path *path;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key found_key;
	struct extent_map *em;
	u64 cow_start;
	u64 cur_offset;
	u64 extent_end;
	u64 extent_offset;
	u64 disk_bytenr;
	u64 num_bytes;
	u64 disk_num_bytes;
	u64 ram_bytes;
	int extent_type;
	int ret;
	int type;
	int nocow;
	int check_prev = 1;
	bool nolock;
	u64 ino = btrfs_ino(BTRFS_I(inode));

	path = btrfs_alloc_path();
	if (!path) {
		/* Nothing was processed yet: clear and unlock the whole range. */
		extent_clear_unlock_delalloc(inode, start, end, end,
					     locked_page,
					     EXTENT_LOCKED | EXTENT_DELALLOC |
					     EXTENT_DO_ACCOUNTING |
					     EXTENT_DEFRAG, PAGE_UNLOCK |
					     PAGE_CLEAR_DIRTY |
					     PAGE_SET_WRITEBACK |
					     PAGE_END_WRITEBACK);
		return -ENOMEM;
	}

	/* True for the free space cache inode; relaxes some checks below. */
	nolock = btrfs_is_free_space_inode(BTRFS_I(inode));

	/* (u64)-1 means "no pending range waiting to be COWed". */
	cow_start = (u64)-1;
	cur_offset = start;
	while (1) {
		ret = btrfs_lookup_file_extent(NULL, root, path, ino,
					       cur_offset, 0);
		if (ret < 0)
			goto error;
		/*
		 * No exact match: on the first pass only, step back one slot
		 * if the previous item is an extent data item of this inode.
		 */
		if (ret > 0 && path->slots[0] > 0 && check_prev) {
			leaf = path->nodes[0];
			btrfs_item_key_to_cpu(leaf, &found_key,
					      path->slots[0] - 1);
			if (found_key.objectid == ino &&
			    found_key.type == BTRFS_EXTENT_DATA_KEY)
				path->slots[0]--;
		}
		check_prev = 0;
next_slot:
		leaf = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0) {
				/* Make the error path also clean the pending COW range. */
				if (cow_start != (u64)-1)
					cur_offset = cow_start;
				goto error;
			}
			if (ret > 0)
				break;
			leaf = path->nodes[0];
		}

		nocow = 0;
		disk_bytenr = 0;
		num_bytes = 0;
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		/* Past the items of this inode: done scanning. */
		if (found_key.objectid > ino)
			break;
		/* Skip items that sort before this inode's extent data items. */
		if (WARN_ON_ONCE(found_key.objectid < ino) ||
		    found_key.type < BTRFS_EXTENT_DATA_KEY) {
			path->slots[0]++;
			goto next_slot;
		}
		if (found_key.type > BTRFS_EXTENT_DATA_KEY ||
		    found_key.offset > end)
			break;

		/*
		 * The next extent item starts beyond cur_offset: treat the gap
		 * [cur_offset, found_key.offset) as a range that must be COWed.
		 */
		if (found_key.offset > cur_offset) {
			extent_end = found_key.offset;
			extent_type = 0;
			goto out_check;
		}

		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		extent_type = btrfs_file_extent_type(leaf, fi);

		ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
		if (extent_type == BTRFS_FILE_EXTENT_REG ||
		    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
			disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
			extent_offset = btrfs_file_extent_offset(leaf, fi);
			extent_end = found_key.offset +
				btrfs_file_extent_num_bytes(leaf, fi);
			disk_num_bytes =
				btrfs_file_extent_disk_num_bytes(leaf, fi);
			/* Extent ends before our range: not interesting. */
			if (extent_end <= start) {
				path->slots[0]++;
				goto next_slot;
			}
			/* disk_bytenr of 0: no on-disk extent to write into. */
			if (disk_bytenr == 0)
				goto out_check;
			/* Encoded extents are never overwritten in place. */
			if (btrfs_file_extent_compression(leaf, fi) ||
			    btrfs_file_extent_encryption(leaf, fi) ||
			    btrfs_file_extent_other_encoding(leaf, fi))
				goto out_check;
			/*
			 * An extent whose generation is not newer than the
			 * root's last snapshot is COWed (skipped for the free
			 * space inode).
			 */
			if (!nolock &&
			    btrfs_file_extent_generation(leaf, fi) <=
			    btrfs_root_last_snapshot(&root->root_item))
				goto out_check;
			/* Without @force only PREALLOC extents may be NOCOWed. */
			if (extent_type == BTRFS_FILE_EXTENT_REG && !force)
				goto out_check;
			if (btrfs_extent_readonly(fs_info, disk_bytenr))
				goto out_check;
			ret = btrfs_cross_ref_exist(root, ino,
						    found_key.offset -
						    extent_offset, disk_bytenr);
			if (ret) {
				/*
				 * Negative: the check itself failed, abort.
				 * Positive: fall back to COW for this extent.
				 */
				if (ret < 0) {
					if (cow_start != (u64)-1)
						cur_offset = cow_start;
					goto error;
				}

				WARN_ON_ONCE(nolock);
				goto out_check;
			}
			/* Translate cur_offset into a disk byte number. */
			disk_bytenr += extent_offset;
			disk_bytenr += cur_offset - found_key.offset;
			num_bytes = min(end + 1, extent_end) - cur_offset;
			/* COW while the root has snapshot_force_cow set. */
			if (!nolock && atomic_read(&root->snapshot_force_cow))
				goto out_check;
			/*
			 * COW if checksums already exist for the target disk
			 * range; a negative return aborts the writeback.
			 */
			ret = csum_exist_in_range(fs_info, disk_bytenr,
						  num_bytes);
			if (ret) {
				/* Same convention as for btrfs_cross_ref_exist(). */
				if (ret < 0) {
					if (cow_start != (u64)-1)
						cur_offset = cow_start;
					goto error;
				}
				WARN_ON_ONCE(nolock);
				goto out_check;
			}
			/*
			 * Take a nocow-writers reference on the target; it is
			 * dropped with btrfs_dec_nocow_writers() on every exit
			 * path below.
			 */
			if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr))
				goto out_check;
			nocow = 1;
		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
			/* Inline extents are always COWed (nocow stays 0). */
			extent_end = found_key.offset +
				btrfs_file_extent_ram_bytes(leaf, fi);
			extent_end = ALIGN(extent_end,
					   fs_info->sectorsize);
		} else {
			BUG();
		}
out_check:
		if (extent_end <= start) {
			path->slots[0]++;
			if (nocow)
				btrfs_dec_nocow_writers(fs_info, disk_bytenr);
			goto next_slot;
		}
		if (!nocow) {
			/* Start or extend the pending COW range and move on. */
			if (cow_start == (u64)-1)
				cow_start = cur_offset;
			cur_offset = extent_end;
			if (cur_offset > end)
				break;
			path->slots[0]++;
			goto next_slot;
		}

		btrfs_release_path(path);
		/*
		 * Flush the pending COW range that precedes this NOCOW extent
		 * before creating the ordered extent for it.
		 */
		if (cow_start != (u64)-1) {
			ret = cow_file_range(inode, locked_page,
					     cow_start, found_key.offset - 1,
					     end, page_started, nr_written, 1,
					     NULL);
			if (ret) {
				if (nocow)
					btrfs_dec_nocow_writers(fs_info,
								disk_bytenr);
				goto error;
			}
			cow_start = (u64)-1;
		}

		if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
			u64 orig_start = found_key.offset - extent_offset;

			/* Writing into a prealloc extent needs an io extent map. */
			em = create_io_em(inode, cur_offset, num_bytes,
					  orig_start,
					  disk_bytenr, /* block_start */
					  num_bytes, /* block_len */
					  disk_num_bytes, /* orig_block_len */
					  ram_bytes, BTRFS_COMPRESS_NONE,
					  BTRFS_ORDERED_PREALLOC);
			if (IS_ERR(em)) {
				if (nocow)
					btrfs_dec_nocow_writers(fs_info,
								disk_bytenr);
				ret = PTR_ERR(em);
				goto error;
			}
			free_extent_map(em);
		}

		if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
			type = BTRFS_ORDERED_PREALLOC;
		} else {
			type = BTRFS_ORDERED_NOCOW;
		}

		ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr,
					       num_bytes, num_bytes, type);
		if (nocow)
			btrfs_dec_nocow_writers(fs_info, disk_bytenr);
		BUG_ON(ret);

		if (root->root_key.objectid ==
		    BTRFS_DATA_RELOC_TREE_OBJECTID)
			/*
			 * A failure here is only acted upon after the range
			 * has been cleared/unlocked below, i.e. once the
			 * ordered extent exists and cur_offset has moved past
			 * it.
			 */
			ret = btrfs_reloc_clone_csums(inode, cur_offset,
						      num_bytes);

		extent_clear_unlock_delalloc(inode, cur_offset,
					     cur_offset + num_bytes - 1, end,
					     locked_page, EXTENT_LOCKED |
					     EXTENT_DELALLOC |
					     EXTENT_CLEAR_DATA_RESV,
					     PAGE_UNLOCK | PAGE_SET_PRIVATE2);

		cur_offset = extent_end;

		/*
		 * Deferred btrfs_reloc_clone_csums() error: the error handler
		 * now starts at cur_offset, beyond the ordered extent that was
		 * just created.
		 */
		if (ret)
			goto error;
		if (cur_offset > end)
			break;
	}
	btrfs_release_path(path);

	/* Whatever is left up to @end and not yet pending must be COWed too. */
	if (cur_offset <= end && cow_start == (u64)-1)
		cow_start = cur_offset;

	if (cow_start != (u64)-1) {
		/* With cur_offset == end the error path below will not clear anything. */
		cur_offset = end;
		ret = cow_file_range(inode, locked_page, cow_start, end, end,
				     page_started, nr_written, 1, NULL);
		if (ret)
			goto error;
	}

error:
	/* On failure, clear and unlock the part of the range not handled yet. */
	if (ret && cur_offset < end)
		extent_clear_unlock_delalloc(inode, cur_offset, end, end,
					     locked_page, EXTENT_LOCKED |
					     EXTENT_DELALLOC | EXTENT_DEFRAG |
					     EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
					     PAGE_CLEAR_DIRTY |
					     PAGE_SET_WRITEBACK |
					     PAGE_END_WRITEBACK);
	btrfs_free_path(path);
	return ret;
}
1616
1617static inline int need_force_cow(struct inode *inode, u64 start, u64 end)
1618{
1619
1620 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
1621 !(BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC))
1622 return 0;
1623
1624
1625
1626
1627
1628
1629 if (BTRFS_I(inode)->defrag_bytes &&
1630 test_range_bit(&BTRFS_I(inode)->io_tree, start, end,
1631 EXTENT_DEFRAG, 0, NULL))
1632 return 1;
1633
1634 return 0;
1635}
1636
1637
1638
1639
1640
1641int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page,
1642 u64 start, u64 end, int *page_started, unsigned long *nr_written,
1643 struct writeback_control *wbc)
1644{
1645 int ret;
1646 int force_cow = need_force_cow(inode, start, end);
1647 unsigned int write_flags = wbc_to_write_flags(wbc);
1648
1649 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW && !force_cow) {
1650 ret = run_delalloc_nocow(inode, locked_page, start, end,
1651 page_started, 1, nr_written);
1652 } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) {
1653 ret = run_delalloc_nocow(inode, locked_page, start, end,
1654 page_started, 0, nr_written);
1655 } else if (!inode_can_compress(inode) ||
1656 !inode_need_compress(inode, start, end)) {
1657 ret = cow_file_range(inode, locked_page, start, end, end,
1658 page_started, nr_written, 1, NULL);
1659 } else {
1660 set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
1661 &BTRFS_I(inode)->runtime_flags);
1662 ret = cow_file_range_async(inode, locked_page, start, end,
1663 page_started, nr_written,
1664 write_flags);
1665 }
1666 if (ret)
1667 btrfs_cleanup_ordered_extents(inode, locked_page, start,
1668 end - start + 1);
1669 return ret;
1670}
1671
1672void btrfs_split_delalloc_extent(struct inode *inode,
1673 struct extent_state *orig, u64 split)
1674{
1675 u64 size;
1676
1677
1678 if (!(orig->state & EXTENT_DELALLOC))
1679 return;
1680
1681 size = orig->end - orig->start + 1;
1682 if (size > BTRFS_MAX_EXTENT_SIZE) {
1683 u32 num_extents;
1684 u64 new_size;
1685
1686
1687
1688
1689
1690 new_size = orig->end - split + 1;
1691 num_extents = count_max_extents(new_size);
1692 new_size = split - orig->start;
1693 num_extents += count_max_extents(new_size);
1694 if (count_max_extents(size) >= num_extents)
1695 return;
1696 }
1697
1698 spin_lock(&BTRFS_I(inode)->lock);
1699 btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
1700 spin_unlock(&BTRFS_I(inode)->lock);
1701}
1702
1703
1704
1705
1706
1707
1708void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new,
1709 struct extent_state *other)
1710{
1711 u64 new_size, old_size;
1712 u32 num_extents;
1713
1714
1715 if (!(other->state & EXTENT_DELALLOC))
1716 return;
1717
1718 if (new->start > other->start)
1719 new_size = new->end - other->start + 1;
1720 else
1721 new_size = other->end - new->start + 1;
1722
1723
1724 if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
1725 spin_lock(&BTRFS_I(inode)->lock);
1726 btrfs_mod_outstanding_extents(BTRFS_I(inode), -1);
1727 spin_unlock(&BTRFS_I(inode)->lock);
1728 return;
1729 }
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749 old_size = other->end - other->start + 1;
1750 num_extents = count_max_extents(old_size);
1751 old_size = new->end - new->start + 1;
1752 num_extents += count_max_extents(old_size);
1753 if (count_max_extents(new_size) >= num_extents)
1754 return;
1755
1756 spin_lock(&BTRFS_I(inode)->lock);
1757 btrfs_mod_outstanding_extents(BTRFS_I(inode), -1);
1758 spin_unlock(&BTRFS_I(inode)->lock);
1759}
1760
1761static void btrfs_add_delalloc_inodes(struct btrfs_root *root,
1762 struct inode *inode)
1763{
1764 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
1765
1766 spin_lock(&root->delalloc_lock);
1767 if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1768 list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
1769 &root->delalloc_inodes);
1770 set_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1771 &BTRFS_I(inode)->runtime_flags);
1772 root->nr_delalloc_inodes++;
1773 if (root->nr_delalloc_inodes == 1) {
1774 spin_lock(&fs_info->delalloc_root_lock);
1775 BUG_ON(!list_empty(&root->delalloc_root));
1776 list_add_tail(&root->delalloc_root,
1777 &fs_info->delalloc_roots);
1778 spin_unlock(&fs_info->delalloc_root_lock);
1779 }
1780 }
1781 spin_unlock(&root->delalloc_lock);
1782}
1783
1784
1785void __btrfs_del_delalloc_inode(struct btrfs_root *root,
1786 struct btrfs_inode *inode)
1787{
1788 struct btrfs_fs_info *fs_info = root->fs_info;
1789
1790 if (!list_empty(&inode->delalloc_inodes)) {
1791 list_del_init(&inode->delalloc_inodes);
1792 clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1793 &inode->runtime_flags);
1794 root->nr_delalloc_inodes--;
1795 if (!root->nr_delalloc_inodes) {
1796 ASSERT(list_empty(&root->delalloc_inodes));
1797 spin_lock(&fs_info->delalloc_root_lock);
1798 BUG_ON(list_empty(&root->delalloc_root));
1799 list_del_init(&root->delalloc_root);
1800 spin_unlock(&fs_info->delalloc_root_lock);
1801 }
1802 }
1803}
1804
1805static void btrfs_del_delalloc_inode(struct btrfs_root *root,
1806 struct btrfs_inode *inode)
1807{
1808 spin_lock(&root->delalloc_lock);
1809 __btrfs_del_delalloc_inode(root, inode);
1810 spin_unlock(&root->delalloc_lock);
1811}
1812
1813
1814
1815
1816
1817void btrfs_set_delalloc_extent(struct inode *inode, struct extent_state *state,
1818 unsigned *bits)
1819{
1820 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
1821
1822 if ((*bits & EXTENT_DEFRAG) && !(*bits & EXTENT_DELALLOC))
1823 WARN_ON(1);
1824
1825
1826
1827
1828
1829 if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1830 struct btrfs_root *root = BTRFS_I(inode)->root;
1831 u64 len = state->end + 1 - state->start;
1832 u32 num_extents = count_max_extents(len);
1833 bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode));
1834
1835 spin_lock(&BTRFS_I(inode)->lock);
1836 btrfs_mod_outstanding_extents(BTRFS_I(inode), num_extents);
1837 spin_unlock(&BTRFS_I(inode)->lock);
1838
1839
1840 if (btrfs_is_testing(fs_info))
1841 return;
1842
1843 percpu_counter_add_batch(&fs_info->delalloc_bytes, len,
1844 fs_info->delalloc_batch);
1845 spin_lock(&BTRFS_I(inode)->lock);
1846 BTRFS_I(inode)->delalloc_bytes += len;
1847 if (*bits & EXTENT_DEFRAG)
1848 BTRFS_I(inode)->defrag_bytes += len;
1849 if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1850 &BTRFS_I(inode)->runtime_flags))
1851 btrfs_add_delalloc_inodes(root, inode);
1852 spin_unlock(&BTRFS_I(inode)->lock);
1853 }
1854
1855 if (!(state->state & EXTENT_DELALLOC_NEW) &&
1856 (*bits & EXTENT_DELALLOC_NEW)) {
1857 spin_lock(&BTRFS_I(inode)->lock);
1858 BTRFS_I(inode)->new_delalloc_bytes += state->end + 1 -
1859 state->start;
1860 spin_unlock(&BTRFS_I(inode)->lock);
1861 }
1862}
1863
1864
1865
1866
1867
1868void btrfs_clear_delalloc_extent(struct inode *vfs_inode,
1869 struct extent_state *state, unsigned *bits)
1870{
1871 struct btrfs_inode *inode = BTRFS_I(vfs_inode);
1872 struct btrfs_fs_info *fs_info = btrfs_sb(vfs_inode->i_sb);
1873 u64 len = state->end + 1 - state->start;
1874 u32 num_extents = count_max_extents(len);
1875
1876 if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG)) {
1877 spin_lock(&inode->lock);
1878 inode->defrag_bytes -= len;
1879 spin_unlock(&inode->lock);
1880 }
1881
1882
1883
1884
1885
1886
1887 if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1888 struct btrfs_root *root = inode->root;
1889 bool do_list = !btrfs_is_free_space_inode(inode);
1890
1891 spin_lock(&inode->lock);
1892 btrfs_mod_outstanding_extents(inode, -num_extents);
1893 spin_unlock(&inode->lock);
1894
1895
1896
1897
1898
1899
1900 if (*bits & EXTENT_CLEAR_META_RESV &&
1901 root != fs_info->tree_root)
1902 btrfs_delalloc_release_metadata(inode, len, false);
1903
1904
1905 if (btrfs_is_testing(fs_info))
1906 return;
1907
1908 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID &&
1909 do_list && !(state->state & EXTENT_NORESERVE) &&
1910 (*bits & EXTENT_CLEAR_DATA_RESV))
1911 btrfs_free_reserved_data_space_noquota(
1912 &inode->vfs_inode,
1913 state->start, len);
1914
1915 percpu_counter_add_batch(&fs_info->delalloc_bytes, -len,
1916 fs_info->delalloc_batch);
1917 spin_lock(&inode->lock);
1918 inode->delalloc_bytes -= len;
1919 if (do_list && inode->delalloc_bytes == 0 &&
1920 test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1921 &inode->runtime_flags))
1922 btrfs_del_delalloc_inode(root, inode);
1923 spin_unlock(&inode->lock);
1924 }
1925
1926 if ((state->state & EXTENT_DELALLOC_NEW) &&
1927 (*bits & EXTENT_DELALLOC_NEW)) {
1928 spin_lock(&inode->lock);
1929 ASSERT(inode->new_delalloc_bytes >= len);
1930 inode->new_delalloc_bytes -= len;
1931 spin_unlock(&inode->lock);
1932 }
1933}
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
1950 unsigned long bio_flags)
1951{
1952 struct inode *inode = page->mapping->host;
1953 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
1954 u64 logical = (u64)bio->bi_iter.bi_sector << 9;
1955 u64 length = 0;
1956 u64 map_length;
1957 int ret;
1958 struct btrfs_io_geometry geom;
1959
1960 if (bio_flags & EXTENT_BIO_COMPRESSED)
1961 return 0;
1962
1963 length = bio->bi_iter.bi_size;
1964 map_length = length;
1965 ret = btrfs_get_io_geometry(fs_info, btrfs_op(bio), logical, map_length,
1966 &geom);
1967 if (ret < 0)
1968 return ret;
1969
1970 if (geom.len < length + size)
1971 return 1;
1972 return 0;
1973}
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983static blk_status_t btrfs_submit_bio_start(void *private_data, struct bio *bio,
1984 u64 bio_offset)
1985{
1986 struct inode *inode = private_data;
1987 blk_status_t ret = 0;
1988
1989 ret = btrfs_csum_one_bio(inode, bio, 0, 0);
1990 BUG_ON(ret);
1991 return 0;
1992}
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
2013 int mirror_num,
2014 unsigned long bio_flags)
2015
2016{
2017 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2018 struct btrfs_root *root = BTRFS_I(inode)->root;
2019 enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
2020 blk_status_t ret = 0;
2021 int skip_sum;
2022 int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
2023
2024 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
2025
2026 if (btrfs_is_free_space_inode(BTRFS_I(inode)))
2027 metadata = BTRFS_WQ_ENDIO_FREE_SPACE;
2028
2029 if (bio_op(bio) != REQ_OP_WRITE) {
2030 ret = btrfs_bio_wq_end_io(fs_info, bio, metadata);
2031 if (ret)
2032 goto out;
2033
2034 if (bio_flags & EXTENT_BIO_COMPRESSED) {
2035 ret = btrfs_submit_compressed_read(inode, bio,
2036 mirror_num,
2037 bio_flags);
2038 goto out;
2039 } else if (!skip_sum) {
2040 ret = btrfs_lookup_bio_sums(inode, bio, NULL);
2041 if (ret)
2042 goto out;
2043 }
2044 goto mapit;
2045 } else if (async && !skip_sum) {
2046
2047 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
2048 goto mapit;
2049
2050 ret = btrfs_wq_submit_bio(fs_info, bio, mirror_num, bio_flags,
2051 0, inode, btrfs_submit_bio_start);
2052 goto out;
2053 } else if (!skip_sum) {
2054 ret = btrfs_csum_one_bio(inode, bio, 0, 0);
2055 if (ret)
2056 goto out;
2057 }
2058
2059mapit:
2060 ret = btrfs_map_bio(fs_info, bio, mirror_num, 0);
2061
2062out:
2063 if (ret) {
2064 bio->bi_status = ret;
2065 bio_endio(bio);
2066 }
2067 return ret;
2068}
2069
2070
2071
2072
2073
2074static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
2075 struct inode *inode, struct list_head *list)
2076{
2077 struct btrfs_ordered_sum *sum;
2078 int ret;
2079
2080 list_for_each_entry(sum, list, list) {
2081 trans->adding_csums = true;
2082 ret = btrfs_csum_file_blocks(trans,
2083 BTRFS_I(inode)->root->fs_info->csum_root, sum);
2084 trans->adding_csums = false;
2085 if (ret)
2086 return ret;
2087 }
2088 return 0;
2089}
2090
2091int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
2092 unsigned int extra_bits,
2093 struct extent_state **cached_state, int dedupe)
2094{
2095 WARN_ON(PAGE_ALIGNED(end));
2096 return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
2097 extra_bits, cached_state);
2098}
2099
2100
/*
 * Work item queued by btrfs_writepage_cow_fixup() and processed by
 * btrfs_writepage_fixup_worker().
 */
struct btrfs_writepage_fixup {
	/* Page to fix up; a reference is held while the work is queued. */
	struct page *page;
	/* Work entry queued on fs_info->fixup_workers. */
	struct btrfs_work work;
};
2105
2106static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
2107{
2108 struct btrfs_writepage_fixup *fixup;
2109 struct btrfs_ordered_extent *ordered;
2110 struct extent_state *cached_state = NULL;
2111 struct extent_changeset *data_reserved = NULL;
2112 struct page *page;
2113 struct inode *inode;
2114 u64 page_start;
2115 u64 page_end;
2116 int ret;
2117
2118 fixup = container_of(work, struct btrfs_writepage_fixup, work);
2119 page = fixup->page;
2120again:
2121 lock_page(page);
2122 if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
2123 ClearPageChecked(page);
2124 goto out_page;
2125 }
2126
2127 inode = page->mapping->host;
2128 page_start = page_offset(page);
2129 page_end = page_offset(page) + PAGE_SIZE - 1;
2130
2131 lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end,
2132 &cached_state);
2133
2134
2135 if (PagePrivate2(page))
2136 goto out;
2137
2138 ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start,
2139 PAGE_SIZE);
2140 if (ordered) {
2141 unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start,
2142 page_end, &cached_state);
2143 unlock_page(page);
2144 btrfs_start_ordered_extent(inode, ordered, 1);
2145 btrfs_put_ordered_extent(ordered);
2146 goto again;
2147 }
2148
2149 ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
2150 PAGE_SIZE);
2151 if (ret) {
2152 mapping_set_error(page->mapping, ret);
2153 end_extent_writepage(page, ret, page_start, page_end);
2154 ClearPageChecked(page);
2155 goto out;
2156 }
2157
2158 ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0,
2159 &cached_state, 0);
2160 if (ret) {
2161 mapping_set_error(page->mapping, ret);
2162 end_extent_writepage(page, ret, page_start, page_end);
2163 ClearPageChecked(page);
2164 goto out;
2165 }
2166
2167 ClearPageChecked(page);
2168 set_page_dirty(page);
2169 btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, false);
2170out:
2171 unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
2172 &cached_state);
2173out_page:
2174 unlock_page(page);
2175 put_page(page);
2176 kfree(fixup);
2177 extent_changeset_free(data_reserved);
2178}
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end)
2192{
2193 struct inode *inode = page->mapping->host;
2194 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2195 struct btrfs_writepage_fixup *fixup;
2196
2197
2198 if (TestClearPagePrivate2(page))
2199 return 0;
2200
2201 if (PageChecked(page))
2202 return -EAGAIN;
2203
2204 fixup = kzalloc(sizeof(*fixup), GFP_NOFS);
2205 if (!fixup)
2206 return -EAGAIN;
2207
2208 SetPageChecked(page);
2209 get_page(page);
2210 btrfs_init_work(&fixup->work, btrfs_fixup_helper,
2211 btrfs_writepage_fixup_worker, NULL, NULL);
2212 fixup->page = page;
2213 btrfs_queue_work(fs_info->fixup_workers, &fixup->work);
2214 return -EBUSY;
2215}
2216
2217static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
2218 struct inode *inode, u64 file_pos,
2219 u64 disk_bytenr, u64 disk_num_bytes,
2220 u64 num_bytes, u64 ram_bytes,
2221 u8 compression, u8 encryption,
2222 u16 other_encoding, int extent_type)
2223{
2224 struct btrfs_root *root = BTRFS_I(inode)->root;
2225 struct btrfs_file_extent_item *fi;
2226 struct btrfs_path *path;
2227 struct extent_buffer *leaf;
2228 struct btrfs_key ins;
2229 u64 qg_released;
2230 int extent_inserted = 0;
2231 int ret;
2232
2233 path = btrfs_alloc_path();
2234 if (!path)
2235 return -ENOMEM;
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246 ret = __btrfs_drop_extents(trans, root, inode, path, file_pos,
2247 file_pos + num_bytes, NULL, 0,
2248 1, sizeof(*fi), &extent_inserted);
2249 if (ret)
2250 goto out;
2251
2252 if (!extent_inserted) {
2253 ins.objectid = btrfs_ino(BTRFS_I(inode));
2254 ins.offset = file_pos;
2255 ins.type = BTRFS_EXTENT_DATA_KEY;
2256
2257 path->leave_spinning = 1;
2258 ret = btrfs_insert_empty_item(trans, root, path, &ins,
2259 sizeof(*fi));
2260 if (ret)
2261 goto out;
2262 }
2263 leaf = path->nodes[0];
2264 fi = btrfs_item_ptr(leaf, path->slots[0],
2265 struct btrfs_file_extent_item);
2266 btrfs_set_file_extent_generation(leaf, fi, trans->transid);
2267 btrfs_set_file_extent_type(leaf, fi, extent_type);
2268 btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr);
2269 btrfs_set_file_extent_disk_num_bytes(leaf, fi, disk_num_bytes);
2270 btrfs_set_file_extent_offset(leaf, fi, 0);
2271 btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
2272 btrfs_set_file_extent_ram_bytes(leaf, fi, ram_bytes);
2273 btrfs_set_file_extent_compression(leaf, fi, compression);
2274 btrfs_set_file_extent_encryption(leaf, fi, encryption);
2275 btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding);
2276
2277 btrfs_mark_buffer_dirty(leaf);
2278 btrfs_release_path(path);
2279
2280 inode_add_bytes(inode, num_bytes);
2281
2282 ins.objectid = disk_bytenr;
2283 ins.offset = disk_num_bytes;
2284 ins.type = BTRFS_EXTENT_ITEM_KEY;
2285
2286
2287
2288
2289
2290 ret = btrfs_qgroup_release_data(inode, file_pos, ram_bytes);
2291 if (ret < 0)
2292 goto out;
2293 qg_released = ret;
2294 ret = btrfs_alloc_reserved_file_extent(trans, root,
2295 btrfs_ino(BTRFS_I(inode)),
2296 file_pos, qg_released, &ins);
2297out:
2298 btrfs_free_path(path);
2299
2300 return ret;
2301}
2302
2303
/*
 * One reference to an old extent found in another root/inode; filled in by
 * record_one_backref() and kept in the rbtree of the new extent.
 */
struct sa_defrag_extent_backref {
	/* Linkage in new_sa_defrag_extent::root, ordered by backref_comp(). */
	struct rb_node node;
	/* The old extent this backref refers to. */
	struct old_sa_defrag_extent *old;
	/* Root, inode number and file offset of the referencing file extent. */
	u64 root_id;
	u64 inum;
	u64 file_pos;
	/* Extent offset and length read from the referencing file extent item. */
	u64 extent_offset;
	u64 num_bytes;
	/* Generation of the referencing file extent item when it was recorded. */
	u64 generation;
};
2314
/*
 * Description of one old extent; linked on new_sa_defrag_extent::head.
 */
struct old_sa_defrag_extent {
	/* Linkage in new_sa_defrag_extent::head. */
	struct list_head list;
	/* The new extent this old one belongs to. */
	struct new_sa_defrag_extent *new;

	/*
	 * extent_offset + offset is the start, and len the length, of the
	 * region inside the disk extent that recorded backrefs must overlap
	 * (see record_one_backref()).
	 */
	u64 extent_offset;
	/* Disk byte number of the old extent. */
	u64 bytenr;
	u64 offset;
	u64 len;
	/* Number of backrefs recorded for this old extent. */
	int count;
};
2325
/*
 * State for one new extent whose old extents' backrefs are being collected.
 */
struct new_sa_defrag_extent {
	/* rbtree of struct sa_defrag_extent_backref, see backref_insert(). */
	struct rb_root root;
	/* List of struct old_sa_defrag_extent. */
	struct list_head head;
	/* Path used by record_one_backref(); set by record_extent_backrefs(). */
	struct btrfs_path *path;
	/* The inode the new extent belongs to. */
	struct inode *inode;
	/* NOTE(review): presumably file offset and length of the new extent - confirm. */
	u64 file_pos;
	u64 len;
	/* Disk byte number of the new extent (compared in relink_is_mergable()). */
	u64 bytenr;
	/* NOTE(review): presumably the on-disk length of the new extent - confirm. */
	u64 disk_len;
	/* Compression type of the new extent (compared in relink_is_mergable()). */
	u8 compress_type;
};
2337
2338static int backref_comp(struct sa_defrag_extent_backref *b1,
2339 struct sa_defrag_extent_backref *b2)
2340{
2341 if (b1->root_id < b2->root_id)
2342 return -1;
2343 else if (b1->root_id > b2->root_id)
2344 return 1;
2345
2346 if (b1->inum < b2->inum)
2347 return -1;
2348 else if (b1->inum > b2->inum)
2349 return 1;
2350
2351 if (b1->file_pos < b2->file_pos)
2352 return -1;
2353 else if (b1->file_pos > b2->file_pos)
2354 return 1;
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368 return 0;
2369}
2370
2371static void backref_insert(struct rb_root *root,
2372 struct sa_defrag_extent_backref *backref)
2373{
2374 struct rb_node **p = &root->rb_node;
2375 struct rb_node *parent = NULL;
2376 struct sa_defrag_extent_backref *entry;
2377 int ret;
2378
2379 while (*p) {
2380 parent = *p;
2381 entry = rb_entry(parent, struct sa_defrag_extent_backref, node);
2382
2383 ret = backref_comp(backref, entry);
2384 if (ret < 0)
2385 p = &(*p)->rb_left;
2386 else
2387 p = &(*p)->rb_right;
2388 }
2389
2390 rb_link_node(&backref->node, parent, p);
2391 rb_insert_color(&backref->node, root);
2392}
2393
2394
2395
2396
2397static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
2398 void *ctx)
2399{
2400 struct btrfs_file_extent_item *extent;
2401 struct old_sa_defrag_extent *old = ctx;
2402 struct new_sa_defrag_extent *new = old->new;
2403 struct btrfs_path *path = new->path;
2404 struct btrfs_key key;
2405 struct btrfs_root *root;
2406 struct sa_defrag_extent_backref *backref;
2407 struct extent_buffer *leaf;
2408 struct inode *inode = new->inode;
2409 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2410 int slot;
2411 int ret;
2412 u64 extent_offset;
2413 u64 num_bytes;
2414
2415 if (BTRFS_I(inode)->root->root_key.objectid == root_id &&
2416 inum == btrfs_ino(BTRFS_I(inode)))
2417 return 0;
2418
2419 key.objectid = root_id;
2420 key.type = BTRFS_ROOT_ITEM_KEY;
2421 key.offset = (u64)-1;
2422
2423 root = btrfs_read_fs_root_no_name(fs_info, &key);
2424 if (IS_ERR(root)) {
2425 if (PTR_ERR(root) == -ENOENT)
2426 return 0;
2427 WARN_ON(1);
2428 btrfs_debug(fs_info, "inum=%llu, offset=%llu, root_id=%llu",
2429 inum, offset, root_id);
2430 return PTR_ERR(root);
2431 }
2432
2433 key.objectid = inum;
2434 key.type = BTRFS_EXTENT_DATA_KEY;
2435 if (offset > (u64)-1 << 32)
2436 key.offset = 0;
2437 else
2438 key.offset = offset;
2439
2440 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2441 if (WARN_ON(ret < 0))
2442 return ret;
2443 ret = 0;
2444
2445 while (1) {
2446 cond_resched();
2447
2448 leaf = path->nodes[0];
2449 slot = path->slots[0];
2450
2451 if (slot >= btrfs_header_nritems(leaf)) {
2452 ret = btrfs_next_leaf(root, path);
2453 if (ret < 0) {
2454 goto out;
2455 } else if (ret > 0) {
2456 ret = 0;
2457 goto out;
2458 }
2459 continue;
2460 }
2461
2462 path->slots[0]++;
2463
2464 btrfs_item_key_to_cpu(leaf, &key, slot);
2465
2466 if (key.objectid > inum)
2467 goto out;
2468
2469 if (key.objectid < inum || key.type != BTRFS_EXTENT_DATA_KEY)
2470 continue;
2471
2472 extent = btrfs_item_ptr(leaf, slot,
2473 struct btrfs_file_extent_item);
2474
2475 if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr)
2476 continue;
2477
2478
2479
2480
2481
2482
2483 if (key.offset != offset)
2484 continue;
2485
2486 extent_offset = btrfs_file_extent_offset(leaf, extent);
2487 num_bytes = btrfs_file_extent_num_bytes(leaf, extent);
2488
2489 if (extent_offset >= old->extent_offset + old->offset +
2490 old->len || extent_offset + num_bytes <=
2491 old->extent_offset + old->offset)
2492 continue;
2493 break;
2494 }
2495
2496 backref = kmalloc(sizeof(*backref), GFP_NOFS);
2497 if (!backref) {
2498 ret = -ENOENT;
2499 goto out;
2500 }
2501
2502 backref->root_id = root_id;
2503 backref->inum = inum;
2504 backref->file_pos = offset;
2505 backref->num_bytes = num_bytes;
2506 backref->extent_offset = extent_offset;
2507 backref->generation = btrfs_file_extent_generation(leaf, extent);
2508 backref->old = old;
2509 backref_insert(&new->root, backref);
2510 old->count++;
2511out:
2512 btrfs_release_path(path);
2513 WARN_ON(ret);
2514 return ret;
2515}
2516
2517static noinline bool record_extent_backrefs(struct btrfs_path *path,
2518 struct new_sa_defrag_extent *new)
2519{
2520 struct btrfs_fs_info *fs_info = btrfs_sb(new->inode->i_sb);
2521 struct old_sa_defrag_extent *old, *tmp;
2522 int ret;
2523
2524 new->path = path;
2525
2526 list_for_each_entry_safe(old, tmp, &new->head, list) {
2527 ret = iterate_inodes_from_logical(old->bytenr +
2528 old->extent_offset, fs_info,
2529 path, record_one_backref,
2530 old, false);
2531 if (ret < 0 && ret != -ENOENT)
2532 return false;
2533
2534
2535 if (!old->count) {
2536 list_del(&old->list);
2537 kfree(old);
2538 }
2539 }
2540
2541 if (list_empty(&new->head))
2542 return false;
2543
2544 return true;
2545}
2546
2547static int relink_is_mergable(struct extent_buffer *leaf,
2548 struct btrfs_file_extent_item *fi,
2549 struct new_sa_defrag_extent *new)
2550{
2551 if (btrfs_file_extent_disk_bytenr(leaf, fi) != new->bytenr)
2552 return 0;
2553
2554 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
2555 return 0;
2556
2557 if (btrfs_file_extent_compression(leaf, fi) != new->compress_type)
2558 return 0;
2559
2560 if (btrfs_file_extent_encryption(leaf, fi) ||
2561 btrfs_file_extent_other_encoding(leaf, fi))
2562 return 0;
2563
2564 return 1;
2565}
2566
2567
2568
2569
/*
 * Relink one backref of an old extent so that the referencing file range
 * points at the new extent described by backref->old->new.
 *
 * @path:    caller-provided path used for the tree searches here
 * @prev:    the backref handled just before this one, or NULL.  When it lies
 *           in the same root and inode and ends exactly where @backref starts,
 *           a merge into the preceding file extent item is attempted first.
 * @backref: the reference to relink
 *
 * Returns 1 when a file extent item was inserted or extended, 0 when the
 * backref was skipped (root gone or read-only, inode lookup failed, an
 * ordered extent was found for the range, no exactly matching file extent
 * item, or a generation mismatch), and a negative errno on failure.
 */
static noinline int relink_extent_backref(struct btrfs_path *path,
				 struct sa_defrag_extent_backref *prev,
				 struct sa_defrag_extent_backref *backref)
{
	struct btrfs_file_extent_item *extent;
	struct btrfs_file_extent_item *item;
	struct btrfs_ordered_extent *ordered;
	struct btrfs_trans_handle *trans;
	struct btrfs_ref ref = { 0 };
	struct btrfs_root *root;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct old_sa_defrag_extent *old = backref->old;
	struct new_sa_defrag_extent *new = old->new;
	struct btrfs_fs_info *fs_info = btrfs_sb(new->inode->i_sb);
	struct inode *inode;
	struct extent_state *cached = NULL;
	int ret = 0;
	u64 start;
	u64 len;
	u64 lock_start;
	u64 lock_end;
	bool merge = false;
	int index;

	/* A merge is only considered for file ranges that are contiguous. */
	if (prev && prev->root_id == backref->root_id &&
	    prev->inum == backref->inum &&
	    prev->file_pos + prev->num_bytes == backref->file_pos)
		merge = true;

	/* Step 1: look up the root that owns the backref. */
	key.objectid = backref->root_id;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	index = srcu_read_lock(&fs_info->subvol_srcu);

	root = btrfs_read_fs_root_no_name(fs_info, &key);
	if (IS_ERR(root)) {
		srcu_read_unlock(&fs_info->subvol_srcu, index);
		/* A missing root is not an error, the backref is skipped. */
		if (PTR_ERR(root) == -ENOENT)
			return 0;
		return PTR_ERR(root);
	}

	/* Read-only roots are left untouched. */
	if (btrfs_root_readonly(root)) {
		srcu_read_unlock(&fs_info->subvol_srcu, index);
		return 0;
	}

	/* Step 2: look up the inode; any lookup failure skips the backref. */
	key.objectid = backref->inum;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;

	inode = btrfs_iget(fs_info->sb, &key, root, NULL);
	if (IS_ERR(inode)) {
		srcu_read_unlock(&fs_info->subvol_srcu, index);
		return 0;
	}

	srcu_read_unlock(&fs_info->subvol_srcu, index);

	/* Step 3: relink, with the referencing file range locked. */
	lock_start = backref->file_pos;
	lock_end = backref->file_pos + backref->num_bytes - 1;
	lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
			 &cached);

	/* Skip the backref (ret stays 0) if an ordered extent is found. */
	ordered = btrfs_lookup_first_ordered_extent(inode, lock_end);
	if (ordered) {
		btrfs_put_ordered_extent(ordered);
		goto out_unlock;
	}

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_unlock;
	}

	key.objectid = backref->inum;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = backref->file_pos;

	/* Read-only search: the item must exist at exactly file_pos. */
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0) {
		goto out_free_path;
	} else if (ret > 0) {
		ret = 0;
		goto out_free_path;
	}

	extent = btrfs_item_ptr(path->nodes[0], path->slots[0],
				struct btrfs_file_extent_item);

	/* A different generation skips the backref; ret is 0 here. */
	if (btrfs_file_extent_generation(path->nodes[0], extent) !=
	    backref->generation)
		goto out_free_path;

	btrfs_release_path(path);

	/*
	 * Compute the file range to rewrite: the overlap of the backref's
	 * range with the recorded old range, both expressed as offsets into
	 * the old extent.
	 */
	start = backref->file_pos;
	if (backref->extent_offset < old->extent_offset + old->offset)
		start += old->extent_offset + old->offset -
			 backref->extent_offset;

	len = min(backref->extent_offset + backref->num_bytes,
		  old->extent_offset + old->offset + old->len);
	len -= max(backref->extent_offset, old->extent_offset + old->offset);

	ret = btrfs_drop_extents(trans, root, inode, start,
				 start + len, 1);
	if (ret)
		goto out_free_path;
again:
	key.objectid = btrfs_ino(BTRFS_I(inode));
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = start;

	path->leave_spinning = 1;
	if (merge) {
		struct btrfs_file_extent_item *fi;
		u64 extent_len;
		struct btrfs_key found_key;

		ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
		if (ret < 0)
			goto out_free_path;

		/*
		 * Step back to the item preceding the search position.
		 * NOTE(review): slots[0] is decremented without checking that
		 * it is non-zero - confirm this cannot happen here.
		 */
		path->slots[0]--;
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		extent_len = btrfs_file_extent_num_bytes(leaf, fi);

		/*
		 * Extend the previous item when it ends at 'start' and is
		 * mergeable with the new extent; otherwise retry without
		 * merging and insert a fresh item.
		 */
		if (extent_len + found_key.offset == start &&
		    relink_is_mergable(leaf, fi, new)) {
			btrfs_set_file_extent_num_bytes(leaf, fi,
							extent_len + len);
			btrfs_mark_buffer_dirty(leaf);
			inode_add_bytes(inode, len);

			ret = 1;
			goto out_free_path;
		} else {
			merge = false;
			btrfs_release_path(path);
			goto again;
		}
	}

	ret = btrfs_insert_empty_item(trans, root, path, &key,
					sizeof(*extent));
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_free_path;
	}

	/* Fill the new file extent item so that it points at the new extent. */
	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_file_extent_item);
	btrfs_set_file_extent_disk_bytenr(leaf, item, new->bytenr);
	btrfs_set_file_extent_disk_num_bytes(leaf, item, new->disk_len);
	btrfs_set_file_extent_offset(leaf, item, start - new->file_pos);
	btrfs_set_file_extent_num_bytes(leaf, item, len);
	btrfs_set_file_extent_ram_bytes(leaf, item, new->len);
	btrfs_set_file_extent_generation(leaf, item, trans->transid);
	btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
	btrfs_set_file_extent_compression(leaf, item, new->compress_type);
	btrfs_set_file_extent_encryption(leaf, item, 0);
	btrfs_set_file_extent_other_encoding(leaf, item, 0);

	btrfs_mark_buffer_dirty(leaf);
	inode_add_bytes(inode, len);
	btrfs_release_path(path);

	/* Queue a delayed reference for the new extent. */
	btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new->bytenr,
			       new->disk_len, 0);
	btrfs_init_data_ref(&ref, backref->root_id, backref->inum,
			    new->file_pos);
	ret = btrfs_inc_extent_ref(trans, &ref);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out_free_path;
	}

	ret = 1;
out_free_path:
	btrfs_release_path(path);
	path->leave_spinning = 0;
	btrfs_end_transaction(trans);
out_unlock:
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
			     &cached);
	iput(inode);
	return ret;
}
2770
2771static void free_sa_defrag_extent(struct new_sa_defrag_extent *new)
2772{
2773 struct old_sa_defrag_extent *old, *tmp;
2774
2775 if (!new)
2776 return;
2777
2778 list_for_each_entry_safe(old, tmp, &new->head, list) {
2779 kfree(old);
2780 }
2781 kfree(new);
2782}
2783
2784static void relink_file_extents(struct new_sa_defrag_extent *new)
2785{
2786 struct btrfs_fs_info *fs_info = btrfs_sb(new->inode->i_sb);
2787 struct btrfs_path *path;
2788 struct sa_defrag_extent_backref *backref;
2789 struct sa_defrag_extent_backref *prev = NULL;
2790 struct rb_node *node;
2791 int ret;
2792
2793 path = btrfs_alloc_path();
2794 if (!path)
2795 return;
2796
2797 if (!record_extent_backrefs(path, new)) {
2798 btrfs_free_path(path);
2799 goto out;
2800 }
2801 btrfs_release_path(path);
2802
2803 while (1) {
2804 node = rb_first(&new->root);
2805 if (!node)
2806 break;
2807 rb_erase(node, &new->root);
2808
2809 backref = rb_entry(node, struct sa_defrag_extent_backref, node);
2810
2811 ret = relink_extent_backref(path, prev, backref);
2812 WARN_ON(ret < 0);
2813
2814 kfree(prev);
2815
2816 if (ret == 1)
2817 prev = backref;
2818 else
2819 prev = NULL;
2820 cond_resched();
2821 }
2822 kfree(prev);
2823
2824 btrfs_free_path(path);
2825out:
2826 free_sa_defrag_extent(new);
2827
2828 atomic_dec(&fs_info->defrag_running);
2829 wake_up(&fs_info->transaction_wait);
2830}
2831
/*
 * Record the file extents of @inode that overlap the range covered by
 * @ordered.
 *
 * A new_sa_defrag_extent describing the ordered extent is allocated, and one
 * old_sa_defrag_extent is queued on its ->head list for every overlapping
 * file extent item that has a non-zero disk bytenr.
 *
 * On success fs_info->defrag_running is incremented and the new structure is
 * returned.  NULL is returned on allocation or tree search failure, after
 * everything recorded so far has been freed.
 */
static struct new_sa_defrag_extent *
record_old_file_extents(struct inode *inode,
			struct btrfs_ordered_extent *ordered)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct old_sa_defrag_extent *old;
	struct new_sa_defrag_extent *new;
	int ret;

	new = kmalloc(sizeof(*new), GFP_NOFS);
	if (!new)
		return NULL;

	/* Describe the new extent using the ordered extent's fields. */
	new->inode = inode;
	new->file_pos = ordered->file_offset;
	new->len = ordered->len;
	new->bytenr = ordered->start;
	new->disk_len = ordered->disk_len;
	new->compress_type = ordered->compress_type;
	new->root = RB_ROOT;
	INIT_LIST_HEAD(&new->head);

	path = btrfs_alloc_path();
	if (!path)
		goto out_kfree;

	key.objectid = btrfs_ino(BTRFS_I(inode));
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = new->file_pos;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out_free_path;
	/*
	 * No exact match: step back one slot so that an item starting before
	 * file_pos is examined as well.
	 */
	if (ret > 0 && path->slots[0] > 0)
		path->slots[0]--;

	/* Walk the file extent items that fall into the new extent's range. */
	while (1) {
		struct btrfs_file_extent_item *extent;
		struct extent_buffer *l;
		int slot;
		u64 num_bytes;
		u64 offset;
		u64 end;
		u64 disk_bytenr;
		u64 extent_offset;

		l = path->nodes[0];
		slot = path->slots[0];

		/* Past the last item of this leaf: move on to the next leaf. */
		if (slot >= btrfs_header_nritems(l)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out_free_path;
			else if (ret > 0)
				break;
			continue;
		}

		btrfs_item_key_to_cpu(l, &key, slot);

		/* Stop at the first item outside of this inode or range. */
		if (key.objectid != btrfs_ino(BTRFS_I(inode)))
			break;
		if (key.type != BTRFS_EXTENT_DATA_KEY)
			break;
		if (key.offset >= new->file_pos + new->len)
			break;

		extent = btrfs_item_ptr(l, slot, struct btrfs_file_extent_item);

		/* Items that end before the range starts are not recorded. */
		num_bytes = btrfs_file_extent_num_bytes(l, extent);
		if (key.offset + num_bytes < new->file_pos)
			goto next;

		/* Items without an on-disk extent (bytenr 0) are skipped. */
		disk_bytenr = btrfs_file_extent_disk_bytenr(l, extent);
		if (!disk_bytenr)
			goto next;

		extent_offset = btrfs_file_extent_offset(l, extent);

		old = kmalloc(sizeof(*old), GFP_NOFS);
		if (!old)
			goto out_free_path;

		/* Clamp the item's file range to the new extent's range. */
		offset = max(new->file_pos, key.offset);
		end = min(new->file_pos + new->len, key.offset + num_bytes);

		old->bytenr = disk_bytenr;
		old->extent_offset = extent_offset;
		old->offset = offset - key.offset;
		old->len = end - offset;
		old->new = new;
		old->count = 0;
		list_add_tail(&old->list, &new->head);
next:
		path->slots[0]++;
		cond_resched();
	}

	btrfs_free_path(path);
	atomic_inc(&fs_info->defrag_running);

	return new;

out_free_path:
	btrfs_free_path(path);
out_kfree:
	free_sa_defrag_extent(new);
	return NULL;
}
2945
2946static void btrfs_release_delalloc_bytes(struct btrfs_fs_info *fs_info,
2947 u64 start, u64 len)
2948{
2949 struct btrfs_block_group_cache *cache;
2950
2951 cache = btrfs_lookup_block_group(fs_info, start);
2952 ASSERT(cache);
2953
2954 spin_lock(&cache->lock);
2955 cache->delalloc_bytes -= len;
2956 spin_unlock(&cache->lock);
2957
2958 btrfs_put_block_group(cache);
2959}
2960
2961
2962
2963
2964
/*
 * Finish an ordered extent once its IO is done.
 *
 * On the regular path this joins a transaction, either marks a preallocated
 * range as written or inserts the reserved file extent, adds the pending
 * checksums and updates i_size and the inode item.  NOCOW ordered extents
 * only get their i_size and inode item updated.
 *
 * Whatever the outcome, the ordered extent is removed from the inode and two
 * references to it are dropped before returning.
 *
 * Returns 0 on success or a negative errno (-EIO when the ordered extent is
 * flagged with BTRFS_ORDERED_IOERR).
 */
static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
{
	struct inode *inode = ordered_extent->inode;
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_trans_handle *trans = NULL;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_state *cached_state = NULL;
	struct new_sa_defrag_extent *new = NULL;
	int compress_type = 0;
	int ret = 0;
	u64 logical_len = ordered_extent->len;
	bool nolock;
	bool truncated = false;
	bool range_locked = false;
	bool clear_new_delalloc_bytes = false;
	bool clear_reserved_extent = true;

	/*
	 * EXTENT_DELALLOC_NEW is only cleared at the end for ordered extents
	 * that are neither NOCOW, nor PREALLOC, nor DIRECT.
	 */
	if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
	    !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags) &&
	    !test_bit(BTRFS_ORDERED_DIRECT, &ordered_extent->flags))
		clear_new_delalloc_bytes = true;

	/* Free space inodes join the transaction with the _nolock variant. */
	nolock = btrfs_is_free_space_inode(BTRFS_I(inode));

	if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) {
		ret = -EIO;
		goto out;
	}

	btrfs_free_io_failure_record(BTRFS_I(inode),
			ordered_extent->file_offset,
			ordered_extent->file_offset +
			ordered_extent->len - 1);

	if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
		truncated = true;
		logical_len = ordered_extent->truncated_len;
		/* The whole extent was truncated, nothing is inserted. */
		if (!logical_len)
			goto out;
	}

	if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
		/* A NOCOW ordered extent must not carry pending checksums. */
		BUG_ON(!list_empty(&ordered_extent->list));

		/*
		 * No file extent is inserted for NOCOW: release the qgroup
		 * data reservation of the range, then update i_size and the
		 * inode item only.
		 */
		btrfs_qgroup_free_data(inode, NULL, ordered_extent->file_offset,
				       ordered_extent->len);
		btrfs_ordered_update_i_size(inode, 0, ordered_extent);
		if (nolock)
			trans = btrfs_join_transaction_nolock(root);
		else
			trans = btrfs_join_transaction(root);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			trans = NULL;
			goto out;
		}
		trans->block_rsv = &BTRFS_I(inode)->block_rsv;
		ret = btrfs_update_inode_fallback(trans, root, inode);
		if (ret)
			btrfs_abort_transaction(trans, ret);
		goto out;
	}

	/* From here on the file range is locked; 'out' unlocks it again. */
	range_locked = true;
	lock_extent_bits(io_tree, ordered_extent->file_offset,
			 ordered_extent->file_offset + ordered_extent->len - 1,
			 &cached_state);

	ret = test_range_bit(io_tree, ordered_extent->file_offset,
			ordered_extent->file_offset + ordered_extent->len - 1,
			EXTENT_DEFRAG, 0, cached_state);
	if (ret) {
		u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
		if (0 && last_snapshot >= BTRFS_I(inode)->generation)
			/*
			 * The leading '0 &&' makes this call unreachable, so
			 * 'new' always stays NULL in this function.
			 */
			new = record_old_file_extents(inode, ordered_extent);

		clear_extent_bit(io_tree, ordered_extent->file_offset,
			ordered_extent->file_offset + ordered_extent->len - 1,
			EXTENT_DEFRAG, 0, 0, &cached_state);
	}

	if (nolock)
		trans = btrfs_join_transaction_nolock(root);
	else
		trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		trans = NULL;
		goto out;
	}

	trans->block_rsv = &BTRFS_I(inode)->block_rsv;

	if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
		compress_type = ordered_extent->compress_type;
	if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
		/* Preallocated range: mark it written instead of inserting. */
		BUG_ON(compress_type);
		btrfs_qgroup_free_data(inode, NULL, ordered_extent->file_offset,
				       ordered_extent->len);
		ret = btrfs_mark_extent_written(trans, BTRFS_I(inode),
						ordered_extent->file_offset,
						ordered_extent->file_offset +
						logical_len);
	} else {
		BUG_ON(root == fs_info->tree_root);
		ret = insert_reserved_file_extent(trans, inode,
						ordered_extent->file_offset,
						ordered_extent->start,
						ordered_extent->disk_len,
						logical_len, logical_len,
						compress_type, 0, 0,
						BTRFS_FILE_EXTENT_REG);
		if (!ret) {
			/*
			 * The extent is now referenced by the file, so the
			 * error path below must not free it any more.
			 */
			clear_reserved_extent = false;
			btrfs_release_delalloc_bytes(fs_info,
						     ordered_extent->start,
						     ordered_extent->disk_len);
		}
	}
	/* Done regardless of 'ret'; the error check only follows it. */
	unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
			   ordered_extent->file_offset, ordered_extent->len,
			   trans->transid);
	if (ret < 0) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	ret = add_pending_csums(trans, inode, &ordered_extent->list);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	btrfs_ordered_update_i_size(inode, 0, ordered_extent);
	ret = btrfs_update_inode_fallback(trans, root, inode);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}
	ret = 0;
out:
	/* Unlock the range and/or clear EXTENT_DELALLOC_NEW in one call. */
	if (range_locked || clear_new_delalloc_bytes) {
		unsigned int clear_bits = 0;

		if (range_locked)
			clear_bits |= EXTENT_LOCKED;
		if (clear_new_delalloc_bytes)
			clear_bits |= EXTENT_DELALLOC_NEW;
		clear_extent_bit(&BTRFS_I(inode)->io_tree,
				 ordered_extent->file_offset,
				 ordered_extent->file_offset +
				 ordered_extent->len - 1,
				 clear_bits,
				 (clear_bits & EXTENT_LOCKED) ? 1 : 0,
				 0, &cached_state);
	}

	if (trans)
		btrfs_end_transaction(trans);

	if (ret || truncated) {
		u64 start, end;

		/*
		 * On truncation only the part past logical_len is affected,
		 * on error the whole range of the ordered extent.
		 */
		if (truncated)
			start = ordered_extent->file_offset + logical_len;
		else
			start = ordered_extent->file_offset;
		end = ordered_extent->file_offset + ordered_extent->len - 1;
		clear_extent_uptodate(io_tree, start, end, NULL);

		/* Drop the cached extent maps of the affected part. */
		btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 0);

		/*
		 * Hand the reserved extent back when something failed or the
		 * extent was truncated down to nothing, but only if it was
		 * not inserted above (clear_reserved_extent still set) and the
		 * ordered extent is neither NOCOW nor PREALLOC.
		 */
		if ((ret || !logical_len) &&
		    clear_reserved_extent &&
		    !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
		    !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags))
			btrfs_free_reserved_extent(fs_info,
						   ordered_extent->start,
						   ordered_extent->disk_len, 1);
	}

	/*
	 * Remove the ordered extent from the inode only now that all updates
	 * for it are finished.
	 */
	btrfs_remove_ordered_extent(inode, ordered_extent);

	/* Relink the recorded old extents, or drop them after an error. */
	if (new) {
		if (ret) {
			free_sa_defrag_extent(new);
			atomic_dec(&fs_info->defrag_running);
		} else {
			relink_file_extents(new);
		}
	}

	/*
	 * Two references are dropped on purpose.
	 * NOTE(review): presumably one belongs to this work item and one was
	 * held on behalf of the ordered tree - confirm against ordered-data.c.
	 */
	btrfs_put_ordered_extent(ordered_extent);

	btrfs_put_ordered_extent(ordered_extent);

	return ret;
}
3189
3190static void finish_ordered_fn(struct btrfs_work *work)
3191{
3192 struct btrfs_ordered_extent *ordered_extent;
3193 ordered_extent = container_of(work, struct btrfs_ordered_extent, work);
3194 btrfs_finish_ordered_io(ordered_extent);
3195}
3196
3197void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start,
3198 u64 end, int uptodate)
3199{
3200 struct inode *inode = page->mapping->host;
3201 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
3202 struct btrfs_ordered_extent *ordered_extent = NULL;
3203 struct btrfs_workqueue *wq;
3204 btrfs_work_func_t func;
3205
3206 trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
3207
3208 ClearPagePrivate2(page);
3209 if (!btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
3210 end - start + 1, uptodate))
3211 return;
3212
3213 if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
3214 wq = fs_info->endio_freespace_worker;
3215 func = btrfs_freespace_write_helper;
3216 } else {
3217 wq = fs_info->endio_write_workers;
3218 func = btrfs_endio_write_helper;
3219 }
3220
3221 btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL,
3222 NULL);
3223 btrfs_queue_work(wq, &ordered_extent->work);
3224}
3225
3226static int __readpage_endio_check(struct inode *inode,
3227 struct btrfs_io_bio *io_bio,
3228 int icsum, struct page *page,
3229 int pgoff, u64 start, size_t len)
3230{
3231 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
3232 SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
3233 char *kaddr;
3234 u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
3235 u8 *csum_expected;
3236 u8 csum[BTRFS_CSUM_SIZE];
3237
3238 csum_expected = ((u8 *)io_bio->csum) + icsum * csum_size;
3239
3240 kaddr = kmap_atomic(page);
3241 shash->tfm = fs_info->csum_shash;
3242
3243 crypto_shash_init(shash);
3244 crypto_shash_update(shash, kaddr + pgoff, len);
3245 crypto_shash_final(shash, csum);
3246
3247 if (memcmp(csum, csum_expected, csum_size))
3248 goto zeroit;
3249
3250 kunmap_atomic(kaddr);
3251 return 0;
3252zeroit:
3253 btrfs_print_data_csum_error(BTRFS_I(inode), start, csum, csum_expected,
3254 io_bio->mirror_num);
3255 memset(kaddr + pgoff, 1, len);
3256 flush_dcache_page(page);
3257 kunmap_atomic(kaddr);
3258 return -EIO;
3259}
3260
3261
3262
3263
3264
3265
3266static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
3267 u64 phy_offset, struct page *page,
3268 u64 start, u64 end, int mirror)
3269{
3270 size_t offset = start - page_offset(page);
3271 struct inode *inode = page->mapping->host;
3272 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3273 struct btrfs_root *root = BTRFS_I(inode)->root;
3274
3275 if (PageChecked(page)) {
3276 ClearPageChecked(page);
3277 return 0;
3278 }
3279
3280 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
3281 return 0;
3282
3283 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
3284 test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
3285 clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM);
3286 return 0;
3287 }
3288
3289 phy_offset >>= inode->i_sb->s_blocksize_bits;
3290 return __readpage_endio_check(inode, io_bio, phy_offset, page, offset,
3291 start, (size_t)(end - start + 1));
3292}
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304void btrfs_add_delayed_iput(struct inode *inode)
3305{
3306 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
3307 struct btrfs_inode *binode = BTRFS_I(inode);
3308
3309 if (atomic_add_unless(&inode->i_count, -1, 1))
3310 return;
3311
3312 atomic_inc(&fs_info->nr_delayed_iputs);
3313 spin_lock(&fs_info->delayed_iput_lock);
3314 ASSERT(list_empty(&binode->delayed_iput));
3315 list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs);
3316 spin_unlock(&fs_info->delayed_iput_lock);
3317 if (!test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags))
3318 wake_up_process(fs_info->cleaner_kthread);
3319}
3320
3321static void run_delayed_iput_locked(struct btrfs_fs_info *fs_info,
3322 struct btrfs_inode *inode)
3323{
3324 list_del_init(&inode->delayed_iput);
3325 spin_unlock(&fs_info->delayed_iput_lock);
3326 iput(&inode->vfs_inode);
3327 if (atomic_dec_and_test(&fs_info->nr_delayed_iputs))
3328 wake_up(&fs_info->delayed_iputs_wait);
3329 spin_lock(&fs_info->delayed_iput_lock);
3330}
3331
3332static void btrfs_run_delayed_iput(struct btrfs_fs_info *fs_info,
3333 struct btrfs_inode *inode)
3334{
3335 if (!list_empty(&inode->delayed_iput)) {
3336 spin_lock(&fs_info->delayed_iput_lock);
3337 if (!list_empty(&inode->delayed_iput))
3338 run_delayed_iput_locked(fs_info, inode);
3339 spin_unlock(&fs_info->delayed_iput_lock);
3340 }
3341}
3342
3343void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
3344{
3345
3346 spin_lock(&fs_info->delayed_iput_lock);
3347 while (!list_empty(&fs_info->delayed_iputs)) {
3348 struct btrfs_inode *inode;
3349
3350 inode = list_first_entry(&fs_info->delayed_iputs,
3351 struct btrfs_inode, delayed_iput);
3352 run_delayed_iput_locked(fs_info, inode);
3353 }
3354 spin_unlock(&fs_info->delayed_iput_lock);
3355}
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367int btrfs_wait_on_delayed_iputs(struct btrfs_fs_info *fs_info)
3368{
3369 int ret = wait_event_killable(fs_info->delayed_iputs_wait,
3370 atomic_read(&fs_info->nr_delayed_iputs) == 0);
3371 if (ret)
3372 return -EINTR;
3373 return 0;
3374}
3375
3376
3377
3378
3379
3380int btrfs_orphan_add(struct btrfs_trans_handle *trans,
3381 struct btrfs_inode *inode)
3382{
3383 int ret;
3384
3385 ret = btrfs_insert_orphan_item(trans, inode->root, btrfs_ino(inode));
3386 if (ret && ret != -EEXIST) {
3387 btrfs_abort_transaction(trans, ret);
3388 return ret;
3389 }
3390
3391 return 0;
3392}
3393
3394
3395
3396
3397
3398static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
3399 struct btrfs_inode *inode)
3400{
3401 return btrfs_del_orphan_item(trans, inode->root, btrfs_ino(inode));
3402}
3403
3404
3405
3406
3407
/*
 * Process the orphan items of @root.
 *
 * Only the first caller per root does the work: the cmpxchg() on
 * root->orphan_cleanup_state lets every later caller return 0 immediately.
 * Orphan items are visited from the highest offset downwards.  For each one
 * the inode named by the item's offset is looked up; the orphan item is
 * deleted when the inode does not exist or still has links, otherwise the
 * inode reference is dropped and the item is counted as unlinked.
 *
 * Returns 0 on success or a negative errno.
 */
int btrfs_orphan_cleanup(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key, found_key;
	struct btrfs_trans_handle *trans;
	struct inode *inode;
	u64 last_objectid = 0;
	int ret = 0, nr_unlink = 0;

	if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
		return 0;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}
	path->reada = READA_BACK;

	/* Start at the largest possible orphan key and walk backwards. */
	key.objectid = BTRFS_ORPHAN_OBJECTID;
	key.type = BTRFS_ORPHAN_ITEM_KEY;
	key.offset = (u64)-1;

	while (1) {
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			goto out;

		/*
		 * An exact match (ret == 0) is used as is.  Otherwise the
		 * slot points past the wanted position, so step back one
		 * item, or stop if there is none before it in this leaf.
		 */
		if (ret > 0) {
			ret = 0;
			if (path->slots[0] == 0)
				break;
			path->slots[0]--;
		}

		/* Read the key of the candidate item. */
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		/* Anything that is not an orphan item ends the walk. */
		if (found_key.objectid != BTRFS_ORPHAN_OBJECTID)
			break;
		if (found_key.type != BTRFS_ORPHAN_ITEM_KEY)
			break;

		/* The key was copied out, the path is no longer needed. */
		btrfs_release_path(path);

		/*
		 * The offset of an orphan item is the objectid of the
		 * orphaned inode.  Seeing the same one twice in a row means
		 * the previous round did not get rid of it, so give up
		 * instead of looping forever.
		 */
		if (found_key.offset == last_objectid) {
			btrfs_err(fs_info,
				  "Error removing orphan entry, stopping orphan cleanup");
			ret = -EINVAL;
			goto out;
		}

		last_objectid = found_key.offset;

		found_key.objectid = found_key.offset;
		found_key.type = BTRFS_INODE_ITEM_KEY;
		found_key.offset = 0;
		inode = btrfs_iget(fs_info->sb, &found_key, root, NULL);
		ret = PTR_ERR_OR_ZERO(inode);
		if (ret && ret != -ENOENT)
			goto out;

		if (ret == -ENOENT && root == fs_info->tree_root) {
			struct btrfs_root *dead_root;
			/* NOTE(review): shadows the outer fs_info, same value. */
			struct btrfs_fs_info *fs_info = root->fs_info;
			int is_dead_root = 0;

			/*
			 * An orphan in the tree root with no inode behind it:
			 * check whether its objectid names a root on the
			 * dead_roots list.  Such an orphan item must be kept.
			 */
			spin_lock(&fs_info->trans_lock);
			list_for_each_entry(dead_root, &fs_info->dead_roots,
					    root_list) {
				if (dead_root->root_key.objectid ==
				    found_key.objectid) {
					is_dead_root = 1;
					break;
				}
			}
			spin_unlock(&fs_info->trans_lock);
			if (is_dead_root) {
				/* Continue the search below this orphan. */
				key.offset = found_key.objectid - 1;
				continue;
			}

		}

		/*
		 * No inode for this orphan item, or the inode still has
		 * links: the orphan item itself is deleted and the walk
		 * restarts from the top key.
		 */
		if (ret == -ENOENT || inode->i_nlink) {
			if (!ret)
				iput(inode);
			trans = btrfs_start_transaction(root, 1);
			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
				goto out;
			}
			btrfs_debug(fs_info, "auto deleting %Lu",
				    found_key.objectid);
			ret = btrfs_del_orphan_item(trans, root,
						    found_key.objectid);
			btrfs_end_transaction(trans);
			if (ret)
				goto out;
			continue;
		}

		nr_unlink++;

		/*
		 * Drop the reference taken by btrfs_iget().
		 * NOTE(review): presumably this final iput evicts the
		 * unlinked inode and removes its orphan item - confirm.
		 */
		iput(inode);
	}

	/* Release whatever the last search left in the path. */
	btrfs_release_path(path);

	root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE;

	/* Join and immediately end a transaction if the root is flagged. */
	if (test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state)) {
		trans = btrfs_join_transaction(root);
		if (!IS_ERR(trans))
			btrfs_end_transaction(trans);
	}

	if (nr_unlink)
		btrfs_debug(fs_info, "unlinked %d orphans", nr_unlink);

out:
	if (ret)
		btrfs_err(fs_info, "could not do orphan cleanup %d", ret);
	btrfs_free_path(path);
	return ret;
}
3582
3583
3584
3585
3586
3587
3588
3589static noinline int acls_after_inode_item(struct extent_buffer *leaf,
3590 int slot, u64 objectid,
3591 int *first_xattr_slot)
3592{
3593 u32 nritems = btrfs_header_nritems(leaf);
3594 struct btrfs_key found_key;
3595 static u64 xattr_access = 0;
3596 static u64 xattr_default = 0;
3597 int scanned = 0;
3598
3599 if (!xattr_access) {
3600 xattr_access = btrfs_name_hash(XATTR_NAME_POSIX_ACL_ACCESS,
3601 strlen(XATTR_NAME_POSIX_ACL_ACCESS));
3602 xattr_default = btrfs_name_hash(XATTR_NAME_POSIX_ACL_DEFAULT,
3603 strlen(XATTR_NAME_POSIX_ACL_DEFAULT));
3604 }
3605
3606 slot++;
3607 *first_xattr_slot = -1;
3608 while (slot < nritems) {
3609 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3610
3611
3612 if (found_key.objectid != objectid)
3613 return 0;
3614
3615
3616 if (found_key.type == BTRFS_XATTR_ITEM_KEY) {
3617 if (*first_xattr_slot == -1)
3618 *first_xattr_slot = slot;
3619 if (found_key.offset == xattr_access ||
3620 found_key.offset == xattr_default)
3621 return 1;
3622 }
3623
3624
3625
3626
3627
3628 if (found_key.type > BTRFS_XATTR_ITEM_KEY)
3629 return 0;
3630
3631 slot++;
3632 scanned++;
3633
3634
3635
3636
3637
3638
3639
3640 if (scanned >= 8)
3641 break;
3642 }
3643
3644
3645
3646
3647 if (*first_xattr_slot == -1)
3648 *first_xattr_slot = slot;
3649 return 1;
3650}
3651
3652
3653
3654
/*
 * Fill the in-memory @inode from its on-disk inode item.
 *
 * @inode:   inode to fill; BTRFS_I(inode)->location and ->root select the item
 * @in_path: optional path to use for the lookup.  When NULL a path is
 *           allocated here and freed again before returning.
 *
 * btrfs_fill_inode() is tried first; when it succeeds the field-by-field
 * copy from the leaf is skipped.  Afterwards the directory index is cached
 * from an adjacent inode ref, inode properties are loaded when xattr items
 * may be present, and the operation tables are installed according to the
 * file type.
 *
 * Returns 0 on success, -ENOMEM when no path could be allocated, or the
 * non-zero result of btrfs_lookup_inode().
 */
static int btrfs_read_locked_inode(struct inode *inode,
				   struct btrfs_path *in_path)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_path *path = in_path;
	struct extent_buffer *leaf;
	struct btrfs_inode_item *inode_item;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_key location;
	unsigned long ptr;
	int maybe_acls;
	u32 rdev;
	int ret;
	bool filled = false;
	int first_xattr_slot;

	/* On success the fields (and rdev) do not need to be read below. */
	ret = btrfs_fill_inode(inode, &rdev);
	if (!ret)
		filled = true;

	if (!path) {
		path = btrfs_alloc_path();
		if (!path)
			return -ENOMEM;
	}

	memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));

	ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
	if (ret) {
		/* Only a path allocated here may be freed here. */
		if (path != in_path)
			btrfs_free_path(path);
		return ret;
	}

	leaf = path->nodes[0];

	if (filled)
		goto cache_index;

	/* Copy the on-disk inode item into the VFS and btrfs inode. */
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	inode->i_mode = btrfs_inode_mode(leaf, inode_item);
	set_nlink(inode, btrfs_inode_nlink(leaf, inode_item));
	i_uid_write(inode, btrfs_inode_uid(leaf, inode_item));
	i_gid_write(inode, btrfs_inode_gid(leaf, inode_item));
	btrfs_i_size_write(BTRFS_I(inode), btrfs_inode_size(leaf, inode_item));

	inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime);
	inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->atime);

	inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->mtime);
	inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->mtime);

	inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->ctime);
	inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->ctime);

	BTRFS_I(inode)->i_otime.tv_sec =
		btrfs_timespec_sec(leaf, &inode_item->otime);
	BTRFS_I(inode)->i_otime.tv_nsec =
		btrfs_timespec_nsec(leaf, &inode_item->otime);

	inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item));
	BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
	BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item);

	inode_set_iversion_queried(inode,
				   btrfs_inode_sequence(leaf, inode_item));
	inode->i_generation = BTRFS_I(inode)->generation;
	/* rdev is only applied by init_special_inode() further down. */
	inode->i_rdev = 0;
	rdev = btrfs_inode_rdev(leaf, inode_item);

	BTRFS_I(inode)->index_cnt = (u64)-1;
	BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);

cache_index:
	/*
	 * An inode last modified in the current generation is flagged as
	 * needing a full sync.  This applies to both ways of filling the
	 * inode above.
	 */
	if (BTRFS_I(inode)->last_trans == fs_info->generation)
		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
			&BTRFS_I(inode)->runtime_flags);

	/*
	 * last_unlink_trans is seeded from last_trans.
	 * NOTE(review): presumably a conservative value, as the in-memory
	 * field is not stored in the inode item - confirm with the tree-log
	 * code.
	 */
	BTRFS_I(inode)->last_unlink_trans = BTRFS_I(inode)->last_trans;

	/*
	 * Look at the item right after the inode item.  For an inode with
	 * exactly one link, a following ref item of the same objectid
	 * provides the directory index.
	 */
	path->slots[0]++;
	if (inode->i_nlink != 1 ||
	    path->slots[0] >= btrfs_header_nritems(leaf))
		goto cache_acl;

	btrfs_item_key_to_cpu(leaf, &location, path->slots[0]);
	if (location.objectid != btrfs_ino(BTRFS_I(inode)))
		goto cache_acl;

	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
	if (location.type == BTRFS_INODE_REF_KEY) {
		struct btrfs_inode_ref *ref;

		ref = (struct btrfs_inode_ref *)ptr;
		BTRFS_I(inode)->dir_index = btrfs_inode_ref_index(leaf, ref);
	} else if (location.type == BTRFS_INODE_EXTREF_KEY) {
		struct btrfs_inode_extref *extref;

		extref = (struct btrfs_inode_extref *)ptr;
		BTRFS_I(inode)->dir_index = btrfs_inode_extref_index(leaf,
								     extref);
	}
cache_acl:
	/*
	 * Scan the following items for xattrs.  When the scan rules out ACLs,
	 * cache_no_acl() is called for the inode below.
	 */
	maybe_acls = acls_after_inode_item(leaf, path->slots[0],
			btrfs_ino(BTRFS_I(inode)), &first_xattr_slot);
	if (first_xattr_slot != -1) {
		path->slots[0] = first_xattr_slot;
		/* A failure to load the props is logged but not returned. */
		ret = btrfs_load_inode_props(inode, path);
		if (ret)
			btrfs_err(fs_info,
				  "error loading props for ino %llu (root %llu): %d",
				  btrfs_ino(BTRFS_I(inode)),
				  root->root_key.objectid, ret);
	}
	if (path != in_path)
		btrfs_free_path(path);

	if (!maybe_acls)
		cache_no_acl(inode);

	/* Install the operation tables that match the file type. */
	switch (inode->i_mode & S_IFMT) {
	case S_IFREG:
		inode->i_mapping->a_ops = &btrfs_aops;
		BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
		inode->i_fop = &btrfs_file_operations;
		inode->i_op = &btrfs_file_inode_operations;
		break;
	case S_IFDIR:
		inode->i_fop = &btrfs_dir_file_operations;
		inode->i_op = &btrfs_dir_inode_operations;
		break;
	case S_IFLNK:
		inode->i_op = &btrfs_symlink_inode_operations;
		inode_nohighmem(inode);
		inode->i_mapping->a_ops = &btrfs_aops;
		break;
	default:
		/* Every other file type is set up as a special inode. */
		inode->i_op = &btrfs_special_inode_operations;
		init_special_inode(inode, inode->i_mode, rdev);
		break;
	}

	btrfs_sync_inode_flags_to_i_flags(inode);
	return 0;
}
3842
3843
3844
3845
3846static void fill_inode_item(struct btrfs_trans_handle *trans,
3847 struct extent_buffer *leaf,
3848 struct btrfs_inode_item *item,
3849 struct inode *inode)
3850{
3851 struct btrfs_map_token token;
3852
3853 btrfs_init_map_token(&token);
3854
3855 btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
3856 btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
3857 btrfs_set_token_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size,
3858 &token);
3859 btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
3860 btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
3861
3862 btrfs_set_token_timespec_sec(leaf, &item->atime,
3863 inode->i_atime.tv_sec, &token);
3864 btrfs_set_token_timespec_nsec(leaf, &item->atime,
3865 inode->i_atime.tv_nsec, &token);
3866
3867 btrfs_set_token_timespec_sec(leaf, &item->mtime,
3868 inode->i_mtime.tv_sec, &token);
3869 btrfs_set_token_timespec_nsec(leaf, &item->mtime,
3870 inode->i_mtime.tv_nsec, &token);
3871
3872 btrfs_set_token_timespec_sec(leaf, &item->ctime,
3873 inode->i_ctime.tv_sec, &token);
3874 btrfs_set_token_timespec_nsec(leaf, &item->ctime,
3875 inode->i_ctime.tv_nsec, &token);
3876
3877 btrfs_set_token_timespec_sec(leaf, &item->otime,
3878 BTRFS_I(inode)->i_otime.tv_sec, &token);
3879 btrfs_set_token_timespec_nsec(leaf, &item->otime,
3880 BTRFS_I(inode)->i_otime.tv_nsec, &token);
3881
3882 btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
3883 &token);
3884 btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation,
3885 &token);
3886 btrfs_set_token_inode_sequence(leaf, item, inode_peek_iversion(inode),
3887 &token);
3888 btrfs_set_token_inode_transid(leaf, item, trans->transid, &token);
3889 btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token);
3890 btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token);
3891 btrfs_set_token_inode_block_group(leaf, item, 0, &token);
3892}
3893
3894
3895
3896
3897static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
3898 struct btrfs_root *root, struct inode *inode)
3899{
3900 struct btrfs_inode_item *inode_item;
3901 struct btrfs_path *path;
3902 struct extent_buffer *leaf;
3903 int ret;
3904
3905 path = btrfs_alloc_path();
3906 if (!path)
3907 return -ENOMEM;
3908
3909 path->leave_spinning = 1;
3910 ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location,
3911 1);
3912 if (ret) {
3913 if (ret > 0)
3914 ret = -ENOENT;
3915 goto failed;
3916 }
3917
3918 leaf = path->nodes[0];
3919 inode_item = btrfs_item_ptr(leaf, path->slots[0],
3920 struct btrfs_inode_item);
3921
3922 fill_inode_item(trans, leaf, inode_item, inode);
3923 btrfs_mark_buffer_dirty(leaf);
3924 btrfs_set_inode_last_trans(trans, inode);
3925 ret = 0;
3926failed:
3927 btrfs_free_path(path);
3928 return ret;
3929}
3930
3931
3932
3933
3934noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
3935 struct btrfs_root *root, struct inode *inode)
3936{
3937 struct btrfs_fs_info *fs_info = root->fs_info;
3938 int ret;
3939
3940
3941
3942
3943
3944
3945
3946
3947 if (!btrfs_is_free_space_inode(BTRFS_I(inode))
3948 && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
3949 && !test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
3950 btrfs_update_root_times(trans, root);
3951
3952 ret = btrfs_delayed_update_inode(trans, root, inode);
3953 if (!ret)
3954 btrfs_set_inode_last_trans(trans, inode);
3955 return ret;
3956 }
3957
3958 return btrfs_update_inode_item(trans, root, inode);
3959}
3960
3961noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
3962 struct btrfs_root *root,
3963 struct inode *inode)
3964{
3965 int ret;
3966
3967 ret = btrfs_update_inode(trans, root, inode);
3968 if (ret == -ENOSPC)
3969 return btrfs_update_inode_item(trans, root, inode);
3970 return ret;
3971}
3972
3973
3974
3975
3976
3977
/*
 * Remove the directory entry @name (length @name_len) of @dir that refers to
 * @inode.
 *
 * In order this deletes the dir item, the inode back reference, the
 * (delayed) dir index and the matching entries in the log tree, then shrinks
 * the size of @dir, bumps the iversion of both inodes, refreshes their
 * timestamps and writes @dir back.  The link count of @inode is NOT changed
 * here.
 *
 * Returns 0 on success or a negative errno.  Several of the failure paths
 * abort the transaction before returning.
 */
static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_inode *dir,
				struct btrfs_inode *inode,
				const char *name, int name_len)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_path *path;
	int ret = 0;
	struct btrfs_dir_item *di;
	u64 index;
	u64 ino = btrfs_ino(inode);
	u64 dir_ino = btrfs_ino(dir);

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	path->leave_spinning = 1;
	di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
				    name, name_len, -1);
	/* A NULL result (no such entry) is reported as -ENOENT. */
	if (IS_ERR_OR_NULL(di)) {
		ret = di ? PTR_ERR(di) : -ENOENT;
		goto err;
	}
	ret = btrfs_delete_one_dir_name(trans, root, path, di);
	if (ret)
		goto err;
	btrfs_release_path(path);

	/*
	 * If the inode has a cached dir_index, try to delete the inode ref
	 * through the delayed inode code and reuse the cached index.  When
	 * there is no cached index, or the delayed deletion fails, fall back
	 * to btrfs_del_inode_ref() below, which also reports the index.
	 */
	if (inode->dir_index) {
		ret = btrfs_delayed_delete_inode_ref(inode);
		if (!ret) {
			index = inode->dir_index;
			goto skip_backref;
		}
	}

	ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
				  dir_ino, &index);
	if (ret) {
		btrfs_info(fs_info,
			"failed to delete reference to %.*s, inode %llu parent %llu",
			name_len, name, ino, dir_ino);
		btrfs_abort_transaction(trans, ret);
		goto err;
	}
skip_backref:
	ret = btrfs_delete_delayed_dir_index(trans, dir, index);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto err;
	}

	/* A missing entry in the log (-ENOENT) is not treated as an error. */
	ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode,
					 dir_ino);
	if (ret != 0 && ret != -ENOENT) {
		btrfs_abort_transaction(trans, ret);
		goto err;
	}

	ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, dir,
					   index);
	if (ret == -ENOENT)
		ret = 0;
	else if (ret)
		btrfs_abort_transaction(trans, ret);

	/*
	 * Runs on both the success path and the failure path of the log
	 * deletion just above.
	 * NOTE(review): presumably flushes a pending delayed iput for this
	 * inode so the final iput does not happen later in another context;
	 * confirm against btrfs_run_delayed_iput().
	 */
	btrfs_run_delayed_iput(fs_info, inode);
err:
	btrfs_free_path(path);
	if (ret)
		goto out;

	/* The directory size shrinks by twice the length of the name. */
	btrfs_i_size_write(dir, dir->vfs_inode.i_size - name_len * 2);
	inode_inc_iversion(&inode->vfs_inode);
	inode_inc_iversion(&dir->vfs_inode);
	inode->vfs_inode.i_ctime = dir->vfs_inode.i_mtime =
		dir->vfs_inode.i_ctime = current_time(&inode->vfs_inode);
	ret = btrfs_update_inode(trans, root, &dir->vfs_inode);
out:
	return ret;
}
4082
4083int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
4084 struct btrfs_root *root,
4085 struct btrfs_inode *dir, struct btrfs_inode *inode,
4086 const char *name, int name_len)
4087{
4088 int ret;
4089 ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
4090 if (!ret) {
4091 drop_nlink(&inode->vfs_inode);
4092 ret = btrfs_update_inode(trans, root, &inode->vfs_inode);
4093 }
4094 return ret;
4095}
4096
4097
4098
4099
4100
4101
4102
4103
4104
4105static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir)
4106{
4107 struct btrfs_root *root = BTRFS_I(dir)->root;
4108
4109
4110
4111
4112
4113
4114
4115
4116 return btrfs_start_transaction_fallback_global_rsv(root, 5, 5);
4117}
4118
4119static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
4120{
4121 struct btrfs_root *root = BTRFS_I(dir)->root;
4122 struct btrfs_trans_handle *trans;
4123 struct inode *inode = d_inode(dentry);
4124 int ret;
4125
4126 trans = __unlink_start_trans(dir);
4127 if (IS_ERR(trans))
4128 return PTR_ERR(trans);
4129
4130 btrfs_record_unlink_dir(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
4131 0);
4132
4133 ret = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
4134 BTRFS_I(d_inode(dentry)), dentry->d_name.name,
4135 dentry->d_name.len);
4136 if (ret)
4137 goto out;
4138
4139 if (inode->i_nlink == 0) {
4140 ret = btrfs_orphan_add(trans, BTRFS_I(inode));
4141 if (ret)
4142 goto out;
4143 }
4144
4145out:
4146 btrfs_end_transaction(trans);
4147 btrfs_btree_balance_dirty(root->fs_info);
4148 return ret;
4149}
4150
/*
 * Remove the directory entry @name (length @name_len) of @dir that refers to
 * the subvolume root @objectid.
 *
 * Deletes the dir item, the root ref and the delayed dir index, then shrinks
 * the size of @dir, bumps its iversion, refreshes its mtime/ctime and writes
 * it back.  Every failure after the initial lookup aborts the transaction.
 *
 * Returns 0 on success or a negative errno.
 */
static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
			       struct inode *dir, u64 objectid,
			       const char *name, int name_len)
{
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_dir_item *di;
	struct btrfs_key key;
	u64 index;
	int ret;
	u64 dir_ino = btrfs_ino(BTRFS_I(dir));

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
				   name, name_len, -1);
	/* A NULL result (no such entry) is reported as -ENOENT. */
	if (IS_ERR_OR_NULL(di)) {
		ret = di ? PTR_ERR(di) : -ENOENT;
		goto out;
	}

	leaf = path->nodes[0];
	btrfs_dir_item_key_to_cpu(leaf, di, &key);
	/* The entry is expected to point at the root item of @objectid. */
	WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
	ret = btrfs_delete_one_dir_name(trans, root, path, di);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}
	btrfs_release_path(path);

	/*
	 * Deleting the root ref also yields the dir index.  If the root ref
	 * does not exist (-ENOENT), find the index by searching for the dir
	 * index item of this name instead.
	 */
	ret = btrfs_del_root_ref(trans, objectid, root->root_key.objectid,
				 dir_ino, &index, name, name_len);
	if (ret < 0) {
		if (ret != -ENOENT) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
		di = btrfs_search_dir_index_item(root, path, dir_ino,
						 name, name_len);
		if (IS_ERR_OR_NULL(di)) {
			if (!di)
				ret = -ENOENT;
			else
				ret = PTR_ERR(di);
			btrfs_abort_transaction(trans, ret);
			goto out;
		}

		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		/* The key offset of the item that was found is the index. */
		index = key.offset;
	}
	btrfs_release_path(path);

	ret = btrfs_delete_delayed_dir_index(trans, BTRFS_I(dir), index);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	/* The directory size shrinks by twice the length of the name. */
	btrfs_i_size_write(BTRFS_I(dir), dir->i_size - name_len * 2);
	inode_inc_iversion(dir);
	dir->i_mtime = dir->i_ctime = current_time(dir);
	ret = btrfs_update_inode_fallback(trans, root, dir);
	if (ret)
		btrfs_abort_transaction(trans, ret);
out:
	btrfs_free_path(path);
	return ret;
}
4225
4226
4227
4228
4229
4230static noinline int may_destroy_subvol(struct btrfs_root *root)
4231{
4232 struct btrfs_fs_info *fs_info = root->fs_info;
4233 struct btrfs_path *path;
4234 struct btrfs_dir_item *di;
4235 struct btrfs_key key;
4236 u64 dir_id;
4237 int ret;
4238
4239 path = btrfs_alloc_path();
4240 if (!path)
4241 return -ENOMEM;
4242
4243
4244 dir_id = btrfs_super_root_dir(fs_info->super_copy);
4245 di = btrfs_lookup_dir_item(NULL, fs_info->tree_root, path,
4246 dir_id, "default", 7, 0);
4247 if (di && !IS_ERR(di)) {
4248 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
4249 if (key.objectid == root->root_key.objectid) {
4250 ret = -EPERM;
4251 btrfs_err(fs_info,
4252 "deleting default subvolume %llu is not allowed",
4253 key.objectid);
4254 goto out;
4255 }
4256 btrfs_release_path(path);
4257 }
4258
4259 key.objectid = root->root_key.objectid;
4260 key.type = BTRFS_ROOT_REF_KEY;
4261 key.offset = (u64)-1;
4262
4263 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
4264 if (ret < 0)
4265 goto out;
4266 BUG_ON(ret == 0);
4267
4268 ret = 0;
4269 if (path->slots[0] > 0) {
4270 path->slots[0]--;
4271 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
4272 if (key.objectid == root->root_key.objectid &&
4273 key.type == BTRFS_ROOT_REF_KEY)
4274 ret = -ENOTEMPTY;
4275 }
4276out:
4277 btrfs_free_path(path);
4278 return ret;
4279}
4280
4281
/*
 * Walk the in-memory inodes tracked in root->inode_tree in increasing inode
 * number order and prune the dentry aliases of every inode that could be
 * grabbed and has more than one reference.
 *
 * root->inode_lock is held while walking the tree and is dropped around the
 * work done on each grabbed inode; the walk then restarts from the next
 * inode number.
 */
static void btrfs_prune_dentries(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct rb_node *node;
	struct rb_node *prev;
	struct btrfs_inode *entry;
	struct inode *inode;
	u64 objectid = 0;

	/* Unless the fs is in an error state, the root should have no refs. */
	if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
		WARN_ON(btrfs_root_refs(&root->root_item) != 0);

	spin_lock(&root->inode_lock);
again:
	/* Binary search for the inode numbered @objectid. */
	node = root->inode_tree.rb_node;
	prev = NULL;
	while (node) {
		prev = node;
		entry = rb_entry(node, struct btrfs_inode, rb_node);

		if (objectid < btrfs_ino(entry))
			node = node->rb_left;
		else if (objectid > btrfs_ino(entry))
			node = node->rb_right;
		else
			break;
	}
	/*
	 * No exact match: starting from the last node visited, move forward
	 * to the first inode whose number is >= @objectid.
	 */
	if (!node) {
		while (prev) {
			entry = rb_entry(prev, struct btrfs_inode, rb_node);
			if (objectid <= btrfs_ino(entry)) {
				node = prev;
				break;
			}
			prev = rb_next(prev);
		}
	}
	while (node) {
		entry = rb_entry(node, struct btrfs_inode, rb_node);
		/* Remember where to resume if the search has to restart. */
		objectid = btrfs_ino(entry) + 1;
		inode = igrab(&entry->vfs_inode);
		if (inode) {
			spin_unlock(&root->inode_lock);
			/* More than our own reference: drop dentry aliases. */
			if (atomic_read(&inode->i_count) > 1)
				d_prune_aliases(inode);
			/*
			 * Drop the reference taken by igrab().
			 * NOTE(review): presumably the inode leaves the inode
			 * tree once its usage count reaches zero - confirm.
			 */
			iput(inode);
			cond_resched();
			spin_lock(&root->inode_lock);
			/* The lock was dropped, so the tree may have changed. */
			goto again;
		}

		/* Non-zero means the lock was dropped: restart the search. */
		if (cond_resched_lock(&root->inode_lock))
			goto again;

		node = rb_next(node);
	}
	spin_unlock(&root->inode_lock);
}
4344
/*
 * Delete the subvolume that @dentry (an entry of directory @dir) refers to.
 *
 * The subvolume root is flagged BTRFS_ROOT_SUBVOL_DEAD up front; the flag is
 * cleared again if the deletion fails.  The deletion is refused with -EPERM
 * while a send is in progress on the subvolume, and with whatever
 * may_destroy_subvol() returns when that check fails.
 *
 * Returns 0 on success or a negative errno.
 */
int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct inode *inode = d_inode(dentry);
	struct btrfs_root *dest = BTRFS_I(inode)->root;
	struct btrfs_trans_handle *trans;
	struct btrfs_block_rsv block_rsv;
	u64 root_flags;
	int ret;
	int err;

	/*
	 * Check for an ongoing send and set the DEAD flag under
	 * root_item_lock, so both happen atomically with respect to other
	 * users of that lock.
	 */
	spin_lock(&dest->root_item_lock);
	if (dest->send_in_progress) {
		spin_unlock(&dest->root_item_lock);
		btrfs_warn(fs_info,
			   "attempt to delete subvolume %llu during send",
			   dest->root_key.objectid);
		return -EPERM;
	}
	root_flags = btrfs_root_flags(&dest->root_item);
	btrfs_set_root_flags(&dest->root_item,
			     root_flags | BTRFS_ROOT_SUBVOL_DEAD);
	spin_unlock(&dest->root_item_lock);

	down_write(&fs_info->subvol_sem);

	err = may_destroy_subvol(dest);
	if (err)
		goto out_up_write;

	btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
	/*
	 * Reserve metadata for 5 items in a temporary block reserve.
	 * NOTE(review): presumably two for the dir entries, two for the root
	 * ref/backref and one for the orphan item - confirm.
	 */
	err = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
	if (err)
		goto out_up_write;

	/* The space comes from block_rsv, so the transaction reserves none. */
	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		goto out_release;
	}
	trans->block_rsv = &block_rsv;
	trans->bytes_reserved = block_rsv.size;

	btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));

	ret = btrfs_unlink_subvol(trans, dir, dest->root_key.objectid,
				  dentry->d_name.name, dentry->d_name.len);
	if (ret) {
		err = ret;
		btrfs_abort_transaction(trans, ret);
		goto out_end_trans;
	}

	btrfs_record_root_in_trans(trans, dest);

	/* Reset the drop progress and set the root's reference count to 0. */
	memset(&dest->root_item.drop_progress, 0,
		sizeof(dest->root_item.drop_progress));
	dest->root_item.drop_level = 0;
	btrfs_set_root_refs(&dest->root_item, 0);

	/* Insert the orphan item for the root only once. */
	if (!test_and_set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &dest->state)) {
		ret = btrfs_insert_orphan_item(trans,
					fs_info->tree_root,
					dest->root_key.objectid);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			err = ret;
			goto out_end_trans;
		}
	}

	/* Missing uuid tree entries (-ENOENT) are tolerated. */
	ret = btrfs_uuid_tree_remove(trans, dest->root_item.uuid,
				  BTRFS_UUID_KEY_SUBVOL,
				  dest->root_key.objectid);
	if (ret && ret != -ENOENT) {
		btrfs_abort_transaction(trans, ret);
		err = ret;
		goto out_end_trans;
	}
	if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) {
		ret = btrfs_uuid_tree_remove(trans,
					  dest->root_item.received_uuid,
					  BTRFS_UUID_KEY_RECEIVED_SUBVOL,
					  dest->root_key.objectid);
		if (ret && ret != -ENOENT) {
			btrfs_abort_transaction(trans, ret);
			err = ret;
			goto out_end_trans;
		}
	}

out_end_trans:
	/* Detach the temporary reserve before ending the transaction. */
	trans->block_rsv = NULL;
	trans->bytes_reserved = 0;
	ret = btrfs_end_transaction(trans);
	if (ret && !err)
		err = ret;
	/* Set on every path that got as far as starting the transaction. */
	inode->i_flags |= S_DEAD;
out_release:
	btrfs_subvolume_release_metadata(fs_info, &block_rsv);
out_up_write:
	up_write(&fs_info->subvol_sem);
	if (err) {
		/* Deletion failed: the subvolume is not dead after all. */
		spin_lock(&dest->root_item_lock);
		root_flags = btrfs_root_flags(&dest->root_item);
		btrfs_set_root_flags(&dest->root_item,
				root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
		spin_unlock(&dest->root_item_lock);
	} else {
		d_invalidate(dentry);
		btrfs_prune_dentries(dest);
		ASSERT(dest->send_in_progress == 0);

		/* Drop the reference held on the ino cache inode, if any. */
		if (dest->ino_cache_inode) {
			iput(dest->ino_cache_inode);
			dest->ino_cache_inode = NULL;
		}
	}

	return err;
}
4478
4479static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
4480{
4481 struct inode *inode = d_inode(dentry);
4482 int err = 0;
4483 struct btrfs_root *root = BTRFS_I(dir)->root;
4484 struct btrfs_trans_handle *trans;
4485 u64 last_unlink_trans;
4486
4487 if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
4488 return -ENOTEMPTY;
4489 if (btrfs_ino(BTRFS_I(inode)) == BTRFS_FIRST_FREE_OBJECTID)
4490 return btrfs_delete_subvolume(dir, dentry);
4491
4492 trans = __unlink_start_trans(dir);
4493 if (IS_ERR(trans))
4494 return PTR_ERR(trans);
4495
4496 if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
4497 err = btrfs_unlink_subvol(trans, dir,
4498 BTRFS_I(inode)->location.objectid,
4499 dentry->d_name.name,
4500 dentry->d_name.len);
4501 goto out;
4502 }
4503
4504 err = btrfs_orphan_add(trans, BTRFS_I(inode));
4505 if (err)
4506 goto out;
4507
4508 last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
4509
4510
4511 err = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
4512 BTRFS_I(d_inode(dentry)), dentry->d_name.name,
4513 dentry->d_name.len);
4514 if (!err) {
4515 btrfs_i_size_write(BTRFS_I(inode), 0);
4516
4517
4518
4519
4520
4521
4522
4523
4524
4525
4526
4527 if (last_unlink_trans >= trans->transid)
4528 BTRFS_I(dir)->last_unlink_trans = last_unlink_trans;
4529 }
4530out:
4531 btrfs_end_transaction(trans);
4532 btrfs_btree_balance_dirty(root->fs_info);
4533
4534 return err;
4535}
4536
4537
4538
4539
4540
/*
 * Positive return value of btrfs_truncate_inode_items(): an inline extent
 * that is compressed, encrypted or otherwise encoded straddles the new size
 * and could not be shortened in place.
 */
#define NEED_TRUNCATE_BLOCK 1
4542
4543
4544
4545
4546
4547
4548
4549
4550
4551
4552
4553
/*
 * Remove the items of @inode in @root that lie at or beyond @new_size.
 *
 * The search starts at the highest possible key of the inode and walks
 * backwards, batching deletions of adjacent slots.  It stops at the first
 * item that belongs to another inode, whose key type is below @min_type, or
 * (for items of type @min_type) that ends before @new_size.  A file extent
 * that straddles @new_size is shortened instead of deleted.
 *
 * @new_size may only be non-zero when @min_type is BTRFS_EXTENT_DATA_KEY.
 * With @min_type == 0 the delayed inode items are killed as well.
 *
 * Returns 0 on success, NEED_TRUNCATE_BLOCK when an encoded inline extent
 * straddles @new_size, -EAGAIN when the caller should end the transaction
 * and call again, or another negative errno.
 */
int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct inode *inode,
			       u64 new_size, u32 min_type)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u64 extent_start = 0;
	u64 extent_num_bytes = 0;
	u64 extent_offset = 0;
	u64 item_end = 0;
	u64 last_size = new_size;
	u32 found_type = (u8)-1;
	int found_extent;
	int del_item;
	int pending_del_nr = 0;
	int pending_del_slot = 0;
	int extent_type = -1;
	int ret;
	u64 ino = btrfs_ino(BTRFS_I(inode));
	u64 bytes_deleted = 0;
	bool be_nice = false;
	bool should_throttle = false;

	BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);

	/*
	 * Only throttle and bail out early (-EAGAIN) for regular inodes in
	 * reference counted (REF_COWS) roots, never for free space inodes.
	 */
	if (!btrfs_is_free_space_inode(BTRFS_I(inode)) &&
	    test_bit(BTRFS_ROOT_REF_COWS, &root->state))
		be_nice = true;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->reada = READA_BACK;

	/*
	 * Drop the cached extent maps from the sector aligned new size to
	 * the end of the file.  Not done for roots that are neither REF_COWS
	 * nor the tree root.
	 */
	if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
	    root == fs_info->tree_root)
		btrfs_drop_extent_cache(BTRFS_I(inode), ALIGN(new_size,
					fs_info->sectorsize),
					(u64)-1, 0);

	/*
	 * Everything is being deleted (min_type == 0) from the inode's own
	 * root, so its delayed items can be thrown away too.
	 */
	if (min_type == 0 && root == BTRFS_I(inode)->root)
		btrfs_kill_delayed_inode_items(BTRFS_I(inode));

	/* Start from the highest possible key of this inode. */
	key.objectid = ino;
	key.offset = (u64)-1;
	key.type = (u8)-1;

search_again:
	/*
	 * After more than 32M worth of extents have been dropped, let the
	 * caller end the transaction if the transaction code asks for it.
	 */
	if (be_nice && bytes_deleted > SZ_32M &&
	    btrfs_should_end_transaction(trans)) {
		ret = -EAGAIN;
		goto out;
	}

	path->leave_spinning = 1;
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0)
		goto out;

	if (ret > 0) {
		ret = 0;
		/*
		 * No exact match (expected for the maximal key).  Step back
		 * to the previous item; slot 0 means there is nothing left.
		 */
		if (path->slots[0] == 0)
			goto out;
		path->slots[0]--;
	}

	while (1) {
		fi = NULL;
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		found_type = found_key.type;

		if (found_key.objectid != ino)
			break;

		if (found_type < min_type)
			break;

		/* Compute the last byte covered by this item. */
		item_end = found_key.offset;
		if (found_type == BTRFS_EXTENT_DATA_KEY) {
			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);
			extent_type = btrfs_file_extent_type(leaf, fi);
			if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
				item_end +=
				    btrfs_file_extent_num_bytes(leaf, fi);

				trace_btrfs_truncate_show_fi_regular(
					BTRFS_I(inode), leaf, fi,
					found_key.offset);
			} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
				item_end += btrfs_file_extent_ram_bytes(leaf,
									fi);

				trace_btrfs_truncate_show_fi_inline(
					BTRFS_I(inode), leaf, fi, path->slots[0],
					found_key.offset);
			}
			item_end--;
		}
		/*
		 * Items of a type above min_type are always deleted.  Items
		 * of type min_type are deleted when they start at or after
		 * new_size, kept (and possibly shortened) when they straddle
		 * it, and end the walk when they end before it.
		 */
		if (found_type > min_type) {
			del_item = 1;
		} else {
			if (item_end < new_size)
				break;
			if (found_key.offset >= new_size)
				del_item = 1;
			else
				del_item = 0;
		}
		found_extent = 0;
		/* Only file extent items need the size accounting below. */
		if (found_type != BTRFS_EXTENT_DATA_KEY)
			goto delete;

		if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
			u64 num_dec;
			extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
			if (!del_item) {
				/*
				 * The extent straddles new_size: shorten it
				 * to the sector aligned length that remains.
				 */
				u64 orig_num_bytes =
					btrfs_file_extent_num_bytes(leaf, fi);
				extent_num_bytes = ALIGN(new_size -
						found_key.offset,
						fs_info->sectorsize);
				btrfs_set_file_extent_num_bytes(leaf, fi,
							 extent_num_bytes);
				num_dec = (orig_num_bytes -
					   extent_num_bytes);
				/* disk_bytenr == 0 is skipped: no bytes to subtract. */
				if (test_bit(BTRFS_ROOT_REF_COWS,
					     &root->state) &&
				    extent_start != 0)
					inode_sub_bytes(inode, num_dec);
				btrfs_mark_buffer_dirty(leaf);
			} else {
				extent_num_bytes =
					btrfs_file_extent_disk_num_bytes(leaf,
									 fi);
				extent_offset = found_key.offset -
					btrfs_file_extent_offset(leaf, fi);

				/* The whole extent goes away. */
				num_dec = btrfs_file_extent_num_bytes(leaf, fi);
				if (extent_start != 0) {
					/* Its disk extent gets freed below. */
					found_extent = 1;
					if (test_bit(BTRFS_ROOT_REF_COWS,
						     &root->state))
						inode_sub_bytes(inode, num_dec);
				}
			}
		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
			/*
			 * A plain inline extent (no compression, encryption
			 * or other encoding) that straddles new_size is
			 * shrunk in place.
			 */
			if (!del_item &&
			    btrfs_file_extent_encryption(leaf, fi) == 0 &&
			    btrfs_file_extent_other_encoding(leaf, fi) == 0 &&
			    btrfs_file_extent_compression(leaf, fi) == 0) {
				u32 size = (u32)(new_size - found_key.offset);

				btrfs_set_file_extent_ram_bytes(leaf, fi, size);
				size = btrfs_file_extent_calc_inline_size(size);
				btrfs_truncate_item(path, size, 1);
			} else if (!del_item) {
				/*
				 * An encoded inline extent cannot be shrunk
				 * here; tell the caller so it can deal with
				 * the block itself.
				 */
				ret = NEED_TRUNCATE_BLOCK;
				break;
			}

			if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
				inode_sub_bytes(inode, item_end + 1 - new_size);
		}
delete:
		/* Track how far down the truncation has progressed so far. */
		if (del_item)
			last_size = found_key.offset;
		else
			last_size = new_size;
		if (del_item) {
			if (!pending_del_nr) {
				/* No pending batch yet, start a new one. */
				pending_del_slot = path->slots[0];
				pending_del_nr = 1;
			} else if (pending_del_nr &&
				   path->slots[0] + 1 == pending_del_slot) {
				/* Directly before the batch: extend it downwards. */
				pending_del_nr++;
				pending_del_slot = path->slots[0];
			} else {
				BUG();
			}
		} else {
			break;
		}
		should_throttle = false;

		/* Drop the reference on the disk extent of a deleted item. */
		if (found_extent &&
		    (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
		     root == fs_info->tree_root)) {
			struct btrfs_ref ref = { 0 };

			btrfs_set_path_blocking(path);
			bytes_deleted += extent_num_bytes;

			btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF,
					extent_start, extent_num_bytes, 0);
			ref.real_root = root->root_key.objectid;
			btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
					ino, extent_offset);
			ret = btrfs_free_extent(trans, &ref);
			if (ret) {
				btrfs_abort_transaction(trans, ret);
				break;
			}
			if (be_nice) {
				if (btrfs_should_throttle_delayed_refs(trans))
					should_throttle = true;
			}
		}

		/* The inode item has the lowest key type: nothing is left. */
		if (found_type == BTRFS_INODE_ITEM_KEY)
			break;

		/*
		 * Flush the pending batch and search again when the start of
		 * the leaf is reached, when the current slot is not the
		 * batch start, or when throttling is needed.  Otherwise just
		 * step to the previous slot.
		 */
		if (path->slots[0] == 0 ||
		    path->slots[0] != pending_del_slot ||
		    should_throttle) {
			if (pending_del_nr) {
				ret = btrfs_del_items(trans, root, path,
						pending_del_slot,
						pending_del_nr);
				if (ret) {
					btrfs_abort_transaction(trans, ret);
					break;
				}
				pending_del_nr = 0;
			}
			btrfs_release_path(path);

			/*
			 * Try to refill the delayed refs reserve without
			 * flushing.  If that fails, return -EAGAIN so the
			 * caller can end the transaction and come back.
			 */
			if (should_throttle) {
				ret = btrfs_delayed_refs_rsv_refill(fs_info,
							BTRFS_RESERVE_NO_FLUSH);
				if (ret) {
					ret = -EAGAIN;
					break;
				}
			}
			goto search_again;
		} else {
			path->slots[0]--;
		}
	}
out:
	/* Flush a still pending deletion batch unless an error occurred. */
	if (ret >= 0 && pending_del_nr) {
		int err;

		err = btrfs_del_items(trans, root, path, pending_del_slot,
				      pending_del_nr);
		if (err) {
			btrfs_abort_transaction(trans, err);
			ret = err;
		}
	}
	/*
	 * For everything but the log tree, record the new on-disk size.  On
	 * full success (ret == 0) that is new_size; otherwise it is as far
	 * down as the truncation got.
	 */
	if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
		ASSERT(last_size >= new_size);
		if (!ret && last_size > new_size)
			last_size = new_size;
		btrfs_ordered_update_i_size(inode, last_size, NULL);
	}

	btrfs_free_path(path);
	return ret;
}
4866
4867
4868
4869
4870
4871
4872
4873
4874
4875
4876
4877
/*
 * Zero part of the block that contains file offset @from and mark it for
 * delalloc writeback.
 *
 * @inode: inode to operate on
 * @from:  file offset inside the block to be (partially) zeroed
 * @len:   number of bytes to zero; 0 means "up to the end of the block"
 * @front: non-zero to zero from the start of the block up to @from instead
 *         of from @from onwards
 *
 * Nothing is done when @from is block aligned and @len is 0 or block
 * aligned.
 *
 * Returns 0 on success or a negative errno (-ENOMEM, -EIO, or the error of
 * the space reservation / delalloc setup).
 */
int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
			int front)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct address_space *mapping = inode->i_mapping;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	struct extent_changeset *data_reserved = NULL;
	char *kaddr;
	u32 blocksize = fs_info->sectorsize;
	pgoff_t index = from >> PAGE_SHIFT;
	/* Offset of @from inside its block. */
	unsigned offset = from & (blocksize - 1);
	struct page *page;
	gfp_t mask = btrfs_alloc_write_mask(mapping);
	int ret = 0;
	u64 block_start;
	u64 block_end;

	/* Everything is block aligned: there is no partial block to zero. */
	if (IS_ALIGNED(offset, blocksize) &&
	    (!len || IS_ALIGNED(len, blocksize)))
		goto out;

	block_start = round_down(from, blocksize);
	block_end = block_start + blocksize - 1;

	/* The block is going to be dirtied, so reserve space for it. */
	ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
					   block_start, blocksize);
	if (ret)
		goto out;

again:
	page = find_or_create_page(mapping, index, mask);
	if (!page) {
		btrfs_delalloc_release_space(inode, data_reserved,
					     block_start, blocksize, true);
		btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, true);
		ret = -ENOMEM;
		goto out;
	}

	if (!PageUptodate(page)) {
		ret = btrfs_readpage(NULL, page);
		lock_page(page);
		/* The page was removed from the mapping meanwhile: retry. */
		if (page->mapping != mapping) {
			unlock_page(page);
			put_page(page);
			goto again;
		}
		if (!PageUptodate(page)) {
			ret = -EIO;
			goto out_unlock;
		}
	}
	wait_on_page_writeback(page);

	lock_extent_bits(io_tree, block_start, block_end, &cached_state);
	set_page_extent_mapped(page);

	/*
	 * An ordered extent covers the start of the block: drop all locks,
	 * start it and wait for it, then retry from the top.
	 */
	ordered = btrfs_lookup_ordered_extent(inode, block_start);
	if (ordered) {
		unlock_extent_cached(io_tree, block_start, block_end,
				     &cached_state);
		unlock_page(page);
		put_page(page);
		btrfs_start_ordered_extent(inode, ordered, 1);
		btrfs_put_ordered_extent(ordered);
		goto again;
	}

	/* Clear any stale delalloc state before setting it up again. */
	clear_extent_bit(&BTRFS_I(inode)->io_tree, block_start, block_end,
			  EXTENT_DIRTY | EXTENT_DELALLOC |
			  EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
			  0, 0, &cached_state);

	ret = btrfs_set_extent_delalloc(inode, block_start, block_end, 0,
					&cached_state, 0);
	if (ret) {
		unlock_extent_cached(io_tree, block_start, block_end,
				     &cached_state);
		goto out_unlock;
	}

	/* offset is always smaller than blocksize, so this always runs. */
	if (offset != blocksize) {
		if (!len)
			len = blocksize - offset;
		kaddr = kmap(page);
		/* Zero [block start, from) or [from, from + len). */
		if (front)
			memset(kaddr + (block_start - page_offset(page)),
				0, offset);
		else
			memset(kaddr + (block_start - page_offset(page)) +  offset,
				0, len);
		flush_dcache_page(page);
		kunmap(page);
	}
	ClearPageChecked(page);
	set_page_dirty(page);
	unlock_extent_cached(io_tree, block_start, block_end, &cached_state);

out_unlock:
	/* On failure give back the reserved data space as well. */
	if (ret)
		btrfs_delalloc_release_space(inode, data_reserved, block_start,
					     blocksize, true);
	btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, (ret != 0));
	unlock_page(page);
	put_page(page);
out:
	extent_changeset_free(data_reserved);
	return ret;
}
4989
4990static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode,
4991 u64 offset, u64 len)
4992{
4993 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
4994 struct btrfs_trans_handle *trans;
4995 int ret;
4996
4997
4998
4999
5000
5001 if (btrfs_fs_incompat(fs_info, NO_HOLES)) {
5002 BTRFS_I(inode)->last_trans = fs_info->generation;
5003 BTRFS_I(inode)->last_sub_trans = root->log_transid;
5004 BTRFS_I(inode)->last_log_commit = root->last_log_commit;
5005 return 0;
5006 }
5007
5008
5009
5010
5011
5012
5013 trans = btrfs_start_transaction(root, 3);
5014 if (IS_ERR(trans))
5015 return PTR_ERR(trans);
5016
5017 ret = btrfs_drop_extents(trans, root, inode, offset, offset + len, 1);
5018 if (ret) {
5019 btrfs_abort_transaction(trans, ret);
5020 btrfs_end_transaction(trans);
5021 return ret;
5022 }
5023
5024 ret = btrfs_insert_file_extent(trans, root, btrfs_ino(BTRFS_I(inode)),
5025 offset, 0, 0, len, 0, len, 0, 0, 0);
5026 if (ret)
5027 btrfs_abort_transaction(trans, ret);
5028 else
5029 btrfs_update_inode(trans, root, inode);
5030 btrfs_end_transaction(trans);
5031 return ret;
5032}
5033
5034
5035
5036
5037
5038
5039
/*
 * Prepare @inode for growing from @oldsize to @size by filling the range
 * between the two (rounded to sector boundaries) with holes.
 *
 * The tail of the block containing @oldsize is zeroed first.  The range is
 * then walked one extent map at a time; every extent that is not
 * preallocated gets a hole item (see maybe_insert_hole()) and a matching
 * hole extent map in the inode's extent map tree.
 *
 * Returns 0 on success or a negative errno.
 */
int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_map *em = NULL;
	struct extent_state *cached_state = NULL;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	u64 hole_start = ALIGN(oldsize, fs_info->sectorsize);
	u64 block_end = ALIGN(size, fs_info->sectorsize);
	u64 last_byte;
	u64 cur_offset;
	u64 hole_size;
	int err = 0;

	/*
	 * Zero the part of the block after @oldsize so that stale data does
	 * not become visible once the file is larger.
	 */
	err = btrfs_truncate_block(inode, oldsize, 0, 0);
	if (err)
		return err;

	/* The new size still lies within the block of the old size. */
	if (size <= hole_start)
		return 0;

	btrfs_lock_and_flush_ordered_range(io_tree, BTRFS_I(inode), hole_start,
					   block_end - 1, &cached_state);
	cur_offset = hole_start;
	while (1) {
		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
				block_end - cur_offset, 0);
		if (IS_ERR(em)) {
			err = PTR_ERR(em);
			/* Keep free_extent_map() below off the ERR_PTR. */
			em = NULL;
			break;
		}
		last_byte = min(extent_map_end(em), block_end);
		last_byte = ALIGN(last_byte, fs_info->sectorsize);
		/* Preallocated extents are left untouched. */
		if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
			struct extent_map *hole_em;
			hole_size = last_byte - cur_offset;

			err = maybe_insert_hole(root, inode, cur_offset,
						hole_size);
			if (err)
				break;
			btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset,
						cur_offset + hole_size - 1, 0);
			hole_em = alloc_extent_map();
			if (!hole_em) {
				/*
				 * No memory for the cached hole map: flag the
				 * inode as needing a full sync instead and go
				 * on with the next extent.
				 */
				set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
					&BTRFS_I(inode)->runtime_flags);
				goto next;
			}
			hole_em->start = cur_offset;
			hole_em->len = hole_size;
			hole_em->orig_start = cur_offset;

			hole_em->block_start = EXTENT_MAP_HOLE;
			hole_em->block_len = 0;
			hole_em->orig_block_len = 0;
			hole_em->ram_bytes = hole_size;
			hole_em->bdev = fs_info->fs_devices->latest_bdev;
			hole_em->compress_type = BTRFS_COMPRESS_NONE;
			hole_em->generation = fs_info->generation;

			/*
			 * Insert the hole map.  While an overlapping map is
			 * in the way (-EEXIST), drop the cached range and
			 * try again.
			 */
			while (1) {
				write_lock(&em_tree->lock);
				err = add_extent_mapping(em_tree, hole_em, 1);
				write_unlock(&em_tree->lock);
				if (err != -EEXIST)
					break;
				btrfs_drop_extent_cache(BTRFS_I(inode),
							cur_offset,
							cur_offset +
							hole_size - 1, 0);
			}
			free_extent_map(hole_em);
		}
next:
		free_extent_map(em);
		em = NULL;
		cur_offset = last_byte;
		if (cur_offset >= block_end)
			break;
	}
	/* em is non-NULL here only after the maybe_insert_hole() failure. */
	free_extent_map(em);
	unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state);
	return err;
}
5132
5133static int btrfs_setsize(struct inode *inode, struct iattr *attr)
5134{
5135 struct btrfs_root *root = BTRFS_I(inode)->root;
5136 struct btrfs_trans_handle *trans;
5137 loff_t oldsize = i_size_read(inode);
5138 loff_t newsize = attr->ia_size;
5139 int mask = attr->ia_valid;
5140 int ret;
5141
5142
5143
5144
5145
5146
5147
5148 if (newsize != oldsize) {
5149 inode_inc_iversion(inode);
5150 if (!(mask & (ATTR_CTIME | ATTR_MTIME)))
5151 inode->i_ctime = inode->i_mtime =
5152 current_time(inode);
5153 }
5154
5155 if (newsize > oldsize) {
5156
5157
5158
5159
5160
5161
5162
5163 btrfs_wait_for_snapshot_creation(root);
5164 ret = btrfs_cont_expand(inode, oldsize, newsize);
5165 if (ret) {
5166 btrfs_end_write_no_snapshotting(root);
5167 return ret;
5168 }
5169
5170 trans = btrfs_start_transaction(root, 1);
5171 if (IS_ERR(trans)) {
5172 btrfs_end_write_no_snapshotting(root);
5173 return PTR_ERR(trans);
5174 }
5175
5176 i_size_write(inode, newsize);
5177 btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
5178 pagecache_isize_extended(inode, oldsize, newsize);
5179 ret = btrfs_update_inode(trans, root, inode);
5180 btrfs_end_write_no_snapshotting(root);
5181 btrfs_end_transaction(trans);
5182 } else {
5183
5184
5185
5186
5187
5188
5189 if (newsize == 0)
5190 set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
5191 &BTRFS_I(inode)->runtime_flags);
5192
5193 truncate_setsize(inode, newsize);
5194
5195
5196 btrfs_inode_block_unlocked_dio(BTRFS_I(inode));
5197 inode_dio_wait(inode);
5198 btrfs_inode_resume_unlocked_dio(BTRFS_I(inode));
5199
5200 ret = btrfs_truncate(inode, newsize == oldsize);
5201 if (ret && inode->i_nlink) {
5202 int err;
5203
5204
5205
5206
5207
5208
5209
5210 err = btrfs_wait_ordered_range(inode, 0, (u64)-1);
5211 if (err)
5212 return err;
5213 i_size_write(inode, BTRFS_I(inode)->disk_i_size);
5214 }
5215 }
5216
5217 return ret;
5218}
5219
5220static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
5221{
5222 struct inode *inode = d_inode(dentry);
5223 struct btrfs_root *root = BTRFS_I(inode)->root;
5224 int err;
5225
5226 if (btrfs_root_readonly(root))
5227 return -EROFS;
5228
5229 err = setattr_prepare(dentry, attr);
5230 if (err)
5231 return err;
5232
5233 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
5234 err = btrfs_setsize(inode, attr);
5235 if (err)
5236 return err;
5237 }
5238
5239 if (attr->ia_valid) {
5240 setattr_copy(inode, attr);
5241 inode_inc_iversion(inode);
5242 err = btrfs_dirty_inode(inode);
5243
5244 if (!err && attr->ia_valid & ATTR_MODE)
5245 err = posix_acl_chmod(inode, inode->i_mode);
5246 }
5247
5248 return err;
5249}
5250
5251
5252
5253
5254
5255
5256
5257
5258
5259
5260
5261
5262
5263static void evict_inode_truncate_pages(struct inode *inode)
5264{
5265 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
5266 struct extent_map_tree *map_tree = &BTRFS_I(inode)->extent_tree;
5267 struct rb_node *node;
5268
5269 ASSERT(inode->i_state & I_FREEING);
5270 truncate_inode_pages_final(&inode->i_data);
5271
5272 write_lock(&map_tree->lock);
5273 while (!RB_EMPTY_ROOT(&map_tree->map.rb_root)) {
5274 struct extent_map *em;
5275
5276 node = rb_first_cached(&map_tree->map);
5277 em = rb_entry(node, struct extent_map, rb_node);
5278 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
5279 clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
5280 remove_extent_mapping(map_tree, em);
5281 free_extent_map(em);
5282 if (need_resched()) {
5283 write_unlock(&map_tree->lock);
5284 cond_resched();
5285 write_lock(&map_tree->lock);
5286 }
5287 }
5288 write_unlock(&map_tree->lock);
5289
5290
5291
5292
5293
5294
5295
5296
5297
5298
5299
5300
5301
5302
5303
5304
5305
5306 spin_lock(&io_tree->lock);
5307 while (!RB_EMPTY_ROOT(&io_tree->state)) {
5308 struct extent_state *state;
5309 struct extent_state *cached_state = NULL;
5310 u64 start;
5311 u64 end;
5312 unsigned state_flags;
5313
5314 node = rb_first(&io_tree->state);
5315 state = rb_entry(node, struct extent_state, rb_node);
5316 start = state->start;
5317 end = state->end;
5318 state_flags = state->state;
5319 spin_unlock(&io_tree->lock);
5320
5321 lock_extent_bits(io_tree, start, end, &cached_state);
5322
5323
5324
5325
5326
5327
5328
5329
5330
5331 if (state_flags & EXTENT_DELALLOC)
5332 btrfs_qgroup_free_data(inode, NULL, start, end - start + 1);
5333
5334 clear_extent_bit(io_tree, start, end,
5335 EXTENT_LOCKED | EXTENT_DIRTY |
5336 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
5337 EXTENT_DEFRAG, 1, 1, &cached_state);
5338
5339 cond_resched();
5340 spin_lock(&io_tree->lock);
5341 }
5342 spin_unlock(&io_tree->lock);
5343}
5344
5345static struct btrfs_trans_handle *evict_refill_and_join(struct btrfs_root *root,
5346 struct btrfs_block_rsv *rsv)
5347{
5348 struct btrfs_fs_info *fs_info = root->fs_info;
5349 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
5350 u64 delayed_refs_extra = btrfs_calc_trans_metadata_size(fs_info, 1);
5351 int failures = 0;
5352
5353 for (;;) {
5354 struct btrfs_trans_handle *trans;
5355 int ret;
5356
5357 ret = btrfs_block_rsv_refill(root, rsv,
5358 rsv->size + delayed_refs_extra,
5359 BTRFS_RESERVE_FLUSH_LIMIT);
5360
5361 if (ret && ++failures > 2) {
5362 btrfs_warn(fs_info,
5363 "could not allocate space for a delete; will truncate on mount");
5364 return ERR_PTR(-ENOSPC);
5365 }
5366
5367
5368
5369
5370
5371
5372
5373
5374
5375
5376
5377
5378
5379 trans = btrfs_join_transaction(root);
5380 if (IS_ERR(trans) || !ret) {
5381 if (!IS_ERR(trans)) {
5382 trans->block_rsv = &fs_info->trans_block_rsv;
5383 trans->bytes_reserved = delayed_refs_extra;
5384 btrfs_block_rsv_migrate(rsv, trans->block_rsv,
5385 delayed_refs_extra, 1);
5386 }
5387 return trans;
5388 }
5389
5390
5391
5392
5393
5394 if (!btrfs_check_space_for_delayed_refs(fs_info) &&
5395 !btrfs_block_rsv_migrate(global_rsv, rsv, rsv->size, 0))
5396 return trans;
5397
5398
5399 ret = btrfs_commit_transaction(trans);
5400 if (ret)
5401 return ERR_PTR(ret);
5402 }
5403}
5404
5405void btrfs_evict_inode(struct inode *inode)
5406{
5407 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
5408 struct btrfs_trans_handle *trans;
5409 struct btrfs_root *root = BTRFS_I(inode)->root;
5410 struct btrfs_block_rsv *rsv;
5411 int ret;
5412
5413 trace_btrfs_inode_evict(inode);
5414
5415 if (!root) {
5416 clear_inode(inode);
5417 return;
5418 }
5419
5420 evict_inode_truncate_pages(inode);
5421
5422 if (inode->i_nlink &&
5423 ((btrfs_root_refs(&root->root_item) != 0 &&
5424 root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID) ||
5425 btrfs_is_free_space_inode(BTRFS_I(inode))))
5426 goto no_delete;
5427
5428 if (is_bad_inode(inode))
5429 goto no_delete;
5430
5431 btrfs_free_io_failure_record(BTRFS_I(inode), 0, (u64)-1);
5432
5433 if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
5434 goto no_delete;
5435
5436 if (inode->i_nlink > 0) {
5437 BUG_ON(btrfs_root_refs(&root->root_item) != 0 &&
5438 root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID);
5439 goto no_delete;
5440 }
5441
5442 ret = btrfs_commit_inode_delayed_inode(BTRFS_I(inode));
5443 if (ret)
5444 goto no_delete;
5445
5446 rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
5447 if (!rsv)
5448 goto no_delete;
5449 rsv->size = btrfs_calc_trunc_metadata_size(fs_info, 1);
5450 rsv->failfast = 1;
5451
5452 btrfs_i_size_write(BTRFS_I(inode), 0);
5453
5454 while (1) {
5455 trans = evict_refill_and_join(root, rsv);
5456 if (IS_ERR(trans))
5457 goto free_rsv;
5458
5459 trans->block_rsv = rsv;
5460
5461 ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
5462 trans->block_rsv = &fs_info->trans_block_rsv;
5463 btrfs_end_transaction(trans);
5464 btrfs_btree_balance_dirty(fs_info);
5465 if (ret && ret != -ENOSPC && ret != -EAGAIN)
5466 goto free_rsv;
5467 else if (!ret)
5468 break;
5469 }
5470
5471
5472
5473
5474
5475
5476
5477
5478
5479
5480 trans = evict_refill_and_join(root, rsv);
5481 if (!IS_ERR(trans)) {
5482 trans->block_rsv = rsv;
5483 btrfs_orphan_del(trans, BTRFS_I(inode));
5484 trans->block_rsv = &fs_info->trans_block_rsv;
5485 btrfs_end_transaction(trans);
5486 }
5487
5488 if (!(root == fs_info->tree_root ||
5489 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID))
5490 btrfs_return_ino(root, btrfs_ino(BTRFS_I(inode)));
5491
5492free_rsv:
5493 btrfs_free_block_rsv(fs_info, rsv);
5494no_delete:
5495
5496
5497
5498
5499
5500 btrfs_remove_delayed_node(BTRFS_I(inode));
5501 clear_inode(inode);
5502}
5503
5504
5505
5506
5507
5508
5509
5510
5511static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
5512 struct btrfs_key *location, u8 *type)
5513{
5514 const char *name = dentry->d_name.name;
5515 int namelen = dentry->d_name.len;
5516 struct btrfs_dir_item *di;
5517 struct btrfs_path *path;
5518 struct btrfs_root *root = BTRFS_I(dir)->root;
5519 int ret = 0;
5520
5521 path = btrfs_alloc_path();
5522 if (!path)
5523 return -ENOMEM;
5524
5525 di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(BTRFS_I(dir)),
5526 name, namelen, 0);
5527 if (IS_ERR_OR_NULL(di)) {
5528 ret = di ? PTR_ERR(di) : -ENOENT;
5529 goto out;
5530 }
5531
5532 btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
5533 if (location->type != BTRFS_INODE_ITEM_KEY &&
5534 location->type != BTRFS_ROOT_ITEM_KEY) {
5535 ret = -EUCLEAN;
5536 btrfs_warn(root->fs_info,
5537"%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))",
5538 __func__, name, btrfs_ino(BTRFS_I(dir)),
5539 location->objectid, location->type, location->offset);
5540 }
5541 if (!ret)
5542 *type = btrfs_dir_type(path->nodes[0], di);
5543out:
5544 btrfs_free_path(path);
5545 return ret;
5546}
5547
5548
5549
5550
5551
5552
5553static int fixup_tree_root_location(struct btrfs_fs_info *fs_info,
5554 struct inode *dir,
5555 struct dentry *dentry,
5556 struct btrfs_key *location,
5557 struct btrfs_root **sub_root)
5558{
5559 struct btrfs_path *path;
5560 struct btrfs_root *new_root;
5561 struct btrfs_root_ref *ref;
5562 struct extent_buffer *leaf;
5563 struct btrfs_key key;
5564 int ret;
5565 int err = 0;
5566
5567 path = btrfs_alloc_path();
5568 if (!path) {
5569 err = -ENOMEM;
5570 goto out;
5571 }
5572
5573 err = -ENOENT;
5574 key.objectid = BTRFS_I(dir)->root->root_key.objectid;
5575 key.type = BTRFS_ROOT_REF_KEY;
5576 key.offset = location->objectid;
5577
5578 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
5579 if (ret) {
5580 if (ret < 0)
5581 err = ret;
5582 goto out;
5583 }
5584
5585 leaf = path->nodes[0];
5586 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
5587 if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(BTRFS_I(dir)) ||
5588 btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len)
5589 goto out;
5590
5591 ret = memcmp_extent_buffer(leaf, dentry->d_name.name,
5592 (unsigned long)(ref + 1),
5593 dentry->d_name.len);
5594 if (ret)
5595 goto out;
5596
5597 btrfs_release_path(path);
5598
5599 new_root = btrfs_read_fs_root_no_name(fs_info, location);
5600 if (IS_ERR(new_root)) {
5601 err = PTR_ERR(new_root);
5602 goto out;
5603 }
5604
5605 *sub_root = new_root;
5606 location->objectid = btrfs_root_dirid(&new_root->root_item);
5607 location->type = BTRFS_INODE_ITEM_KEY;
5608 location->offset = 0;
5609 err = 0;
5610out:
5611 btrfs_free_path(path);
5612 return err;
5613}
5614
5615static void inode_tree_add(struct inode *inode)
5616{
5617 struct btrfs_root *root = BTRFS_I(inode)->root;
5618 struct btrfs_inode *entry;
5619 struct rb_node **p;
5620 struct rb_node *parent;
5621 struct rb_node *new = &BTRFS_I(inode)->rb_node;
5622 u64 ino = btrfs_ino(BTRFS_I(inode));
5623
5624 if (inode_unhashed(inode))
5625 return;
5626 parent = NULL;
5627 spin_lock(&root->inode_lock);
5628 p = &root->inode_tree.rb_node;
5629 while (*p) {
5630 parent = *p;
5631 entry = rb_entry(parent, struct btrfs_inode, rb_node);
5632
5633 if (ino < btrfs_ino(entry))
5634 p = &parent->rb_left;
5635 else if (ino > btrfs_ino(entry))
5636 p = &parent->rb_right;
5637 else {
5638 WARN_ON(!(entry->vfs_inode.i_state &
5639 (I_WILL_FREE | I_FREEING)));
5640 rb_replace_node(parent, new, &root->inode_tree);
5641 RB_CLEAR_NODE(parent);
5642 spin_unlock(&root->inode_lock);
5643 return;
5644 }
5645 }
5646 rb_link_node(new, parent, p);
5647 rb_insert_color(new, &root->inode_tree);
5648 spin_unlock(&root->inode_lock);
5649}
5650
5651static void inode_tree_del(struct inode *inode)
5652{
5653 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
5654 struct btrfs_root *root = BTRFS_I(inode)->root;
5655 int empty = 0;
5656
5657 spin_lock(&root->inode_lock);
5658 if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) {
5659 rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree);
5660 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
5661 empty = RB_EMPTY_ROOT(&root->inode_tree);
5662 }
5663 spin_unlock(&root->inode_lock);
5664
5665 if (empty && btrfs_root_refs(&root->root_item) == 0) {
5666 synchronize_srcu(&fs_info->subvol_srcu);
5667 spin_lock(&root->inode_lock);
5668 empty = RB_EMPTY_ROOT(&root->inode_tree);
5669 spin_unlock(&root->inode_lock);
5670 if (empty)
5671 btrfs_add_dead_root(root);
5672 }
5673}
5674
5675
5676static int btrfs_init_locked_inode(struct inode *inode, void *p)
5677{
5678 struct btrfs_iget_args *args = p;
5679 inode->i_ino = args->location->objectid;
5680 memcpy(&BTRFS_I(inode)->location, args->location,
5681 sizeof(*args->location));
5682 BTRFS_I(inode)->root = args->root;
5683 return 0;
5684}
5685
5686static int btrfs_find_actor(struct inode *inode, void *opaque)
5687{
5688 struct btrfs_iget_args *args = opaque;
5689 return args->location->objectid == BTRFS_I(inode)->location.objectid &&
5690 args->root == BTRFS_I(inode)->root;
5691}
5692
5693static struct inode *btrfs_iget_locked(struct super_block *s,
5694 struct btrfs_key *location,
5695 struct btrfs_root *root)
5696{
5697 struct inode *inode;
5698 struct btrfs_iget_args args;
5699 unsigned long hashval = btrfs_inode_hash(location->objectid, root);
5700
5701 args.location = location;
5702 args.root = root;
5703
5704 inode = iget5_locked(s, hashval, btrfs_find_actor,
5705 btrfs_init_locked_inode,
5706 (void *)&args);
5707 return inode;
5708}
5709
5710
5711
5712
5713struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location,
5714 struct btrfs_root *root, int *new,
5715 struct btrfs_path *path)
5716{
5717 struct inode *inode;
5718
5719 inode = btrfs_iget_locked(s, location, root);
5720 if (!inode)
5721 return ERR_PTR(-ENOMEM);
5722
5723 if (inode->i_state & I_NEW) {
5724 int ret;
5725
5726 ret = btrfs_read_locked_inode(inode, path);
5727 if (!ret) {
5728 inode_tree_add(inode);
5729 unlock_new_inode(inode);
5730 if (new)
5731 *new = 1;
5732 } else {
5733 iget_failed(inode);
5734
5735
5736
5737
5738
5739 if (ret > 0)
5740 ret = -ENOENT;
5741 inode = ERR_PTR(ret);
5742 }
5743 }
5744
5745 return inode;
5746}
5747
5748struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
5749 struct btrfs_root *root, int *new)
5750{
5751 return btrfs_iget_path(s, location, root, new, NULL);
5752}
5753
5754static struct inode *new_simple_dir(struct super_block *s,
5755 struct btrfs_key *key,
5756 struct btrfs_root *root)
5757{
5758 struct inode *inode = new_inode(s);
5759
5760 if (!inode)
5761 return ERR_PTR(-ENOMEM);
5762
5763 BTRFS_I(inode)->root = root;
5764 memcpy(&BTRFS_I(inode)->location, key, sizeof(*key));
5765 set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags);
5766
5767 inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID;
5768 inode->i_op = &btrfs_dir_ro_inode_operations;
5769 inode->i_opflags &= ~IOP_XATTR;
5770 inode->i_fop = &simple_dir_operations;
5771 inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
5772 inode->i_mtime = current_time(inode);
5773 inode->i_atime = inode->i_mtime;
5774 inode->i_ctime = inode->i_mtime;
5775 BTRFS_I(inode)->i_otime = inode->i_mtime;
5776
5777 return inode;
5778}
5779
5780static inline u8 btrfs_inode_type(struct inode *inode)
5781{
5782
5783
5784
5785
5786 BUILD_BUG_ON(BTRFS_FT_UNKNOWN != FT_UNKNOWN);
5787 BUILD_BUG_ON(BTRFS_FT_REG_FILE != FT_REG_FILE);
5788 BUILD_BUG_ON(BTRFS_FT_DIR != FT_DIR);
5789 BUILD_BUG_ON(BTRFS_FT_CHRDEV != FT_CHRDEV);
5790 BUILD_BUG_ON(BTRFS_FT_BLKDEV != FT_BLKDEV);
5791 BUILD_BUG_ON(BTRFS_FT_FIFO != FT_FIFO);
5792 BUILD_BUG_ON(BTRFS_FT_SOCK != FT_SOCK);
5793 BUILD_BUG_ON(BTRFS_FT_SYMLINK != FT_SYMLINK);
5794
5795 return fs_umode_to_ftype(inode->i_mode);
5796}
5797
5798struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
5799{
5800 struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
5801 struct inode *inode;
5802 struct btrfs_root *root = BTRFS_I(dir)->root;
5803 struct btrfs_root *sub_root = root;
5804 struct btrfs_key location;
5805 u8 di_type = 0;
5806 int index;
5807 int ret = 0;
5808
5809 if (dentry->d_name.len > BTRFS_NAME_LEN)
5810 return ERR_PTR(-ENAMETOOLONG);
5811
5812 ret = btrfs_inode_by_name(dir, dentry, &location, &di_type);
5813 if (ret < 0)
5814 return ERR_PTR(ret);
5815
5816 if (location.type == BTRFS_INODE_ITEM_KEY) {
5817 inode = btrfs_iget(dir->i_sb, &location, root, NULL);
5818 if (IS_ERR(inode))
5819 return inode;
5820
5821
5822 if (btrfs_inode_type(inode) != di_type) {
5823 btrfs_crit(fs_info,
5824"inode mode mismatch with dir: inode mode=0%o btrfs type=%u dir type=%u",
5825 inode->i_mode, btrfs_inode_type(inode),
5826 di_type);
5827 iput(inode);
5828 return ERR_PTR(-EUCLEAN);
5829 }
5830 return inode;
5831 }
5832
5833 index = srcu_read_lock(&fs_info->subvol_srcu);
5834 ret = fixup_tree_root_location(fs_info, dir, dentry,
5835 &location, &sub_root);
5836 if (ret < 0) {
5837 if (ret != -ENOENT)
5838 inode = ERR_PTR(ret);
5839 else
5840 inode = new_simple_dir(dir->i_sb, &location, sub_root);
5841 } else {
5842 inode = btrfs_iget(dir->i_sb, &location, sub_root, NULL);
5843 }
5844 srcu_read_unlock(&fs_info->subvol_srcu, index);
5845
5846 if (!IS_ERR(inode) && root != sub_root) {
5847 down_read(&fs_info->cleanup_work_sem);
5848 if (!sb_rdonly(inode->i_sb))
5849 ret = btrfs_orphan_cleanup(sub_root);
5850 up_read(&fs_info->cleanup_work_sem);
5851 if (ret) {
5852 iput(inode);
5853 inode = ERR_PTR(ret);
5854 }
5855 }
5856
5857 return inode;
5858}
5859
5860static int btrfs_dentry_delete(const struct dentry *dentry)
5861{
5862 struct btrfs_root *root;
5863 struct inode *inode = d_inode(dentry);
5864
5865 if (!inode && !IS_ROOT(dentry))
5866 inode = d_inode(dentry->d_parent);
5867
5868 if (inode) {
5869 root = BTRFS_I(inode)->root;
5870 if (btrfs_root_refs(&root->root_item) == 0)
5871 return 1;
5872
5873 if (btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
5874 return 1;
5875 }
5876 return 0;
5877}
5878
5879static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
5880 unsigned int flags)
5881{
5882 struct inode *inode = btrfs_lookup_dentry(dir, dentry);
5883
5884 if (inode == ERR_PTR(-ENOENT))
5885 inode = NULL;
5886 return d_splice_alias(inode, dentry);
5887}
5888
5889
5890
5891
5892
5893
5894
5895
5896
5897
5898static int btrfs_opendir(struct inode *inode, struct file *file)
5899{
5900 struct btrfs_file_private *private;
5901
5902 private = kzalloc(sizeof(struct btrfs_file_private), GFP_KERNEL);
5903 if (!private)
5904 return -ENOMEM;
5905 private->filldir_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
5906 if (!private->filldir_buf) {
5907 kfree(private);
5908 return -ENOMEM;
5909 }
5910 file->private_data = private;
5911 return 0;
5912}
5913
5914struct dir_entry {
5915 u64 ino;
5916 u64 offset;
5917 unsigned type;
5918 int name_len;
5919};
5920
5921static int btrfs_filldir(void *addr, int entries, struct dir_context *ctx)
5922{
5923 while (entries--) {
5924 struct dir_entry *entry = addr;
5925 char *name = (char *)(entry + 1);
5926
5927 ctx->pos = get_unaligned(&entry->offset);
5928 if (!dir_emit(ctx, name, get_unaligned(&entry->name_len),
5929 get_unaligned(&entry->ino),
5930 get_unaligned(&entry->type)))
5931 return 1;
5932 addr += sizeof(struct dir_entry) +
5933 get_unaligned(&entry->name_len);
5934 ctx->pos++;
5935 }
5936 return 0;
5937}
5938
5939static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
5940{
5941 struct inode *inode = file_inode(file);
5942 struct btrfs_root *root = BTRFS_I(inode)->root;
5943 struct btrfs_file_private *private = file->private_data;
5944 struct btrfs_dir_item *di;
5945 struct btrfs_key key;
5946 struct btrfs_key found_key;
5947 struct btrfs_path *path;
5948 void *addr;
5949 struct list_head ins_list;
5950 struct list_head del_list;
5951 int ret;
5952 struct extent_buffer *leaf;
5953 int slot;
5954 char *name_ptr;
5955 int name_len;
5956 int entries = 0;
5957 int total_len = 0;
5958 bool put = false;
5959 struct btrfs_key location;
5960
5961 if (!dir_emit_dots(file, ctx))
5962 return 0;
5963
5964 path = btrfs_alloc_path();
5965 if (!path)
5966 return -ENOMEM;
5967
5968 addr = private->filldir_buf;
5969 path->reada = READA_FORWARD;
5970
5971 INIT_LIST_HEAD(&ins_list);
5972 INIT_LIST_HEAD(&del_list);
5973 put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list);
5974
5975again:
5976 key.type = BTRFS_DIR_INDEX_KEY;
5977 key.offset = ctx->pos;
5978 key.objectid = btrfs_ino(BTRFS_I(inode));
5979
5980 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5981 if (ret < 0)
5982 goto err;
5983
5984 while (1) {
5985 struct dir_entry *entry;
5986
5987 leaf = path->nodes[0];
5988 slot = path->slots[0];
5989 if (slot >= btrfs_header_nritems(leaf)) {
5990 ret = btrfs_next_leaf(root, path);
5991 if (ret < 0)
5992 goto err;
5993 else if (ret > 0)
5994 break;
5995 continue;
5996 }
5997
5998 btrfs_item_key_to_cpu(leaf, &found_key, slot);
5999
6000 if (found_key.objectid != key.objectid)
6001 break;
6002 if (found_key.type != BTRFS_DIR_INDEX_KEY)
6003 break;
6004 if (found_key.offset < ctx->pos)
6005 goto next;
6006 if (btrfs_should_delete_dir_index(&del_list, found_key.offset))
6007 goto next;
6008 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
6009 name_len = btrfs_dir_name_len(leaf, di);
6010 if ((total_len + sizeof(struct dir_entry) + name_len) >=
6011 PAGE_SIZE) {
6012 btrfs_release_path(path);
6013 ret = btrfs_filldir(private->filldir_buf, entries, ctx);
6014 if (ret)
6015 goto nopos;
6016 addr = private->filldir_buf;
6017 entries = 0;
6018 total_len = 0;
6019 goto again;
6020 }
6021
6022 entry = addr;
6023 put_unaligned(name_len, &entry->name_len);
6024 name_ptr = (char *)(entry + 1);
6025 read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1),
6026 name_len);
6027 put_unaligned(fs_ftype_to_dtype(btrfs_dir_type(leaf, di)),
6028 &entry->type);
6029 btrfs_dir_item_key_to_cpu(leaf, di, &location);
6030 put_unaligned(location.objectid, &entry->ino);
6031 put_unaligned(found_key.offset, &entry->offset);
6032 entries++;
6033 addr += sizeof(struct dir_entry) + name_len;
6034 total_len += sizeof(struct dir_entry) + name_len;
6035next:
6036 path->slots[0]++;
6037 }
6038 btrfs_release_path(path);
6039
6040 ret = btrfs_filldir(private->filldir_buf, entries, ctx);
6041 if (ret)
6042 goto nopos;
6043
6044 ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list);
6045 if (ret)
6046 goto nopos;
6047
6048
6049
6050
6051
6052
6053
6054
6055
6056
6057
6058
6059
6060
6061
6062
6063
6064
6065 if (ctx->pos >= INT_MAX)
6066 ctx->pos = LLONG_MAX;
6067 else
6068 ctx->pos = INT_MAX;
6069nopos:
6070 ret = 0;
6071err:
6072 if (put)
6073 btrfs_readdir_put_delayed_items(inode, &ins_list, &del_list);
6074 btrfs_free_path(path);
6075 return ret;
6076}
6077
6078
6079
6080
6081
6082
6083
6084static int btrfs_dirty_inode(struct inode *inode)
6085{
6086 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
6087 struct btrfs_root *root = BTRFS_I(inode)->root;
6088 struct btrfs_trans_handle *trans;
6089 int ret;
6090
6091 if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
6092 return 0;
6093
6094 trans = btrfs_join_transaction(root);
6095 if (IS_ERR(trans))
6096 return PTR_ERR(trans);
6097
6098 ret = btrfs_update_inode(trans, root, inode);
6099 if (ret && ret == -ENOSPC) {
6100
6101 btrfs_end_transaction(trans);
6102 trans = btrfs_start_transaction(root, 1);
6103 if (IS_ERR(trans))
6104 return PTR_ERR(trans);
6105
6106 ret = btrfs_update_inode(trans, root, inode);
6107 }
6108 btrfs_end_transaction(trans);
6109 if (BTRFS_I(inode)->delayed_node)
6110 btrfs_balance_delayed_items(fs_info);
6111
6112 return ret;
6113}
6114
6115
6116
6117
6118
6119static int btrfs_update_time(struct inode *inode, struct timespec64 *now,
6120 int flags)
6121{
6122 struct btrfs_root *root = BTRFS_I(inode)->root;
6123 bool dirty = flags & ~S_VERSION;
6124
6125 if (btrfs_root_readonly(root))
6126 return -EROFS;
6127
6128 if (flags & S_VERSION)
6129 dirty |= inode_maybe_inc_iversion(inode, dirty);
6130 if (flags & S_CTIME)
6131 inode->i_ctime = *now;
6132 if (flags & S_MTIME)
6133 inode->i_mtime = *now;
6134 if (flags & S_ATIME)
6135 inode->i_atime = *now;
6136 return dirty ? btrfs_dirty_inode(inode) : 0;
6137}
6138
6139
6140
6141
6142
6143
6144static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
6145{
6146 struct btrfs_root *root = inode->root;
6147 struct btrfs_key key, found_key;
6148 struct btrfs_path *path;
6149 struct extent_buffer *leaf;
6150 int ret;
6151
6152 key.objectid = btrfs_ino(inode);
6153 key.type = BTRFS_DIR_INDEX_KEY;
6154 key.offset = (u64)-1;
6155
6156 path = btrfs_alloc_path();
6157 if (!path)
6158 return -ENOMEM;
6159
6160 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6161 if (ret < 0)
6162 goto out;
6163
6164 if (ret == 0)
6165 goto out;
6166 ret = 0;
6167
6168
6169
6170
6171
6172
6173
6174 if (path->slots[0] == 0) {
6175 inode->index_cnt = 2;
6176 goto out;
6177 }
6178
6179 path->slots[0]--;
6180
6181 leaf = path->nodes[0];
6182 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
6183
6184 if (found_key.objectid != btrfs_ino(inode) ||
6185 found_key.type != BTRFS_DIR_INDEX_KEY) {
6186 inode->index_cnt = 2;
6187 goto out;
6188 }
6189
6190 inode->index_cnt = found_key.offset + 1;
6191out:
6192 btrfs_free_path(path);
6193 return ret;
6194}
6195
6196
6197
6198
6199
6200int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index)
6201{
6202 int ret = 0;
6203
6204 if (dir->index_cnt == (u64)-1) {
6205 ret = btrfs_inode_delayed_dir_index_count(dir);
6206 if (ret) {
6207 ret = btrfs_set_inode_index_count(dir);
6208 if (ret)
6209 return ret;
6210 }
6211 }
6212
6213 *index = dir->index_cnt;
6214 dir->index_cnt++;
6215
6216 return ret;
6217}
6218
6219static int btrfs_insert_inode_locked(struct inode *inode)
6220{
6221 struct btrfs_iget_args args;
6222 args.location = &BTRFS_I(inode)->location;
6223 args.root = BTRFS_I(inode)->root;
6224
6225 return insert_inode_locked4(inode,
6226 btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root),
6227 btrfs_find_actor, &args);
6228}
6229
6230
6231
6232
6233
6234
6235static void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
6236{
6237 unsigned int flags;
6238
6239 if (!dir)
6240 return;
6241
6242 flags = BTRFS_I(dir)->flags;
6243
6244 if (flags & BTRFS_INODE_NOCOMPRESS) {
6245 BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
6246 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
6247 } else if (flags & BTRFS_INODE_COMPRESS) {
6248 BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
6249 BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
6250 }
6251
6252 if (flags & BTRFS_INODE_NODATACOW) {
6253 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
6254 if (S_ISREG(inode->i_mode))
6255 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
6256 }
6257
6258 btrfs_sync_inode_flags_to_i_flags(inode);
6259}
6260
6261static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
6262 struct btrfs_root *root,
6263 struct inode *dir,
6264 const char *name, int name_len,
6265 u64 ref_objectid, u64 objectid,
6266 umode_t mode, u64 *index)
6267{
6268 struct btrfs_fs_info *fs_info = root->fs_info;
6269 struct inode *inode;
6270 struct btrfs_inode_item *inode_item;
6271 struct btrfs_key *location;
6272 struct btrfs_path *path;
6273 struct btrfs_inode_ref *ref;
6274 struct btrfs_key key[2];
6275 u32 sizes[2];
6276 int nitems = name ? 2 : 1;
6277 unsigned long ptr;
6278 int ret;
6279
6280 path = btrfs_alloc_path();
6281 if (!path)
6282 return ERR_PTR(-ENOMEM);
6283
6284 inode = new_inode(fs_info->sb);
6285 if (!inode) {
6286 btrfs_free_path(path);
6287 return ERR_PTR(-ENOMEM);
6288 }
6289
6290
6291
6292
6293
6294 if (!name)
6295 set_nlink(inode, 0);
6296
6297
6298
6299
6300
6301 inode->i_ino = objectid;
6302
6303 if (dir && name) {
6304 trace_btrfs_inode_request(dir);
6305
6306 ret = btrfs_set_inode_index(BTRFS_I(dir), index);
6307 if (ret) {
6308 btrfs_free_path(path);
6309 iput(inode);
6310 return ERR_PTR(ret);
6311 }
6312 } else if (dir) {
6313 *index = 0;
6314 }
6315
6316
6317
6318
6319
6320 BTRFS_I(inode)->index_cnt = 2;
6321 BTRFS_I(inode)->dir_index = *index;
6322 BTRFS_I(inode)->root = root;
6323 BTRFS_I(inode)->generation = trans->transid;
6324 inode->i_generation = BTRFS_I(inode)->generation;
6325
6326
6327
6328
6329
6330
6331
6332 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
6333
6334 key[0].objectid = objectid;
6335 key[0].type = BTRFS_INODE_ITEM_KEY;
6336 key[0].offset = 0;
6337
6338 sizes[0] = sizeof(struct btrfs_inode_item);
6339
6340 if (name) {
6341
6342
6343
6344
6345
6346
6347 key[1].objectid = objectid;
6348 key[1].type = BTRFS_INODE_REF_KEY;
6349 key[1].offset = ref_objectid;
6350
6351 sizes[1] = name_len + sizeof(*ref);
6352 }
6353
6354 location = &BTRFS_I(inode)->location;
6355 location->objectid = objectid;
6356 location->offset = 0;
6357 location->type = BTRFS_INODE_ITEM_KEY;
6358
6359 ret = btrfs_insert_inode_locked(inode);
6360 if (ret < 0) {
6361 iput(inode);
6362 goto fail;
6363 }
6364
6365 path->leave_spinning = 1;
6366 ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems);
6367 if (ret != 0)
6368 goto fail_unlock;
6369
6370 inode_init_owner(inode, dir, mode);
6371 inode_set_bytes(inode, 0);
6372
6373 inode->i_mtime = current_time(inode);
6374 inode->i_atime = inode->i_mtime;
6375 inode->i_ctime = inode->i_mtime;
6376 BTRFS_I(inode)->i_otime = inode->i_mtime;
6377
6378 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
6379 struct btrfs_inode_item);
6380 memzero_extent_buffer(path->nodes[0], (unsigned long)inode_item,
6381 sizeof(*inode_item));
6382 fill_inode_item(trans, path->nodes[0], inode_item, inode);
6383
6384 if (name) {
6385 ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
6386 struct btrfs_inode_ref);
6387 btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
6388 btrfs_set_inode_ref_index(path->nodes[0], ref, *index);
6389 ptr = (unsigned long)(ref + 1);
6390 write_extent_buffer(path->nodes[0], name, ptr, name_len);
6391 }
6392
6393 btrfs_mark_buffer_dirty(path->nodes[0]);
6394 btrfs_free_path(path);
6395
6396 btrfs_inherit_iflags(inode, dir);
6397
6398 if (S_ISREG(mode)) {
6399 if (btrfs_test_opt(fs_info, NODATASUM))
6400 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
6401 if (btrfs_test_opt(fs_info, NODATACOW))
6402 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW |
6403 BTRFS_INODE_NODATASUM;
6404 }
6405
6406 inode_tree_add(inode);
6407
6408 trace_btrfs_inode_new(inode);
6409 btrfs_set_inode_last_trans(trans, inode);
6410
6411 btrfs_update_root_times(trans, root);
6412
6413 ret = btrfs_inode_inherit_props(trans, inode, dir);
6414 if (ret)
6415 btrfs_err(fs_info,
6416 "error inheriting props for ino %llu (root %llu): %d",
6417 btrfs_ino(BTRFS_I(inode)), root->root_key.objectid, ret);
6418
6419 return inode;
6420
6421fail_unlock:
6422 discard_new_inode(inode);
6423fail:
6424 if (dir && name)
6425 BTRFS_I(dir)->index_cnt--;
6426 btrfs_free_path(path);
6427 return ERR_PTR(ret);
6428}
6429
6430
6431
6432
6433
6434
6435
6436int btrfs_add_link(struct btrfs_trans_handle *trans,
6437 struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
6438 const char *name, int name_len, int add_backref, u64 index)
6439{
6440 int ret = 0;
6441 struct btrfs_key key;
6442 struct btrfs_root *root = parent_inode->root;
6443 u64 ino = btrfs_ino(inode);
6444 u64 parent_ino = btrfs_ino(parent_inode);
6445
6446 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
6447 memcpy(&key, &inode->root->root_key, sizeof(key));
6448 } else {
6449 key.objectid = ino;
6450 key.type = BTRFS_INODE_ITEM_KEY;
6451 key.offset = 0;
6452 }
6453
6454 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
6455 ret = btrfs_add_root_ref(trans, key.objectid,
6456 root->root_key.objectid, parent_ino,
6457 index, name, name_len);
6458 } else if (add_backref) {
6459 ret = btrfs_insert_inode_ref(trans, root, name, name_len, ino,
6460 parent_ino, index);
6461 }
6462
6463
6464 if (ret)
6465 return ret;
6466
6467 ret = btrfs_insert_dir_item(trans, name, name_len, parent_inode, &key,
6468 btrfs_inode_type(&inode->vfs_inode), index);
6469 if (ret == -EEXIST || ret == -EOVERFLOW)
6470 goto fail_dir_item;
6471 else if (ret) {
6472 btrfs_abort_transaction(trans, ret);
6473 return ret;
6474 }
6475
6476 btrfs_i_size_write(parent_inode, parent_inode->vfs_inode.i_size +
6477 name_len * 2);
6478 inode_inc_iversion(&parent_inode->vfs_inode);
6479
6480
6481
6482
6483
6484
6485 if (!test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags)) {
6486 struct timespec64 now = current_time(&parent_inode->vfs_inode);
6487
6488 parent_inode->vfs_inode.i_mtime = now;
6489 parent_inode->vfs_inode.i_ctime = now;
6490 }
6491 ret = btrfs_update_inode(trans, root, &parent_inode->vfs_inode);
6492 if (ret)
6493 btrfs_abort_transaction(trans, ret);
6494 return ret;
6495
6496fail_dir_item:
6497 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
6498 u64 local_index;
6499 int err;
6500 err = btrfs_del_root_ref(trans, key.objectid,
6501 root->root_key.objectid, parent_ino,
6502 &local_index, name, name_len);
6503 if (err)
6504 btrfs_abort_transaction(trans, err);
6505 } else if (add_backref) {
6506 u64 local_index;
6507 int err;
6508
6509 err = btrfs_del_inode_ref(trans, root, name, name_len,
6510 ino, parent_ino, &local_index);
6511 if (err)
6512 btrfs_abort_transaction(trans, err);
6513 }
6514
6515
6516 return ret;
6517}
6518
6519static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
6520 struct btrfs_inode *dir, struct dentry *dentry,
6521 struct btrfs_inode *inode, int backref, u64 index)
6522{
6523 int err = btrfs_add_link(trans, dir, inode,
6524 dentry->d_name.name, dentry->d_name.len,
6525 backref, index);
6526 if (err > 0)
6527 err = -EEXIST;
6528 return err;
6529}
6530
6531static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
6532 umode_t mode, dev_t rdev)
6533{
6534 struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
6535 struct btrfs_trans_handle *trans;
6536 struct btrfs_root *root = BTRFS_I(dir)->root;
6537 struct inode *inode = NULL;
6538 int err;
6539 u64 objectid;
6540 u64 index = 0;
6541
6542
6543
6544
6545
6546
6547 trans = btrfs_start_transaction(root, 5);
6548 if (IS_ERR(trans))
6549 return PTR_ERR(trans);
6550
6551 err = btrfs_find_free_ino(root, &objectid);
6552 if (err)
6553 goto out_unlock;
6554
6555 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
6556 dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), objectid,
6557 mode, &index);
6558 if (IS_ERR(inode)) {
6559 err = PTR_ERR(inode);
6560 inode = NULL;
6561 goto out_unlock;
6562 }
6563
6564
6565
6566
6567
6568
6569
6570 inode->i_op = &btrfs_special_inode_operations;
6571 init_special_inode(inode, inode->i_mode, rdev);
6572
6573 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
6574 if (err)
6575 goto out_unlock;
6576
6577 err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode),
6578 0, index);
6579 if (err)
6580 goto out_unlock;
6581
6582 btrfs_update_inode(trans, root, inode);
6583 d_instantiate_new(dentry, inode);
6584
6585out_unlock:
6586 btrfs_end_transaction(trans);
6587 btrfs_btree_balance_dirty(fs_info);
6588 if (err && inode) {
6589 inode_dec_link_count(inode);
6590 discard_new_inode(inode);
6591 }
6592 return err;
6593}
6594
6595static int btrfs_create(struct inode *dir, struct dentry *dentry,
6596 umode_t mode, bool excl)
6597{
6598 struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
6599 struct btrfs_trans_handle *trans;
6600 struct btrfs_root *root = BTRFS_I(dir)->root;
6601 struct inode *inode = NULL;
6602 int err;
6603 u64 objectid;
6604 u64 index = 0;
6605
6606
6607
6608
6609
6610
6611 trans = btrfs_start_transaction(root, 5);
6612 if (IS_ERR(trans))
6613 return PTR_ERR(trans);
6614
6615 err = btrfs_find_free_ino(root, &objectid);
6616 if (err)
6617 goto out_unlock;
6618
6619 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
6620 dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), objectid,
6621 mode, &index);
6622 if (IS_ERR(inode)) {
6623 err = PTR_ERR(inode);
6624 inode = NULL;
6625 goto out_unlock;
6626 }
6627
6628
6629
6630
6631
6632
6633 inode->i_fop = &btrfs_file_operations;
6634 inode->i_op = &btrfs_file_inode_operations;
6635 inode->i_mapping->a_ops = &btrfs_aops;
6636
6637 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
6638 if (err)
6639 goto out_unlock;
6640
6641 err = btrfs_update_inode(trans, root, inode);
6642 if (err)
6643 goto out_unlock;
6644
6645 err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode),
6646 0, index);
6647 if (err)
6648 goto out_unlock;
6649
6650 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
6651 d_instantiate_new(dentry, inode);
6652
6653out_unlock:
6654 btrfs_end_transaction(trans);
6655 if (err && inode) {
6656 inode_dec_link_count(inode);
6657 discard_new_inode(inode);
6658 }
6659 btrfs_btree_balance_dirty(fs_info);
6660 return err;
6661}
6662
6663static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
6664 struct dentry *dentry)
6665{
6666 struct btrfs_trans_handle *trans = NULL;
6667 struct btrfs_root *root = BTRFS_I(dir)->root;
6668 struct inode *inode = d_inode(old_dentry);
6669 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
6670 u64 index;
6671 int err;
6672 int drop_inode = 0;
6673
6674
6675 if (root->root_key.objectid != BTRFS_I(inode)->root->root_key.objectid)
6676 return -EXDEV;
6677
6678 if (inode->i_nlink >= BTRFS_LINK_MAX)
6679 return -EMLINK;
6680
6681 err = btrfs_set_inode_index(BTRFS_I(dir), &index);
6682 if (err)
6683 goto fail;
6684
6685
6686
6687
6688
6689
6690
6691 trans = btrfs_start_transaction(root, inode->i_nlink ? 5 : 6);
6692 if (IS_ERR(trans)) {
6693 err = PTR_ERR(trans);
6694 trans = NULL;
6695 goto fail;
6696 }
6697
6698
6699 BTRFS_I(inode)->dir_index = 0ULL;
6700 inc_nlink(inode);
6701 inode_inc_iversion(inode);
6702 inode->i_ctime = current_time(inode);
6703 ihold(inode);
6704 set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
6705
6706 err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry, BTRFS_I(inode),
6707 1, index);
6708
6709 if (err) {
6710 drop_inode = 1;
6711 } else {
6712 struct dentry *parent = dentry->d_parent;
6713 int ret;
6714
6715 err = btrfs_update_inode(trans, root, inode);
6716 if (err)
6717 goto fail;
6718 if (inode->i_nlink == 1) {
6719
6720
6721
6722
6723 err = btrfs_orphan_del(trans, BTRFS_I(inode));
6724 if (err)
6725 goto fail;
6726 }
6727 d_instantiate(dentry, inode);
6728 ret = btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent,
6729 true, NULL);
6730 if (ret == BTRFS_NEED_TRANS_COMMIT) {
6731 err = btrfs_commit_transaction(trans);
6732 trans = NULL;
6733 }
6734 }
6735
6736fail:
6737 if (trans)
6738 btrfs_end_transaction(trans);
6739 if (drop_inode) {
6740 inode_dec_link_count(inode);
6741 iput(inode);
6742 }
6743 btrfs_btree_balance_dirty(fs_info);
6744 return err;
6745}
6746
6747static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
6748{
6749 struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
6750 struct inode *inode = NULL;
6751 struct btrfs_trans_handle *trans;
6752 struct btrfs_root *root = BTRFS_I(dir)->root;
6753 int err = 0;
6754 u64 objectid = 0;
6755 u64 index = 0;
6756
6757
6758
6759
6760
6761
6762 trans = btrfs_start_transaction(root, 5);
6763 if (IS_ERR(trans))
6764 return PTR_ERR(trans);
6765
6766 err = btrfs_find_free_ino(root, &objectid);
6767 if (err)
6768 goto out_fail;
6769
6770 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
6771 dentry->d_name.len, btrfs_ino(BTRFS_I(dir)), objectid,
6772 S_IFDIR | mode, &index);
6773 if (IS_ERR(inode)) {
6774 err = PTR_ERR(inode);
6775 inode = NULL;
6776 goto out_fail;
6777 }
6778
6779
6780 inode->i_op = &btrfs_dir_inode_operations;
6781 inode->i_fop = &btrfs_dir_file_operations;
6782
6783 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
6784 if (err)
6785 goto out_fail;
6786
6787 btrfs_i_size_write(BTRFS_I(inode), 0);
6788 err = btrfs_update_inode(trans, root, inode);
6789 if (err)
6790 goto out_fail;
6791
6792 err = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
6793 dentry->d_name.name,
6794 dentry->d_name.len, 0, index);
6795 if (err)
6796 goto out_fail;
6797
6798 d_instantiate_new(dentry, inode);
6799
6800out_fail:
6801 btrfs_end_transaction(trans);
6802 if (err && inode) {
6803 inode_dec_link_count(inode);
6804 discard_new_inode(inode);
6805 }
6806 btrfs_btree_balance_dirty(fs_info);
6807 return err;
6808}
6809
6810static noinline int uncompress_inline(struct btrfs_path *path,
6811 struct page *page,
6812 size_t pg_offset, u64 extent_offset,
6813 struct btrfs_file_extent_item *item)
6814{
6815 int ret;
6816 struct extent_buffer *leaf = path->nodes[0];
6817 char *tmp;
6818 size_t max_size;
6819 unsigned long inline_size;
6820 unsigned long ptr;
6821 int compress_type;
6822
6823 WARN_ON(pg_offset != 0);
6824 compress_type = btrfs_file_extent_compression(leaf, item);
6825 max_size = btrfs_file_extent_ram_bytes(leaf, item);
6826 inline_size = btrfs_file_extent_inline_item_len(leaf,
6827 btrfs_item_nr(path->slots[0]));
6828 tmp = kmalloc(inline_size, GFP_NOFS);
6829 if (!tmp)
6830 return -ENOMEM;
6831 ptr = btrfs_file_extent_inline_start(item);
6832
6833 read_extent_buffer(leaf, tmp, ptr, inline_size);
6834
6835 max_size = min_t(unsigned long, PAGE_SIZE, max_size);
6836 ret = btrfs_decompress(compress_type, tmp, page,
6837 extent_offset, inline_size, max_size);
6838
6839
6840
6841
6842
6843
6844
6845
6846
6847 if (max_size + pg_offset < PAGE_SIZE) {
6848 char *map = kmap(page);
6849 memset(map + pg_offset + max_size, 0, PAGE_SIZE - max_size - pg_offset);
6850 kunmap(page);
6851 }
6852 kfree(tmp);
6853 return ret;
6854}
6855
6856
6857
6858
6859
6860
6861
6862
6863
6864struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
6865 struct page *page,
6866 size_t pg_offset, u64 start, u64 len,
6867 int create)
6868{
6869 struct btrfs_fs_info *fs_info = inode->root->fs_info;
6870 int ret;
6871 int err = 0;
6872 u64 extent_start = 0;
6873 u64 extent_end = 0;
6874 u64 objectid = btrfs_ino(inode);
6875 int extent_type = -1;
6876 struct btrfs_path *path = NULL;
6877 struct btrfs_root *root = inode->root;
6878 struct btrfs_file_extent_item *item;
6879 struct extent_buffer *leaf;
6880 struct btrfs_key found_key;
6881 struct extent_map *em = NULL;
6882 struct extent_map_tree *em_tree = &inode->extent_tree;
6883 struct extent_io_tree *io_tree = &inode->io_tree;
6884 const bool new_inline = !page || create;
6885
6886 read_lock(&em_tree->lock);
6887 em = lookup_extent_mapping(em_tree, start, len);
6888 if (em)
6889 em->bdev = fs_info->fs_devices->latest_bdev;
6890 read_unlock(&em_tree->lock);
6891
6892 if (em) {
6893 if (em->start > start || em->start + em->len <= start)
6894 free_extent_map(em);
6895 else if (em->block_start == EXTENT_MAP_INLINE && page)
6896 free_extent_map(em);
6897 else
6898 goto out;
6899 }
6900 em = alloc_extent_map();
6901 if (!em) {
6902 err = -ENOMEM;
6903 goto out;
6904 }
6905 em->bdev = fs_info->fs_devices->latest_bdev;
6906 em->start = EXTENT_MAP_HOLE;
6907 em->orig_start = EXTENT_MAP_HOLE;
6908 em->len = (u64)-1;
6909 em->block_len = (u64)-1;
6910
6911 path = btrfs_alloc_path();
6912 if (!path) {
6913 err = -ENOMEM;
6914 goto out;
6915 }
6916
6917
6918 path->reada = READA_FORWARD;
6919
6920
6921
6922
6923
6924 path->leave_spinning = 1;
6925
6926 ret = btrfs_lookup_file_extent(NULL, root, path, objectid, start, 0);
6927 if (ret < 0) {
6928 err = ret;
6929 goto out;
6930 } else if (ret > 0) {
6931 if (path->slots[0] == 0)
6932 goto not_found;
6933 path->slots[0]--;
6934 }
6935
6936 leaf = path->nodes[0];
6937 item = btrfs_item_ptr(leaf, path->slots[0],
6938 struct btrfs_file_extent_item);
6939 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
6940 if (found_key.objectid != objectid ||
6941 found_key.type != BTRFS_EXTENT_DATA_KEY) {
6942
6943
6944
6945
6946
6947
6948 extent_end = start;
6949 goto next;
6950 }
6951
6952 extent_type = btrfs_file_extent_type(leaf, item);
6953 extent_start = found_key.offset;
6954 if (extent_type == BTRFS_FILE_EXTENT_REG ||
6955 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
6956
6957 if (!S_ISREG(inode->vfs_inode.i_mode)) {
6958 ret = -EUCLEAN;
6959 btrfs_crit(fs_info,
6960 "regular/prealloc extent found for non-regular inode %llu",
6961 btrfs_ino(inode));
6962 goto out;
6963 }
6964 extent_end = extent_start +
6965 btrfs_file_extent_num_bytes(leaf, item);
6966
6967 trace_btrfs_get_extent_show_fi_regular(inode, leaf, item,
6968 extent_start);
6969 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
6970 size_t size;
6971
6972 size = btrfs_file_extent_ram_bytes(leaf, item);
6973 extent_end = ALIGN(extent_start + size,
6974 fs_info->sectorsize);
6975
6976 trace_btrfs_get_extent_show_fi_inline(inode, leaf, item,
6977 path->slots[0],
6978 extent_start);
6979 }
6980next:
6981 if (start >= extent_end) {
6982 path->slots[0]++;
6983 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
6984 ret = btrfs_next_leaf(root, path);
6985 if (ret < 0) {
6986 err = ret;
6987 goto out;
6988 } else if (ret > 0) {
6989 goto not_found;
6990 }
6991 leaf = path->nodes[0];
6992 }
6993 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
6994 if (found_key.objectid != objectid ||
6995 found_key.type != BTRFS_EXTENT_DATA_KEY)
6996 goto not_found;
6997 if (start + len <= found_key.offset)
6998 goto not_found;
6999 if (start > found_key.offset)
7000 goto next;
7001
7002
7003 em->start = start;
7004 em->orig_start = start;
7005 em->len = found_key.offset - start;
7006 em->block_start = EXTENT_MAP_HOLE;
7007 goto insert;
7008 }
7009
7010 btrfs_extent_item_to_extent_map(inode, path, item,
7011 new_inline, em);
7012
7013 if (extent_type == BTRFS_FILE_EXTENT_REG ||
7014 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
7015 goto insert;
7016 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
7017 unsigned long ptr;
7018 char *map;
7019 size_t size;
7020 size_t extent_offset;
7021 size_t copy_size;
7022
7023 if (new_inline)
7024 goto out;
7025
7026 size = btrfs_file_extent_ram_bytes(leaf, item);
7027 extent_offset = page_offset(page) + pg_offset - extent_start;
7028 copy_size = min_t(u64, PAGE_SIZE - pg_offset,
7029 size - extent_offset);
7030 em->start = extent_start + extent_offset;
7031 em->len = ALIGN(copy_size, fs_info->sectorsize);
7032 em->orig_block_len = em->len;
7033 em->orig_start = em->start;
7034 ptr = btrfs_file_extent_inline_start(item) + extent_offset;
7035
7036 btrfs_set_path_blocking(path);
7037 if (!PageUptodate(page)) {
7038 if (btrfs_file_extent_compression(leaf, item) !=
7039 BTRFS_COMPRESS_NONE) {
7040 ret = uncompress_inline(path, page, pg_offset,
7041 extent_offset, item);
7042 if (ret) {
7043 err = ret;
7044 goto out;
7045 }
7046 } else {
7047 map = kmap(page);
7048 read_extent_buffer(leaf, map + pg_offset, ptr,
7049 copy_size);
7050 if (pg_offset + copy_size < PAGE_SIZE) {
7051 memset(map + pg_offset + copy_size, 0,
7052 PAGE_SIZE - pg_offset -
7053 copy_size);
7054 }
7055 kunmap(page);
7056 }
7057 flush_dcache_page(page);
7058 }
7059 set_extent_uptodate(io_tree, em->start,
7060 extent_map_end(em) - 1, NULL, GFP_NOFS);
7061 goto insert;
7062 }
7063not_found:
7064 em->start = start;
7065 em->orig_start = start;
7066 em->len = len;
7067 em->block_start = EXTENT_MAP_HOLE;
7068insert:
7069 btrfs_release_path(path);
7070 if (em->start > start || extent_map_end(em) <= start) {
7071 btrfs_err(fs_info,
7072 "bad extent! em: [%llu %llu] passed [%llu %llu]",
7073 em->start, em->len, start, len);
7074 err = -EIO;
7075 goto out;
7076 }
7077
7078 err = 0;
7079 write_lock(&em_tree->lock);
7080 err = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
7081 write_unlock(&em_tree->lock);
7082out:
7083 btrfs_free_path(path);
7084
7085 trace_btrfs_get_extent(root, inode, em);
7086
7087 if (err) {
7088 free_extent_map(em);
7089 return ERR_PTR(err);
7090 }
7091 BUG_ON(!em);
7092 return em;
7093}
7094
/*
 * Variant of btrfs_get_extent() that also takes EXTENT_DELALLOC ranges in
 * the inode's io_tree into account.
 *
 * @inode: inode being queried
 * @start: first byte of the range
 * @len:   length of the range
 *
 * If btrfs_get_extent() reports something other than a hole or a
 * preallocated extent, that map is returned unchanged.  Otherwise the io_tree
 * is searched for EXTENT_DELALLOC in [@start, @start + @len - 1] and a newly
 * allocated map is returned that describes either the leading part of the
 * hole/prealloc extent (up to where delalloc begins) or the delalloc range
 * itself (block_start == EXTENT_MAP_DELALLOC).  When no delalloc overlaps,
 * the hole/prealloc map is returned.
 *
 * Returns an extent map, or an ERR_PTR() from btrfs_get_extent() or
 * ERR_PTR(-ENOMEM).
 */
struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
					   u64 start, u64 len)
{
	struct extent_map *em;
	struct extent_map *hole_em = NULL;
	u64 delalloc_start = start;
	u64 end;
	u64 delalloc_len;
	u64 delalloc_end;
	int err = 0;

	em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
	if (IS_ERR(em))
		return em;

	/*
	 * A map that is neither a hole nor preallocated is final.  Holes and
	 * preallocated extents may have delalloc on top of them, so keep
	 * looking.
	 */
	if (em->block_start != EXTENT_MAP_HOLE &&
	    !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
		return em;
	else
		hole_em = em;

	/* Inclusive end of the range, clamped if start + len wraps around. */
	end = start + len;
	if (end < start)
		end = (u64)-1;
	else
		end -= 1;

	em = NULL;

	/*
	 * Search for EXTENT_DELALLOC in the range.  delalloc_start is passed
	 * by pointer, presumably so the helper can move it to where the
	 * delalloc range actually begins.
	 */
	delalloc_len = count_range_bits(&inode->io_tree, &delalloc_start,
				 end, len, EXTENT_DELALLOC, 1);
	delalloc_end = delalloc_start + delalloc_len;
	if (delalloc_end < delalloc_start)
		delalloc_end = (u64)-1;

	/*
	 * The delalloc range lies completely outside the requested range:
	 * return the hole/prealloc map found above.
	 */
	if (delalloc_start > end || delalloc_end <= start) {
		em = hole_em;
		hole_em = NULL;
		goto out;
	}

	/*
	 * Trim the delalloc range so that it does not begin before @start and
	 * recompute its length.
	 */
	delalloc_start = max(start, delalloc_start);
	delalloc_len = delalloc_end - delalloc_start;

	if (delalloc_len > 0) {
		u64 hole_start;
		u64 hole_len;
		const u64 hole_end = extent_map_end(hole_em);

		em = alloc_extent_map();
		if (!em) {
			err = -ENOMEM;
			goto out;
		}
		em->bdev = NULL;

		ASSERT(hole_em);
		/*
		 * If the hole/prealloc map does not intersect the requested
		 * range, drop it; hole_start and hole_len are only assigned
		 * (and only read) while hole_em stays non-NULL.
		 */
		if (hole_end <= start || hole_em->start > end) {
		       free_extent_map(hole_em);
		       hole_em = NULL;
		} else {
		       hole_start = max(hole_em->start, start);
		       hole_len = hole_end - hole_start;
		}

		if (hole_em && delalloc_start > hole_start) {
			/*
			 * The hole/prealloc extent begins before the delalloc
			 * range: return the part of it that precedes the
			 * delalloc.
			 */
			em->len = min(hole_len, delalloc_start - hole_start);
			em->start = hole_start;
			em->orig_start = hole_start;
			/*
			 * Carry over block_start and the prealloc flag from
			 * the original map.
			 */
			em->block_start = hole_em->block_start;
			em->block_len = hole_len;
			if (test_bit(EXTENT_FLAG_PREALLOC, &hole_em->flags))
				set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
		} else {
			/*
			 * No usable hole precedes the delalloc range: describe
			 * the delalloc range itself.
			 */
			em->start = delalloc_start;
			em->len = delalloc_len;
			em->orig_start = delalloc_start;
			em->block_start = EXTENT_MAP_DELALLOC;
			em->block_len = delalloc_len;
		}
	} else {
		return hole_em;
	}
out:

	/* hole_em is NULL here whenever it has been handed over or freed. */
	free_extent_map(hole_em);
	if (err) {
		free_extent_map(em);
		return ERR_PTR(err);
	}
	return em;
}
7222
7223static struct extent_map *btrfs_create_dio_extent(struct inode *inode,
7224 const u64 start,
7225 const u64 len,
7226 const u64 orig_start,
7227 const u64 block_start,
7228 const u64 block_len,
7229 const u64 orig_block_len,
7230 const u64 ram_bytes,
7231 const int type)
7232{
7233 struct extent_map *em = NULL;
7234 int ret;
7235
7236 if (type != BTRFS_ORDERED_NOCOW) {
7237 em = create_io_em(inode, start, len, orig_start,
7238 block_start, block_len, orig_block_len,
7239 ram_bytes,
7240 BTRFS_COMPRESS_NONE,
7241 type);
7242 if (IS_ERR(em))
7243 goto out;
7244 }
7245 ret = btrfs_add_ordered_extent_dio(inode, start, block_start,
7246 len, block_len, type);
7247 if (ret) {
7248 if (em) {
7249 free_extent_map(em);
7250 btrfs_drop_extent_cache(BTRFS_I(inode), start,
7251 start + len - 1, 0);
7252 }
7253 em = ERR_PTR(ret);
7254 }
7255 out:
7256
7257 return em;
7258}
7259
7260static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
7261 u64 start, u64 len)
7262{
7263 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
7264 struct btrfs_root *root = BTRFS_I(inode)->root;
7265 struct extent_map *em;
7266 struct btrfs_key ins;
7267 u64 alloc_hint;
7268 int ret;
7269
7270 alloc_hint = get_extent_allocation_hint(inode, start, len);
7271 ret = btrfs_reserve_extent(root, len, len, fs_info->sectorsize,
7272 0, alloc_hint, &ins, 1, 1);
7273 if (ret)
7274 return ERR_PTR(ret);
7275
7276 em = btrfs_create_dio_extent(inode, start, ins.offset, start,
7277 ins.objectid, ins.offset, ins.offset,
7278 ins.offset, BTRFS_ORDERED_REGULAR);
7279 btrfs_dec_block_group_reservations(fs_info, ins.objectid);
7280 if (IS_ERR(em))
7281 btrfs_free_reserved_extent(fs_info, ins.objectid,
7282 ins.offset, 1);
7283
7284 return em;
7285}
7286
7287
7288
7289
7290
/*
 * Check whether a write at @offset can go into the existing extent in place
 * instead of allocating a new one.
 *
 * @inode:          inode being written
 * @offset:         file offset of the write
 * @len:            in: length of the write; out (only when 1 is returned):
 *                  trimmed to the part that fits in the extent found
 * @orig_start:     optional; if non-NULL it receives the file offset the
 *                  extent would have with a backref offset of 0, and
 *                  @orig_block_len and @ram_bytes are filled in as well
 *                  (so they must be valid pointers whenever @orig_start is)
 * @orig_block_len: receives the extent's on-disk length
 * @ram_bytes:      receives the extent's ram_bytes field
 *
 * Returns 1 if the write can be done in place, 0 if it cannot, -EAGAIN if a
 * preallocated extent on a file without BTRFS_INODE_NODATACOW has delalloc
 * in the range, or another negative errno (-ENOMEM, lookup failure).
 */
noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
			      u64 *orig_start, u64 *orig_block_len,
			      u64 *ram_bytes)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_path *path;
	int ret;
	struct extent_buffer *leaf;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 disk_bytenr;
	u64 backref_offset;
	u64 extent_end;
	u64 num_bytes;
	int slot;
	int found_type;
	bool nocow = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_lookup_file_extent(NULL, root, path,
			btrfs_ino(BTRFS_I(inode)), offset, 0);
	if (ret < 0)
		goto out;

	slot = path->slots[0];
	if (ret == 1) {
		if (slot == 0) {
			/* No exact match and no preceding item: can't nocow */
			ret = 0;
			goto out;
		}
		slot--;
	}
	/* From here on every 'goto out' without an assignment returns 0. */
	ret = 0;
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, slot);
	if (key.objectid != btrfs_ino(BTRFS_I(inode)) ||
	    key.type != BTRFS_EXTENT_DATA_KEY) {
		/* Not a file extent item of this inode */
		goto out;
	}

	if (key.offset > offset) {
		/* The item found starts after @offset */
		goto out;
	}

	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
	found_type = btrfs_file_extent_type(leaf, fi);
	if (found_type != BTRFS_FILE_EXTENT_REG &&
	    found_type != BTRFS_FILE_EXTENT_PREALLOC) {
		/* Only regular and preallocated extents qualify */
		goto out;
	}

	/* Without NODATACOW only preallocated extents may be written in place */
	if (!nocow && found_type == BTRFS_FILE_EXTENT_REG)
		goto out;

	extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
	if (extent_end <= offset)
		goto out;

	/* A disk_bytenr of 0 is rejected (presumably it denotes a hole) */
	disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
	if (disk_bytenr == 0)
		goto out;

	if (btrfs_file_extent_compression(leaf, fi) ||
	    btrfs_file_extent_encryption(leaf, fi) ||
	    btrfs_file_extent_other_encoding(leaf, fi))
		goto out;

	/*
	 * An extent whose generation is not newer than the root's last
	 * snapshot is rejected (presumably because the snapshot may share it).
	 */
	if (btrfs_file_extent_generation(leaf, fi) <=
	    btrfs_root_last_snapshot(&root->root_item))
		goto out;

	backref_offset = btrfs_file_extent_offset(leaf, fi);

	if (orig_start) {
		*orig_start = key.offset - backref_offset;
		*orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
		*ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
	}

	if (btrfs_extent_readonly(fs_info, disk_bytenr))
		goto out;

	/* Part of the write that falls inside this extent */
	num_bytes = min(offset + *len, extent_end) - offset;
	if (!nocow && found_type == BTRFS_FILE_EXTENT_PREALLOC) {
		u64 range_end;

		/* Delalloc anywhere in the sector-rounded range means -EAGAIN */
		range_end = round_up(offset + num_bytes,
				     root->fs_info->sectorsize) - 1;
		ret = test_range_bit(io_tree, offset, range_end,
				     EXTENT_DELALLOC, 0, NULL);
		if (ret) {
			ret = -EAGAIN;
			goto out;
		}
	}

	/* The leaf is not needed any more; values were copied to locals */
	btrfs_release_path(path);

	/*
	 * Any non-zero result from the cross reference check, including an
	 * error, is reported as "cannot nocow" (0).
	 */
	ret = btrfs_cross_ref_exist(root, btrfs_ino(BTRFS_I(inode)),
				    key.offset - backref_offset, disk_bytenr);
	if (ret) {
		ret = 0;
		goto out;
	}

	/*
	 * Translate to the disk byte that corresponds to @offset and reject
	 * the range if checksums exist for it.  ret is 0 at this point, so
	 * this goto also returns 0.
	 */
	disk_bytenr += backref_offset;
	disk_bytenr += offset - key.offset;
	if (csum_exist_in_range(fs_info, disk_bytenr, num_bytes))
		goto out;
	/*
	 * All checks passed: report how much of the write fits and succeed.
	 */
	*len = num_bytes;
	ret = 1;
out:
	btrfs_free_path(path);
	return ret;
}
7434
/*
 * Lock [@lockstart, @lockend] in the inode's io_tree for direct I/O.
 *
 * @inode:        inode the I/O targets
 * @lockstart:    first byte of the range
 * @lockend:      last byte of the range (inclusive)
 * @cached_state: extent state cache passed to the lock/unlock helpers
 * @writing:      non-zero for a write
 *
 * The range is locked and then checked for ordered extents and, for writes,
 * for pages in the page cache.  If either is found the range is unlocked
 * again and the loop either retries or gives up.
 *
 * Returns 0 with the range locked, or -ENOTBLK with the range unlocked.
 */
static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
			      struct extent_state **cached_state, int writing)
{
	struct btrfs_ordered_extent *ordered;
	int ret = 0;

	while (1) {
		lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
				 cached_state);
		/*
		 * Look for an ordered extent overlapping the range now that
		 * the range is locked.
		 */
		ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), lockstart,
						     lockend - lockstart + 1);

		/*
		 * Done if there is no ordered extent and, for writes, no page
		 * cached in the range.  Reads do not look at the page cache
		 * here.
		 */
		if (!ordered &&
		    (!writing || !filemap_range_has_page(inode->i_mapping,
							 lockstart, lockend)))
			break;

		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
				     cached_state);

		if (ordered) {
			/*
			 * For a write, or when the ordered extent itself
			 * belongs to direct I/O (BTRFS_ORDERED_DIRECT), start
			 * the ordered extent (the last argument is 1,
			 * presumably "wait for it") and retry.  A read that
			 * meets a non-direct ordered extent gives up with
			 * -ENOTBLK instead.
			 */
			if (writing ||
			    test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags))
				btrfs_start_ordered_extent(inode, ordered, 1);
			else
				ret = -ENOTBLK;
			btrfs_put_ordered_extent(ordered);
		} else {
			/*
			 * Only reachable for a write with pages in the range
			 * and no ordered extent: give up with -ENOTBLK.
			 */
			ret = -ENOTBLK;
		}

		if (ret)
			break;

		cond_resched();
	}

	return ret;
}
7514
7515
7516static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
7517 u64 orig_start, u64 block_start,
7518 u64 block_len, u64 orig_block_len,
7519 u64 ram_bytes, int compress_type,
7520 int type)
7521{
7522 struct extent_map_tree *em_tree;
7523 struct extent_map *em;
7524 struct btrfs_root *root = BTRFS_I(inode)->root;
7525 int ret;
7526
7527 ASSERT(type == BTRFS_ORDERED_PREALLOC ||
7528 type == BTRFS_ORDERED_COMPRESSED ||
7529 type == BTRFS_ORDERED_NOCOW ||
7530 type == BTRFS_ORDERED_REGULAR);
7531
7532 em_tree = &BTRFS_I(inode)->extent_tree;
7533 em = alloc_extent_map();
7534 if (!em)
7535 return ERR_PTR(-ENOMEM);
7536
7537 em->start = start;
7538 em->orig_start = orig_start;
7539 em->len = len;
7540 em->block_len = block_len;
7541 em->block_start = block_start;
7542 em->bdev = root->fs_info->fs_devices->latest_bdev;
7543 em->orig_block_len = orig_block_len;
7544 em->ram_bytes = ram_bytes;
7545 em->generation = -1;
7546 set_bit(EXTENT_FLAG_PINNED, &em->flags);
7547 if (type == BTRFS_ORDERED_PREALLOC) {
7548 set_bit(EXTENT_FLAG_FILLING, &em->flags);
7549 } else if (type == BTRFS_ORDERED_COMPRESSED) {
7550 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
7551 em->compress_type = compress_type;
7552 }
7553
7554 do {
7555 btrfs_drop_extent_cache(BTRFS_I(inode), em->start,
7556 em->start + em->len - 1, 0);
7557 write_lock(&em_tree->lock);
7558 ret = add_extent_mapping(em_tree, em, 1);
7559 write_unlock(&em_tree->lock);
7560
7561
7562
7563
7564 } while (ret == -EEXIST);
7565
7566 if (ret) {
7567 free_extent_map(em);
7568 return ERR_PTR(ret);
7569 }
7570
7571
7572 return em;
7573}
7574
7575
7576static int btrfs_get_blocks_direct_read(struct extent_map *em,
7577 struct buffer_head *bh_result,
7578 struct inode *inode,
7579 u64 start, u64 len)
7580{
7581 if (em->block_start == EXTENT_MAP_HOLE ||
7582 test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
7583 return -ENOENT;
7584
7585 len = min(len, em->len - (start - em->start));
7586
7587 bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
7588 inode->i_blkbits;
7589 bh_result->b_size = len;
7590 bh_result->b_bdev = em->bdev;
7591 set_buffer_mapped(bh_result);
7592
7593 return 0;
7594}
7595
/*
 * Map @bh_result for a direct I/O write at @start.
 *
 * @map:       in/out extent map covering @start; may be replaced (the old
 *             one is released) and may hold an ERR_PTR() on failure
 * @bh_result: buffer head to fill in
 * @inode:     inode being written
 * @dio_data:  per-request state; dereferenced unconditionally on success
 * @start:     file offset of the write
 * @len:       length to map
 *
 * A preallocated extent, or any non-hole extent of a BTRFS_INODE_NODATACOW
 * inode, is written in place when can_nocow_extent() allows it.  Otherwise a
 * new extent is allocated with btrfs_new_extent_direct().
 *
 * Returns 0 on success or a negative errno.
 */
static int btrfs_get_blocks_direct_write(struct extent_map **map,
					 struct buffer_head *bh_result,
					 struct inode *inode,
					 struct btrfs_dio_data *dio_data,
					 u64 start, u64 len)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_map *em = *map;
	int ret = 0;

	/*
	 * Try to write in place: always for preallocated extents, and for
	 * existing (non-hole) extents when the inode has
	 * BTRFS_INODE_NODATACOW set.  If the attempt is not possible, fall
	 * through to allocating a new extent below.
	 */
	if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
	    ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
	     em->block_start != EXTENT_MAP_HOLE)) {
		int type;
		u64 block_start, orig_start, orig_block_len, ram_bytes;

		if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
			type = BTRFS_ORDERED_PREALLOC;
		else
			type = BTRFS_ORDERED_NOCOW;
		len = min(len, em->len - (start - em->start));
		block_start = em->block_start + (start - em->start);

		if (can_nocow_extent(inode, start, &len, &orig_start,
				     &orig_block_len, &ram_bytes) == 1 &&
		    btrfs_inc_nocow_writers(fs_info, block_start)) {
			struct extent_map *em2;

			em2 = btrfs_create_dio_extent(inode, start, len,
						      orig_start, block_start,
						      len, orig_block_len,
						      ram_bytes, type);
			btrfs_dec_nocow_writers(fs_info, block_start);
			/*
			 * For prealloc the newly created map replaces the
			 * caller's; for NOCOW, em2 is NULL on success and the
			 * original map stays in use.
			 */
			if (type == BTRFS_ORDERED_PREALLOC) {
				free_extent_map(em);
				*map = em = em2;
			}

			/*
			 * NOTE(review): for BTRFS_ORDERED_NOCOW this exit
			 * leaves *map pointing at the original em, and the
			 * error path of btrfs_get_blocks_direct() does not
			 * call free_extent_map() on it; confirm whether that
			 * reference is leaked.
			 */
			if (em2 && IS_ERR(em2)) {
				ret = PTR_ERR(em2);
				goto out;
			}
			/*
			 * The write goes into an existing extent, so the data
			 * space reserved for this range is given back.
			 */
			btrfs_free_reserved_data_space_noquota(inode, start,
							       len);
			goto skip_cow;
		}
	}

	/* In-place write not possible: allocate a new extent for the range. */
	len = bh_result->b_size;
	free_extent_map(em);
	*map = em = btrfs_new_extent_direct(inode, start, len);
	if (IS_ERR(em)) {
		ret = PTR_ERR(em);
		goto out;
	}

	len = min(len, em->len - (start - em->start));

skip_cow:
	bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
		inode->i_blkbits;
	bh_result->b_size = len;
	bh_result->b_bdev = em->bdev;
	set_buffer_mapped(bh_result);

	if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
		set_buffer_new(bh_result);

	/*
	 * Extend i_size when the write reaches past it, unless the request
	 * is flagged as an overwrite.
	 */
	if (!dio_data->overwrite && start + len > i_size_read(inode))
		i_size_write(inode, start + len);

	/* Account the mapped bytes against the request's reservation. */
	WARN_ON(dio_data->reserve < len);
	dio_data->reserve -= len;
	dio_data->unsubmitted_oe_range_end = start + len;
	current->journal_info = dio_data;
out:
	return ret;
}
7693
/*
 * get_block style callback for direct I/O: map the block @iblock of @inode
 * into @bh_result.
 *
 * @inode:     inode the I/O targets
 * @iblock:    block number within the file (shifted by i_blkbits to bytes)
 * @bh_result: buffer head describing the request (b_size on entry) and
 *             receiving the mapping
 * @create:    non-zero for a write
 *
 * Returns 0 on success (for a read of a hole or preallocated extent the
 * buffer head is left unmapped), -ENOTBLK when the range cannot be handled
 * by direct I/O (locking failed, or the extent is compressed or inline), or
 * another negative errno.
 */
static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
				   struct buffer_head *bh_result, int create)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_map *em;
	struct extent_state *cached_state = NULL;
	struct btrfs_dio_data *dio_data = NULL;
	u64 start = iblock << inode->i_blkbits;
	u64 lockstart, lockend;
	u64 len = bh_result->b_size;
	int unlock_bits = EXTENT_LOCKED;
	int ret = 0;

	/* Writes also clear EXTENT_DIRTY; reads map one sector at a time. */
	if (create)
		unlock_bits |= EXTENT_DIRTY;
	else
		len = min_t(u64, len, fs_info->sectorsize);

	lockstart = start;
	lockend = start + len - 1;

	if (current->journal_info) {
		/*
		 * current->journal_info carries the btrfs_dio_data of this
		 * request.  Take it out while this function runs; it is put
		 * back on the error path below and by
		 * btrfs_get_blocks_direct_write() on success.
		 */
		dio_data = current->journal_info;
		current->journal_info = NULL;
	}

	/*
	 * Any failure to lock the range is reported as -ENOTBLK.
	 */
	if (lock_extent_direct(inode, lockstart, lockend, &cached_state,
			       create)) {
		ret = -ENOTBLK;
		goto err;
	}

	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0);
	if (IS_ERR(em)) {
		ret = PTR_ERR(em);
		goto unlock_err;
	}

	/*
	 * Compressed and inline extents are not handled here; return
	 * -ENOTBLK for them.
	 */
	if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
	    em->block_start == EXTENT_MAP_INLINE) {
		free_extent_map(em);
		ret = -ENOTBLK;
		goto unlock_err;
	}

	if (create) {
		ret = btrfs_get_blocks_direct_write(&em, bh_result, inode,
						    dio_data, start, len);
		if (ret < 0)
			goto unlock_err;

		/* clear and unlock the entire range */
		clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
				 unlock_bits, 1, 0, &cached_state);
	} else {
		ret = btrfs_get_blocks_direct_read(em, bh_result, inode,
						   start, len);
		/* Hole or prealloc: not an error, leave the buffer unmapped */
		if (ret < 0) {
			ret = 0;
			free_extent_map(em);
			goto unlock_err;
		}
		/*
		 * Only the part of the range beyond what was mapped is
		 * unlocked here; the mapped part stays locked.
		 */
		lockstart = start + bh_result->b_size;
		if (lockstart < lockend) {
			clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
					 lockend, unlock_bits, 1, 0,
					 &cached_state);
		} else {
			free_extent_state(cached_state);
		}
	}

	free_extent_map(em);

	return 0;

unlock_err:
	clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			 unlock_bits, 1, 0, &cached_state);
err:
	/* Put back the dio_data taken from current->journal_info above. */
	if (dio_data)
		current->journal_info = dio_data;
	return ret;
}
7806
7807static inline blk_status_t submit_dio_repair_bio(struct inode *inode,
7808 struct bio *bio,
7809 int mirror_num)
7810{
7811 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
7812 blk_status_t ret;
7813
7814 BUG_ON(bio_op(bio) == REQ_OP_WRITE);
7815
7816 ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DIO_REPAIR);
7817 if (ret)
7818 return ret;
7819
7820 ret = btrfs_map_bio(fs_info, bio, mirror_num, 0);
7821
7822 return ret;
7823}
7824
7825static int btrfs_check_dio_repairable(struct inode *inode,
7826 struct bio *failed_bio,
7827 struct io_failure_record *failrec,
7828 int failed_mirror)
7829{
7830 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
7831 int num_copies;
7832
7833 num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len);
7834 if (num_copies == 1) {
7835
7836
7837
7838
7839
7840 btrfs_debug(fs_info,
7841 "Check DIO Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d",
7842 num_copies, failrec->this_mirror, failed_mirror);
7843 return 0;
7844 }
7845
7846 failrec->failed_mirror = failed_mirror;
7847 failrec->this_mirror++;
7848 if (failrec->this_mirror == failed_mirror)
7849 failrec->this_mirror++;
7850
7851 if (failrec->this_mirror > num_copies) {
7852 btrfs_debug(fs_info,
7853 "Check DIO Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d",
7854 num_copies, failrec->this_mirror, failed_mirror);
7855 return 0;
7856 }
7857
7858 return 1;
7859}
7860
/*
 * Start a repair read for the byte range [@start, @end] of a failed direct IO
 * read bio.
 *
 * @inode:         inode being read
 * @failed_bio:    the read bio that failed (must not be a write)
 * @page, @pgoff:  where the repaired data should land
 * @start, @end:   range to repair, inclusive
 * @failed_mirror: mirror number the failed read used
 * @repair_endio:  end_io handler for the repair bio
 * @repair_arg:    private argument passed along for the repair bio
 *
 * Returns BLK_STS_OK when the repair bio was submitted, BLK_STS_IOERR when
 * btrfs_check_dio_repairable() reports nothing left to try, or the converted
 * error of whichever step failed.
 */
static blk_status_t dio_read_error(struct inode *inode, struct bio *failed_bio,
				   struct page *page, unsigned int pgoff,
				   u64 start, u64 end, int failed_mirror,
				   bio_end_io_t *repair_endio, void *repair_arg)
{
	struct io_failure_record *failrec;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
	struct bio *bio;
	int isector;
	unsigned int read_mode = 0;
	int segs;
	int ret;
	blk_status_t status;
	struct bio_vec bvec;

	BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);

	ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
	if (ret)
		return errno_to_blk_status(ret);

	ret = btrfs_check_dio_repairable(inode, failed_bio, failrec,
					 failed_mirror);
	if (!ret) {
		/* No mirror left to try: drop the failure record and give up. */
		free_io_failure(failure_tree, io_tree, failrec);
		return BLK_STS_IOERR;
	}

	/*
	 * If the failed bio covered more than one sector (several segments, or
	 * one segment larger than a sector), issue the repair read with
	 * REQ_FAILFAST_DEV.
	 */
	segs = bio_segments(failed_bio);
	bio_get_first_bvec(failed_bio, &bvec);
	if (segs > 1 ||
	    (bvec.bv_len > btrfs_inode_sectorsize(inode)))
		read_mode |= REQ_FAILFAST_DEV;

	/* Index of the sector being repaired, relative to the failed bio's start. */
	isector = start - btrfs_io_bio(failed_bio)->logical;
	isector >>= inode->i_sb->s_blocksize_bits;
	bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
				pgoff, isector, repair_endio, repair_arg);
	bio->bi_opf = REQ_OP_READ | read_mode;

	btrfs_debug(BTRFS_I(inode)->root->fs_info,
		    "repair DIO read error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d",
		    read_mode, failrec->this_mirror, failrec->in_validation);

	status = submit_dio_repair_bio(inode, bio, failrec->this_mirror);
	if (status) {
		/* Submission failed: release the record and our bio reference. */
		free_io_failure(failure_tree, io_tree, failrec);
		bio_put(bio);
	}

	return status;
}
7914
/*
 * State shared between a direct IO read repair loop and the end_io handler
 * of the repair bio it is waiting on.
 */
struct btrfs_retry_complete {
	/* Completed by the repair end_io handler; the submitter waits on it. */
	struct completion done;
	/* Inode the repair read belongs to. */
	struct inode *inode;
	/* Start offset of the sector being repaired. */
	u64 start;
	/* Set non-zero by the end_io handler when the repair read was good. */
	int uptodate;
};
7921
7922static void btrfs_retry_endio_nocsum(struct bio *bio)
7923{
7924 struct btrfs_retry_complete *done = bio->bi_private;
7925 struct inode *inode = done->inode;
7926 struct bio_vec *bvec;
7927 struct extent_io_tree *io_tree, *failure_tree;
7928 struct bvec_iter_all iter_all;
7929
7930 if (bio->bi_status)
7931 goto end;
7932
7933 ASSERT(bio->bi_vcnt == 1);
7934 io_tree = &BTRFS_I(inode)->io_tree;
7935 failure_tree = &BTRFS_I(inode)->io_failure_tree;
7936 ASSERT(bio_first_bvec_all(bio)->bv_len == btrfs_inode_sectorsize(inode));
7937
7938 done->uptodate = 1;
7939 ASSERT(!bio_flagged(bio, BIO_CLONED));
7940 bio_for_each_segment_all(bvec, bio, iter_all)
7941 clean_io_failure(BTRFS_I(inode)->root->fs_info, failure_tree,
7942 io_tree, done->start, bvec->bv_page,
7943 btrfs_ino(BTRFS_I(inode)), 0);
7944end:
7945 complete(&done->done);
7946 bio_put(bio);
7947}
7948
/*
 * Repair a failed direct IO read on an inode without data checksums.
 *
 * Walks @io_bio sector by sector and issues a synchronous repair read for
 * each one through dio_read_error(), retrying the same sector until the
 * repair read succeeds or dio_read_error() itself fails.
 *
 * Returns BLK_STS_OK if every sector was repaired, otherwise the status of
 * the last sector for which dio_read_error() failed.
 */
static blk_status_t __btrfs_correct_data_nocsum(struct inode *inode,
						struct btrfs_io_bio *io_bio)
{
	struct btrfs_fs_info *fs_info;
	struct bio_vec bvec;
	struct bvec_iter iter;
	struct btrfs_retry_complete done;
	u64 start;
	unsigned int pgoff;
	u32 sectorsize;
	int nr_sectors;
	blk_status_t ret;
	blk_status_t err = BLK_STS_OK;

	fs_info = BTRFS_I(inode)->root->fs_info;
	sectorsize = fs_info->sectorsize;

	start = io_bio->logical;
	done.inode = inode;
	/* Rewind the bio to the iterator saved in the io_bio before walking it. */
	io_bio->bio.bi_iter = io_bio->iter;

	bio_for_each_segment(bvec, &io_bio->bio, iter) {
		nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len);
		pgoff = bvec.bv_offset;

next_block_or_try_again:
		done.uptodate = 0;
		done.start = start;
		init_completion(&done.done);

		ret = dio_read_error(inode, &io_bio->bio, bvec.bv_page,
				     pgoff, start, start + sectorsize - 1,
				     io_bio->mirror_num,
				     btrfs_retry_endio_nocsum, &done);
		if (ret) {
			/* Remember the failure but keep going with the next sector. */
			err = ret;
			goto next;
		}

		wait_for_completion_io(&done.done);

		if (!done.uptodate) {
			/* The repair read failed too: try this sector again. */
			goto next_block_or_try_again;
		}

next:
		start += sectorsize;

		nr_sectors--;
		if (nr_sectors) {
			/* More sectors remain in this segment. */
			pgoff += sectorsize;
			ASSERT(pgoff < PAGE_SIZE);
			goto next_block_or_try_again;
		}
	}

	return err;
}
8008
8009static void btrfs_retry_endio(struct bio *bio)
8010{
8011 struct btrfs_retry_complete *done = bio->bi_private;
8012 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
8013 struct extent_io_tree *io_tree, *failure_tree;
8014 struct inode *inode = done->inode;
8015 struct bio_vec *bvec;
8016 int uptodate;
8017 int ret;
8018 int i = 0;
8019 struct bvec_iter_all iter_all;
8020
8021 if (bio->bi_status)
8022 goto end;
8023
8024 uptodate = 1;
8025
8026 ASSERT(bio->bi_vcnt == 1);
8027 ASSERT(bio_first_bvec_all(bio)->bv_len == btrfs_inode_sectorsize(done->inode));
8028
8029 io_tree = &BTRFS_I(inode)->io_tree;
8030 failure_tree = &BTRFS_I(inode)->io_failure_tree;
8031
8032 ASSERT(!bio_flagged(bio, BIO_CLONED));
8033 bio_for_each_segment_all(bvec, bio, iter_all) {
8034 ret = __readpage_endio_check(inode, io_bio, i, bvec->bv_page,
8035 bvec->bv_offset, done->start,
8036 bvec->bv_len);
8037 if (!ret)
8038 clean_io_failure(BTRFS_I(inode)->root->fs_info,
8039 failure_tree, io_tree, done->start,
8040 bvec->bv_page,
8041 btrfs_ino(BTRFS_I(inode)),
8042 bvec->bv_offset);
8043 else
8044 uptodate = 0;
8045 i++;
8046 }
8047
8048 done->uptodate = uptodate;
8049end:
8050 complete(&done->done);
8051 bio_put(bio);
8052}
8053
/*
 * Verify, and if necessary repair, the data of a completed direct IO read on
 * an inode with data checksums.
 *
 * @inode:  inode being read
 * @io_bio: the completed read bio
 * @err:    completion status of @io_bio; when non-zero every sector goes
 *          straight to the repair path without a checksum check
 *
 * Each sector is checked with __readpage_endio_check(); sectors that fail
 * are re-read synchronously via dio_read_error() until the repair read
 * verifies or dio_read_error() itself fails.
 *
 * Returns BLK_STS_OK if every sector ended up good, otherwise the status of
 * the last sector for which dio_read_error() failed.
 */
static blk_status_t __btrfs_subio_endio_read(struct inode *inode,
			struct btrfs_io_bio *io_bio, blk_status_t err)
{
	struct btrfs_fs_info *fs_info;
	struct bio_vec bvec;
	struct bvec_iter iter;
	struct btrfs_retry_complete done;
	u64 start;
	u64 offset = 0;
	u32 sectorsize;
	int nr_sectors;
	unsigned int pgoff;
	int csum_pos;
	bool uptodate = (err == 0);
	int ret;
	blk_status_t status;

	fs_info = BTRFS_I(inode)->root->fs_info;
	sectorsize = fs_info->sectorsize;

	/* From here on err accumulates the result of the repair attempts. */
	err = BLK_STS_OK;
	start = io_bio->logical;
	done.inode = inode;
	/* Rewind the bio to the iterator saved in the io_bio before walking it. */
	io_bio->bio.bi_iter = io_bio->iter;

	bio_for_each_segment(bvec, &io_bio->bio, iter) {
		nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec.bv_len);

		pgoff = bvec.bv_offset;
next_block:
		if (uptodate) {
			/* offset is the byte position within the bio; convert to a csum index. */
			csum_pos = BTRFS_BYTES_TO_BLKS(fs_info, offset);
			ret = __readpage_endio_check(inode, io_bio, csum_pos,
					bvec.bv_page, pgoff, start, sectorsize);
			if (likely(!ret))
				goto next;
		}
try_again:
		done.uptodate = 0;
		done.start = start;
		init_completion(&done.done);

		status = dio_read_error(inode, &io_bio->bio, bvec.bv_page,
					pgoff, start, start + sectorsize - 1,
					io_bio->mirror_num, btrfs_retry_endio,
					&done);
		if (status) {
			/* Remember the failure but keep going with the next sector. */
			err = status;
			goto next;
		}

		wait_for_completion_io(&done.done);

		if (!done.uptodate) {
			/* The repair read did not verify either: try this sector again. */
			goto try_again;
		}
next:
		offset += sectorsize;
		start += sectorsize;

		ASSERT(nr_sectors);

		nr_sectors--;
		if (nr_sectors) {
			/* More sectors remain in this segment. */
			pgoff += sectorsize;
			ASSERT(pgoff < PAGE_SIZE);
			goto next_block;
		}
	}

	return err;
}
8127
8128static blk_status_t btrfs_subio_endio_read(struct inode *inode,
8129 struct btrfs_io_bio *io_bio, blk_status_t err)
8130{
8131 bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
8132
8133 if (skip_csum) {
8134 if (unlikely(err))
8135 return __btrfs_correct_data_nocsum(inode, io_bio);
8136 else
8137 return BLK_STS_OK;
8138 } else {
8139 return __btrfs_subio_endio_read(inode, io_bio, err);
8140 }
8141}
8142
8143static void btrfs_endio_direct_read(struct bio *bio)
8144{
8145 struct btrfs_dio_private *dip = bio->bi_private;
8146 struct inode *inode = dip->inode;
8147 struct bio *dio_bio;
8148 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
8149 blk_status_t err = bio->bi_status;
8150
8151 if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED)
8152 err = btrfs_subio_endio_read(inode, io_bio, err);
8153
8154 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
8155 dip->logical_offset + dip->bytes - 1);
8156 dio_bio = dip->dio_bio;
8157
8158 kfree(dip);
8159
8160 dio_bio->bi_status = err;
8161 dio_end_io(dio_bio);
8162 btrfs_io_bio_free_csum(io_bio);
8163 bio_put(bio);
8164}
8165
/*
 * Account @bytes of completed (or failed) write IO starting at @offset
 * against the ordered extents of @inode, queueing the finish work for every
 * ordered extent that btrfs_dec_test_first_ordered_pending() reports as done.
 *
 * @uptodate is passed through to the ordered extent accounting.
 */
static void __endio_write_update_ordered(struct inode *inode,
					 const u64 offset, const u64 bytes,
					 const bool uptodate)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_ordered_extent *ordered = NULL;
	struct btrfs_workqueue *wq;
	btrfs_work_func_t func;
	u64 ordered_offset = offset;
	u64 ordered_bytes = bytes;
	u64 last_offset;

	/* Free space inodes use their own workqueue and helper. */
	if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
		wq = fs_info->endio_freespace_worker;
		func = btrfs_freespace_write_helper;
	} else {
		wq = fs_info->endio_write_workers;
		func = btrfs_endio_write_helper;
	}

	while (ordered_offset < offset + bytes) {
		last_offset = ordered_offset;
		if (btrfs_dec_test_first_ordered_pending(inode, &ordered,
							 &ordered_offset,
							 ordered_bytes,
							 uptodate)) {
			btrfs_init_work(&ordered->work, func,
					finish_ordered_fn,
					NULL, NULL);
			btrfs_queue_work(wq, &ordered->work);
		}

		/*
		 * The helper did not advance ordered_offset, so no progress can
		 * be made: stop.
		 */
		if (ordered_offset == last_offset)
			return;

		/*
		 * Part of the range is still unaccounted: go around again for
		 * the remainder with a fresh ordered extent pointer.
		 */
		if (ordered_offset < offset + bytes) {
			ordered_bytes = offset + bytes - ordered_offset;
			ordered = NULL;
		}
	}
}
8213
8214static void btrfs_endio_direct_write(struct bio *bio)
8215{
8216 struct btrfs_dio_private *dip = bio->bi_private;
8217 struct bio *dio_bio = dip->dio_bio;
8218
8219 __endio_write_update_ordered(dip->inode, dip->logical_offset,
8220 dip->bytes, !bio->bi_status);
8221
8222 kfree(dip);
8223
8224 dio_bio->bi_status = bio->bi_status;
8225 dio_end_io(dio_bio);
8226 bio_put(bio);
8227}
8228
8229static blk_status_t btrfs_submit_bio_start_direct_io(void *private_data,
8230 struct bio *bio, u64 offset)
8231{
8232 struct inode *inode = private_data;
8233 blk_status_t ret;
8234 ret = btrfs_csum_one_bio(inode, bio, offset, 1);
8235 BUG_ON(ret);
8236 return 0;
8237}
8238
8239static void btrfs_end_dio_bio(struct bio *bio)
8240{
8241 struct btrfs_dio_private *dip = bio->bi_private;
8242 blk_status_t err = bio->bi_status;
8243
8244 if (err)
8245 btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
8246 "direct IO failed ino %llu rw %d,%u sector %#Lx len %u err no %d",
8247 btrfs_ino(BTRFS_I(dip->inode)), bio_op(bio),
8248 bio->bi_opf,
8249 (unsigned long long)bio->bi_iter.bi_sector,
8250 bio->bi_iter.bi_size, err);
8251
8252 if (dip->subio_endio)
8253 err = dip->subio_endio(dip->inode, btrfs_io_bio(bio), err);
8254
8255 if (err) {
8256
8257
8258
8259
8260
8261
8262 dip->errors = 1;
8263 }
8264
8265
8266 if (!atomic_dec_and_test(&dip->pending_bios))
8267 goto out;
8268
8269 if (dip->errors) {
8270 bio_io_error(dip->orig_bio);
8271 } else {
8272 dip->dio_bio->bi_status = BLK_STS_OK;
8273 bio_endio(dip->orig_bio);
8274 }
8275out:
8276 bio_put(bio);
8277}
8278
8279static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode,
8280 struct btrfs_dio_private *dip,
8281 struct bio *bio,
8282 u64 file_offset)
8283{
8284 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
8285 struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio);
8286 blk_status_t ret;
8287
8288
8289
8290
8291
8292
8293 if (dip->logical_offset == file_offset) {
8294 ret = btrfs_lookup_bio_sums_dio(inode, dip->orig_bio,
8295 file_offset);
8296 if (ret)
8297 return ret;
8298 }
8299
8300 if (bio == dip->orig_bio)
8301 return 0;
8302
8303 file_offset -= dip->logical_offset;
8304 file_offset >>= inode->i_sb->s_blocksize_bits;
8305 io_bio->csum = (u8 *)(((u32 *)orig_io_bio->csum) + file_offset);
8306
8307 return 0;
8308}
8309
8310static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
8311 struct inode *inode, u64 file_offset, int async_submit)
8312{
8313 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
8314 struct btrfs_dio_private *dip = bio->bi_private;
8315 bool write = bio_op(bio) == REQ_OP_WRITE;
8316 blk_status_t ret;
8317
8318
8319 if (async_submit)
8320 async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);
8321
8322 if (!write) {
8323 ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
8324 if (ret)
8325 goto err;
8326 }
8327
8328 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
8329 goto map;
8330
8331 if (write && async_submit) {
8332 ret = btrfs_wq_submit_bio(fs_info, bio, 0, 0,
8333 file_offset, inode,
8334 btrfs_submit_bio_start_direct_io);
8335 goto err;
8336 } else if (write) {
8337
8338
8339
8340
8341 ret = btrfs_csum_one_bio(inode, bio, file_offset, 1);
8342 if (ret)
8343 goto err;
8344 } else {
8345 ret = btrfs_lookup_and_bind_dio_csum(inode, dip, bio,
8346 file_offset);
8347 if (ret)
8348 goto err;
8349 }
8350map:
8351 ret = btrfs_map_bio(fs_info, bio, 0, 0);
8352err:
8353 return ret;
8354}
8355
/*
 * Split the original bio of a direct IO request along the boundaries
 * reported by btrfs_get_io_geometry() and submit the pieces.
 *
 * Returns -EIO if the initial geometry lookup fails, before anything has
 * been submitted. Otherwise returns 0: submission errors after that point
 * are recorded in dip->errors and reported by ending dip->orig_bio with an
 * IO error once the last pending bio is gone.
 */
static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip)
{
	struct inode *inode = dip->inode;
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct bio *bio;
	struct bio *orig_bio = dip->orig_bio;
	u64 start_sector = orig_bio->bi_iter.bi_sector;
	u64 file_offset = dip->logical_offset;
	int async_submit = 0;
	u64 submit_len;
	int clone_offset = 0;
	int clone_len;
	int ret;
	blk_status_t status;
	struct btrfs_io_geometry geom;

	submit_len = orig_bio->bi_iter.bi_size;
	ret = btrfs_get_io_geometry(fs_info, btrfs_op(orig_bio),
				    start_sector << 9, submit_len, &geom);
	if (ret)
		return -EIO;

	/* The whole bio fits within geom.len: submit the original bio itself. */
	if (geom.len >= submit_len) {
		bio = orig_bio;
		dip->flags |= BTRFS_DIO_ORIG_BIO_SUBMITTED;
		goto submit;
	}

	/* Split bios are submitted asynchronously unless the data profile is RAID5/6. */
	if (btrfs_data_alloc_profile(fs_info) & BTRFS_BLOCK_GROUP_RAID56_MASK)
		async_submit = 0;
	else
		async_submit = 1;

	/* clone_len is an int, so geom.len has to fit in one. */
	ASSERT(geom.len <= INT_MAX);
	/* This count covers the bio submitted after the loop at "submit". */
	atomic_inc(&dip->pending_bios);
	do {
		clone_len = min_t(int, submit_len, geom.len);

		/* Carve the next piece out of the original bio. */
		bio = btrfs_bio_clone_partial(orig_bio, clone_offset,
					      clone_len);
		bio->bi_private = dip;
		bio->bi_end_io = btrfs_end_dio_bio;
		btrfs_io_bio(bio)->logical = file_offset;

		ASSERT(submit_len >= clone_len);
		submit_len -= clone_len;
		/* The last piece is submitted after the loop. */
		if (submit_len == 0)
			break;

		/*
		 * Count this bio as pending before submitting it; the count is
		 * dropped again below if the submission fails.
		 */
		atomic_inc(&dip->pending_bios);

		status = btrfs_submit_dio_bio(bio, inode, file_offset,
						async_submit);
		if (status) {
			bio_put(bio);
			atomic_dec(&dip->pending_bios);
			goto out_err;
		}

		clone_offset += clone_len;
		start_sector += clone_len >> 9;
		file_offset += clone_len;

		ret = btrfs_get_io_geometry(fs_info, btrfs_op(orig_bio),
				      start_sector << 9, submit_len, &geom);
		if (ret)
			goto out_err;
	} while (submit_len > 0);

submit:
	status = btrfs_submit_dio_bio(bio, inode, file_offset, async_submit);
	if (!status)
		return 0;

	bio_put(bio);
out_err:
	dip->errors = 1;
	/*
	 * Drop a pending count; if that was the last one, nobody else will
	 * end the original bio, so fail it here.
	 */
	if (atomic_dec_and_test(&dip->pending_bios))
		bio_io_error(dip->orig_bio);

	/* bio_end_io() will handle error, so we needn't return it */
	return 0;
}
8457
/*
 * submit_io callback passed to __blockdev_direct_IO() by btrfs_direct_IO().
 *
 * Clones @dio_bio, allocates the btrfs_dio_private that tracks the request
 * and hands it to btrfs_submit_direct_hook(). On failure the request is
 * completed with an error here.
 *
 * @dio_bio:     the bio built by the generic direct IO code
 * @inode:       inode the IO is for
 * @file_offset: file offset the bio starts at
 */
static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode,
				loff_t file_offset)
{
	struct btrfs_dio_private *dip = NULL;
	struct bio *bio = NULL;
	struct btrfs_io_bio *io_bio;
	bool write = (bio_op(dio_bio) == REQ_OP_WRITE);
	int ret = 0;

	bio = btrfs_bio_clone(dio_bio);

	dip = kzalloc(sizeof(*dip), GFP_NOFS);
	if (!dip) {
		ret = -ENOMEM;
		goto free_ordered;
	}

	dip->private = dio_bio->bi_private;
	dip->inode = inode;
	dip->logical_offset = file_offset;
	dip->bytes = dio_bio->bi_iter.bi_size;
	dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9;
	bio->bi_private = dip;
	dip->orig_bio = bio;
	dip->dio_bio = dio_bio;
	atomic_set(&dip->pending_bios, 0);
	io_bio = btrfs_io_bio(bio);
	io_bio->logical = file_offset;

	if (write) {
		bio->bi_end_io = btrfs_endio_direct_write;
	} else {
		bio->bi_end_io = btrfs_endio_direct_read;
		dip->subio_endio = btrfs_subio_endio_read;
	}

	/*
	 * For writes, current->journal_info holds the btrfs_dio_data set up by
	 * btrfs_direct_IO(). Move both ends of its unsubmitted ordered extent
	 * range to the end of this bio, leaving the range empty: from here on
	 * the bio's end_io handler does the ordered extent accounting.
	 */
	if (write) {
		struct btrfs_dio_data *dio_data = current->journal_info;

		dio_data->unsubmitted_oe_range_end = dip->logical_offset +
			dip->bytes;
		dio_data->unsubmitted_oe_range_start =
			dio_data->unsubmitted_oe_range_end;
	}

	ret = btrfs_submit_direct_hook(dip);
	if (!ret)
		return;

	btrfs_io_bio_free_csum(io_bio);

free_ordered:
	/*
	 * With both the clone and dip set up, failing the clone runs its
	 * end_io handler (btrfs_endio_direct_read/write), which is left to do
	 * the cleanup; we only forget our pointers so nothing is released
	 * twice below. Without dip, clean up and complete dio_bio by hand.
	 */
	if (bio && dip) {
		bio_io_error(bio);
		/* Ownership went to the end_io handler: do not put/free below. */
		dip = NULL;
		bio = NULL;
	} else {
		if (write)
			__endio_write_update_ordered(inode,
						file_offset,
						dio_bio->bi_iter.bi_size,
						false);
		else
			unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
			      file_offset + dio_bio->bi_iter.bi_size - 1);

		dio_bio->bi_status = BLK_STS_IOERR;
		/* Report the failure on the bio handed to us by the generic dio code. */
		dio_end_io(dio_bio);
	}
	if (bio)
		bio_put(bio);
	kfree(dip);
}
8555
8556static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info,
8557 const struct iov_iter *iter, loff_t offset)
8558{
8559 int seg;
8560 int i;
8561 unsigned int blocksize_mask = fs_info->sectorsize - 1;
8562 ssize_t retval = -EINVAL;
8563
8564 if (offset & blocksize_mask)
8565 goto out;
8566
8567 if (iov_iter_alignment(iter) & blocksize_mask)
8568 goto out;
8569
8570
8571 if (iov_iter_rw(iter) != READ || !iter_is_iovec(iter))
8572 return 0;
8573
8574
8575
8576
8577
8578 for (seg = 0; seg < iter->nr_segs; seg++) {
8579 for (i = seg + 1; i < iter->nr_segs; i++) {
8580 if (iter->iov[seg].iov_base == iter->iov[i].iov_base)
8581 goto out;
8582 }
8583 }
8584 retval = 0;
8585out:
8586 return retval;
8587}
8588
/*
 * Direct IO entry point for btrfs files.
 *
 * Returns 0 without doing any IO when check_direct_IO() rejects the request,
 * -EAGAIN for an extending IOCB_NOWAIT write, the error from the space
 * reservation, or otherwise whatever __blockdev_direct_IO() returns.
 */
static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_dio_data dio_data = { 0 };
	struct extent_changeset *data_reserved = NULL;
	loff_t offset = iocb->ki_pos;
	size_t count = 0;
	int flags = 0;
	bool wakeup = true;
	bool relock = false;
	ssize_t ret;

	/* Unsuitable requests transfer nothing via direct IO. */
	if (check_direct_IO(fs_info, iter, offset))
		return 0;

	inode_dio_begin(inode);

	/*
	 * If the inode has async extents outstanding, start writeback on the
	 * range before doing the direct IO.
	 */
	count = iov_iter_count(iter);
	if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
		     &BTRFS_I(inode)->runtime_flags))
		filemap_fdatawrite_range(inode->i_mapping, offset,
					 offset + count - 1);

	if (iov_iter_rw(iter) == WRITE) {
		/*
		 * A write that stays within i_size is flagged as an overwrite
		 * and runs with the inode lock dropped (retaken at "out").
		 * A write that extends the file cannot be done under
		 * IOCB_NOWAIT.
		 */
		if (offset + count <= inode->i_size) {
			dio_data.overwrite = 1;
			inode_unlock(inode);
			relock = true;
		} else if (iocb->ki_flags & IOCB_NOWAIT) {
			ret = -EAGAIN;
			goto out;
		}
		ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
						   offset, count);
		if (ret)
			goto out;

		/*
		 * Publish the reservation and an (initially empty) unsubmitted
		 * ordered extent range through current->journal_info, where
		 * the get_block and submit callbacks pick it up.
		 */
		dio_data.reserve = round_up(count,
					    fs_info->sectorsize);
		dio_data.unsubmitted_oe_range_start = (u64)offset;
		dio_data.unsubmitted_oe_range_end = (u64)offset;
		current->journal_info = &dio_data;
		down_read(&BTRFS_I(inode)->dio_sem);
	} else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
				     &BTRFS_I(inode)->runtime_flags)) {
		/* End our dio accounting now; "out" must not end it again. */
		inode_dio_end(inode);
		flags = DIO_LOCKING | DIO_SKIP_HOLES;
		wakeup = false;
	}

	ret = __blockdev_direct_IO(iocb, inode,
				   fs_info->fs_devices->latest_bdev,
				   iter, btrfs_get_blocks_direct, NULL,
				   btrfs_submit_direct, flags);
	if (iov_iter_rw(iter) == WRITE) {
		up_read(&BTRFS_I(inode)->dio_sem);
		current->journal_info = NULL;
		if (ret < 0 && ret != -EIOCBQUEUED) {
			/* Hard failure: give back what is left of the reservation. */
			if (dio_data.reserve)
				btrfs_delalloc_release_space(inode, data_reserved,
					offset, dio_data.reserve, true);
			/*
			 * A non-empty unsubmitted range means ordered extent
			 * space was set up but never covered by a submitted
			 * bio; account it as failed.
			 */
			if (dio_data.unsubmitted_oe_range_start <
			    dio_data.unsubmitted_oe_range_end)
				__endio_write_update_ordered(inode,
					dio_data.unsubmitted_oe_range_start,
					dio_data.unsubmitted_oe_range_end -
					dio_data.unsubmitted_oe_range_start,
					false);
		} else if (ret >= 0 && (size_t)ret < count)
			/* Short write: release the space for the part not written. */
			btrfs_delalloc_release_space(inode, data_reserved,
					offset, count - (size_t)ret, true);
		btrfs_delalloc_release_extents(BTRFS_I(inode), count, false);
	}
out:
	if (wakeup)
		inode_dio_end(inode);
	if (relock)
		inode_lock(inode);

	extent_changeset_free(data_reserved);
	return ret;
}
8695
/* Flag mask that btrfs_fiemap() passes to fiemap_check_flags(). */
#define BTRFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC)
8697
8698static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
8699 __u64 start, __u64 len)
8700{
8701 int ret;
8702
8703 ret = fiemap_check_flags(fieinfo, BTRFS_FIEMAP_FLAGS);
8704 if (ret)
8705 return ret;
8706
8707 return extent_fiemap(inode, fieinfo, start, len);
8708}
8709
8710int btrfs_readpage(struct file *file, struct page *page)
8711{
8712 struct extent_io_tree *tree;
8713 tree = &BTRFS_I(page->mapping->host)->io_tree;
8714 return extent_read_full_page(tree, page, btrfs_get_extent, 0);
8715}
8716
8717static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
8718{
8719 struct inode *inode = page->mapping->host;
8720 int ret;
8721
8722 if (current->flags & PF_MEMALLOC) {
8723 redirty_page_for_writepage(wbc, page);
8724 unlock_page(page);
8725 return 0;
8726 }
8727
8728
8729
8730
8731
8732
8733 if (!igrab(inode)) {
8734 redirty_page_for_writepage(wbc, page);
8735 return AOP_WRITEPAGE_ACTIVATE;
8736 }
8737 ret = extent_write_full_page(page, wbc);
8738 btrfs_add_delayed_iput(inode);
8739 return ret;
8740}
8741
/* Write back dirty pages of @mapping; forwards to extent_writepages(). */
static int btrfs_writepages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	int result = extent_writepages(mapping, wbc);

	return result;
}
8747
/*
 * Read a batch of pages for @mapping; forwards to extent_readpages().
 * @file is unused.
 */
static int btrfs_readpages(struct file *file, struct address_space *mapping,
			   struct list_head *pages, unsigned nr_pages)
{
	int result = extent_readpages(mapping, pages, nr_pages);

	return result;
}
8754
8755static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
8756{
8757 int ret = try_release_extent_mapping(page, gfp_flags);
8758 if (ret == 1) {
8759 ClearPagePrivate(page);
8760 set_page_private(page, 0);
8761 put_page(page);
8762 }
8763 return ret;
8764}
8765
8766static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
8767{
8768 if (PageWriteback(page) || PageDirty(page))
8769 return 0;
8770 return __btrfs_releasepage(page, gfp_flags);
8771}
8772
/*
 * invalidatepage handler.
 *
 * A partial invalidation (@offset != 0) only attempts btrfs_releasepage().
 * A full-page invalidation truncates any ordered extent overlapping the
 * page, clears the page's extent state bits and drops the page's private
 * state. When the inode is being freed (I_FREEING) the extent locking and
 * extent bit clearing are skipped.
 */
static void btrfs_invalidatepage(struct page *page, unsigned int offset,
				 unsigned int length)
{
	struct inode *inode = page->mapping->host;
	struct extent_io_tree *tree;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	u64 page_start = page_offset(page);
	u64 page_end = page_start + PAGE_SIZE - 1;
	u64 start;
	u64 end;
	int inode_evicting = inode->i_state & I_FREEING;

	/* Let any writeback in flight on this page finish first. */
	wait_on_page_writeback(page);

	tree = &BTRFS_I(inode)->io_tree;
	if (offset) {
		btrfs_releasepage(page, GFP_NOFS);
		return;
	}

	if (!inode_evicting)
		lock_extent_bits(tree, page_start, page_end, &cached_state);
again:
	/*
	 * NOTE(review): start is reset to page_start on every pass, so the
	 * "start = end + 1" below does not carry over through "goto again" -
	 * confirm this is intended.
	 */
	start = page_start;
	ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start,
					page_end - start + 1);
	if (ordered) {
		end = min(page_end, ordered->file_offset + ordered->len - 1);
		/*
		 * Clear the extent bits (including EXTENT_LOCKED) for the part
		 * of the page covered by the ordered extent.
		 */
		if (!inode_evicting)
			clear_extent_bit(tree, start, end,
					 EXTENT_DIRTY | EXTENT_DELALLOC |
					 EXTENT_DELALLOC_NEW |
					 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
					 EXTENT_DEFRAG, 1, 0, &cached_state);
		/*
		 * If Private2 was still set on the page, mark the ordered
		 * extent truncated at this page and account the page's range
		 * as completed against it.
		 */
		if (TestClearPagePrivate2(page)) {
			/* Shadows the outer io_tree pointer within this block only. */
			struct btrfs_ordered_inode_tree *tree;
			u64 new_len;

			tree = &BTRFS_I(inode)->ordered_tree;

			spin_lock_irq(&tree->lock);
			set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
			new_len = start - ordered->file_offset;
			/* truncated_len only ever shrinks. */
			if (new_len < ordered->truncated_len)
				ordered->truncated_len = new_len;
			spin_unlock_irq(&tree->lock);

			if (btrfs_dec_test_ordered_pending(inode, &ordered,
							   start,
							   end - start + 1, 1))
				btrfs_finish_ordered_io(ordered);
		}
		btrfs_put_ordered_extent(ordered);
		if (!inode_evicting) {
			/* Take the lock back on the range that was just cleared. */
			cached_state = NULL;
			lock_extent_bits(tree, start, end,
					 &cached_state);
		}

		start = end + 1;
		if (start < page_end)
			goto again;
	}

	/* A page that is still dirty gives back its qgroup data reservation. */
	if (PageDirty(page))
		btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE);
	if (!inode_evicting) {
		clear_extent_bit(tree, page_start, page_end,
				 EXTENT_LOCKED | EXTENT_DIRTY |
				 EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
				 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
				 &cached_state);

		__btrfs_releasepage(page, GFP_NOFS);
	}

	ClearPageChecked(page);
	/* Drop the private state if the release above did not already do so. */
	if (PagePrivate(page)) {
		ClearPagePrivate(page);
		set_page_private(page, 0);
		put_page(page);
	}
}
8887
8888
8889
8890
8891
8892
8893
8894
8895
8896
8897
8898
8899
8900
8901
8902
/*
 * page_mkwrite handler: called when a mapped page is about to become
 * writable.
 *
 * Reserves delalloc space for the page, waits for writeback and any ordered
 * extent covering the page, marks the range delalloc, zeroes the part of the
 * page beyond i_size and dirties the page.
 *
 * Returns VM_FAULT_LOCKED with the page locked on success, VM_FAULT_NOPAGE
 * if the page no longer belongs to this mapping or lies beyond i_size,
 * VM_FAULT_SIGBUS if the delalloc bits cannot be set, or the vmf_error()
 * translation of a reservation / file_update_time() failure.
 */
vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
{
	struct page *page = vmf->page;
	struct inode *inode = file_inode(vmf->vma->vm_file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	struct extent_changeset *data_reserved = NULL;
	char *kaddr;
	unsigned long zero_start;
	loff_t size;
	vm_fault_t ret;
	int ret2;
	int reserved = 0;
	u64 reserved_space;
	u64 page_start;
	u64 page_end;
	u64 end;

	reserved_space = PAGE_SIZE;

	sb_start_pagefault(inode->i_sb);
	page_start = page_offset(page);
	page_end = page_start + PAGE_SIZE - 1;
	end = page_end;

	/*
	 * Reserve space for a full page up front; the reservation is trimmed
	 * further down if the page turns out to be the last, partial one.
	 */
	ret2 = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
					   reserved_space);
	if (!ret2) {
		ret2 = file_update_time(vmf->vma->vm_file);
		reserved = 1;
	}
	if (ret2) {
		ret = vmf_error(ret2);
		/* Only undo the reservation if it was actually made. */
		if (reserved)
			goto out;
		goto out_noreserve;
	}

	ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
again:
	lock_page(page);
	size = i_size_read(inode);

	if ((page->mapping != inode->i_mapping) ||
	    (page_start >= size)) {
		/* page got truncated out from underneath us */
		goto out_unlock;
	}
	wait_on_page_writeback(page);

	lock_extent_bits(io_tree, page_start, page_end, &cached_state);
	set_page_extent_mapped(page);

	/*
	 * If an ordered extent still covers the page, drop our locks, start
	 * and wait for it (third argument 1), and start over.
	 */
	ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start,
			PAGE_SIZE);
	if (ordered) {
		unlock_extent_cached(io_tree, page_start, page_end,
				     &cached_state);
		unlock_page(page);
		btrfs_start_ordered_extent(inode, ordered, 1);
		btrfs_put_ordered_extent(ordered);
		goto again;
	}

	/*
	 * Last page of the file: only the sectors up to i_size need space, so
	 * release the unused part of the reservation.
	 */
	if (page->index == ((size - 1) >> PAGE_SHIFT)) {
		reserved_space = round_up(size - page_start,
					  fs_info->sectorsize);
		if (reserved_space < PAGE_SIZE) {
			end = page_start + reserved_space - 1;
			btrfs_delalloc_release_space(inode, data_reserved,
					page_start, PAGE_SIZE - reserved_space,
					true);
		}
	}

	/*
	 * Clear any stale dirty/delalloc/accounting/defrag bits on the range
	 * before setting the delalloc bits again for this fault.
	 */
	clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end,
			  EXTENT_DIRTY | EXTENT_DELALLOC |
			  EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
			  0, 0, &cached_state);

	ret2 = btrfs_set_extent_delalloc(inode, page_start, end, 0,
					&cached_state, 0);
	if (ret2) {
		unlock_extent_cached(io_tree, page_start, page_end,
				     &cached_state);
		ret = VM_FAULT_SIGBUS;
		goto out_unlock;
	}
	ret2 = 0;

	/* page is wholly or partially inside EOF */
	if (page_start + PAGE_SIZE > size)
		zero_start = offset_in_page(size);
	else
		zero_start = PAGE_SIZE;

	/* Zero the tail of the page that lies beyond i_size. */
	if (zero_start != PAGE_SIZE) {
		kaddr = kmap(page);
		memset(kaddr + zero_start, 0, PAGE_SIZE - zero_start);
		flush_dcache_page(page);
		kunmap(page);
	}
	ClearPageChecked(page);
	set_page_dirty(page);
	SetPageUptodate(page);

	/* Record the current transaction/log state on the inode. */
	BTRFS_I(inode)->last_trans = fs_info->generation;
	BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
	BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit;

	unlock_extent_cached(io_tree, page_start, page_end, &cached_state);

	/* ret2 was set to 0 above, so this success branch is always taken. */
	if (!ret2) {
		btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, true);
		sb_end_pagefault(inode->i_sb);
		extent_changeset_free(data_reserved);
		return VM_FAULT_LOCKED;
	}

out_unlock:
	unlock_page(page);
out:
	btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, (ret != 0));
	btrfs_delalloc_release_space(inode, data_reserved, page_start,
				     reserved_space, (ret != 0));
out_noreserve:
	sb_end_pagefault(inode->i_sb);
	extent_changeset_free(data_reserved);
	return ret;
}
9054
/*
 * Remove the file extent items of @inode that lie beyond inode->i_size.
 *
 * @inode:          inode being truncated; inode->i_size is the size to
 *                  truncate down to
 * @skip_writeback: when false, first wait for ordered extents from i_size
 *                  (rounded down to a sector boundary) to the end of file
 *
 * The work is done with a private temporary block reserve (@rsv) of
 * min_size bytes.  Whenever btrfs_truncate_inode_items() gives up with
 * -ENOSPC or -EAGAIN, the inode is updated, the transaction is ended and a
 * new one is started with a refilled reserve, then truncation is retried.
 *
 * Returns 0 on success, otherwise the first error recorded in ret.
 */
static int btrfs_truncate(struct inode *inode, bool skip_writeback)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_block_rsv *rsv;
	int ret;
	struct btrfs_trans_handle *trans;
	u64 mask = fs_info->sectorsize - 1;
	u64 min_size = btrfs_calc_trunc_metadata_size(fs_info, 1);

	if (!skip_writeback) {
		ret = btrfs_wait_ordered_range(inode, inode->i_size & (~mask),
					       (u64)-1);
		if (ret)
			return ret;
	}

	/*
	 * The temporary reserve is sized for one unit of truncate metadata
	 * and marked failfast.
	 * NOTE(review): failfast presumably makes the truncate helper bail
	 * out with -ENOSPC rather than dip into other reserves - confirm.
	 */
	rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
	if (!rsv)
		return -ENOMEM;
	rsv->size = min_size;
	rsv->failfast = 1;

	/*
	 * Two items are reserved with the transaction; min_size (one unit) of
	 * that is migrated into @rsv below.
	 * NOTE(review): the second unit is presumably for the inode update -
	 * confirm.
	 */
	trans = btrfs_start_transaction(root, 2);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out;
	}

	/* Move min_size bytes from the transaction reserve into our own. */
	ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv,
				      min_size, false);
	BUG_ON(ret);

	/*
	 * Flag the inode as needing a full sync before dropping extents.
	 * NOTE(review): presumably so a later fsync does not rely on stale
	 * extent maps - confirm against tree-log.
	 */
	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
	trans->block_rsv = rsv;

	while (1) {
		ret = btrfs_truncate_inode_items(trans, root, inode,
						 inode->i_size,
						 BTRFS_EXTENT_DATA_KEY);
		/* Everything below uses the regular transaction reserve. */
		trans->block_rsv = &fs_info->trans_block_rsv;
		if (ret != -ENOSPC && ret != -EAGAIN)
			break;

		/* Persist progress before restarting the transaction. */
		ret = btrfs_update_inode(trans, root, inode);
		if (ret)
			break;

		btrfs_end_transaction(trans);
		btrfs_btree_balance_dirty(fs_info);

		trans = btrfs_start_transaction(root, 2);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			/* No handle left: skip the final update/end below. */
			trans = NULL;
			break;
		}

		/* Drop whatever is left in @rsv and refill it to min_size. */
		btrfs_block_rsv_release(fs_info, rsv, -1);
		ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv,
					      rsv, min_size, false);
		BUG_ON(ret);
		trans->block_rsv = rsv;
	}

	/*
	 * The truncate helper asked for the block containing i_size to be
	 * handled via btrfs_truncate_block().  That call is made with no
	 * transaction handle held, so end the current one first and start a
	 * fresh one afterwards for the final inode update.
	 */
	if (ret == NEED_TRUNCATE_BLOCK) {
		btrfs_end_transaction(trans);
		btrfs_btree_balance_dirty(fs_info);

		ret = btrfs_truncate_block(inode, inode->i_size, 0, 0);
		if (ret)
			goto out;
		trans = btrfs_start_transaction(root, 1);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			goto out;
		}
		btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
	}

	if (trans) {
		int ret2;

		trans->block_rsv = &fs_info->trans_block_rsv;
		/* Keep the first error; later failures must not mask it. */
		ret2 = btrfs_update_inode(trans, root, inode);
		if (ret2 && !ret)
			ret = ret2;

		ret2 = btrfs_end_transaction(trans);
		if (ret2 && !ret)
			ret = ret2;
		btrfs_btree_balance_dirty(fs_info);
	}
out:
	btrfs_free_block_rsv(fs_info, rsv);

	return ret;
}
9199
9200
9201
9202
9203int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
9204 struct btrfs_root *new_root,
9205 struct btrfs_root *parent_root,
9206 u64 new_dirid)
9207{
9208 struct inode *inode;
9209 int err;
9210 u64 index = 0;
9211
9212 inode = btrfs_new_inode(trans, new_root, NULL, "..", 2,
9213 new_dirid, new_dirid,
9214 S_IFDIR | (~current_umask() & S_IRWXUGO),
9215 &index);
9216 if (IS_ERR(inode))
9217 return PTR_ERR(inode);
9218 inode->i_op = &btrfs_dir_inode_operations;
9219 inode->i_fop = &btrfs_dir_file_operations;
9220
9221 set_nlink(inode, 1);
9222 btrfs_i_size_write(BTRFS_I(inode), 0);
9223 unlock_new_inode(inode);
9224
9225 err = btrfs_subvol_inherit_props(trans, new_root, parent_root);
9226 if (err)
9227 btrfs_err(new_root->fs_info,
9228 "error inheriting subvolume %llu properties: %d",
9229 new_root->root_key.objectid, err);
9230
9231 err = btrfs_update_inode(trans, new_root, inode);
9232
9233 iput(inode);
9234 return err;
9235}
9236
9237struct inode *btrfs_alloc_inode(struct super_block *sb)
9238{
9239 struct btrfs_fs_info *fs_info = btrfs_sb(sb);
9240 struct btrfs_inode *ei;
9241 struct inode *inode;
9242
9243 ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_KERNEL);
9244 if (!ei)
9245 return NULL;
9246
9247 ei->root = NULL;
9248 ei->generation = 0;
9249 ei->last_trans = 0;
9250 ei->last_sub_trans = 0;
9251 ei->logged_trans = 0;
9252 ei->delalloc_bytes = 0;
9253 ei->new_delalloc_bytes = 0;
9254 ei->defrag_bytes = 0;
9255 ei->disk_i_size = 0;
9256 ei->flags = 0;
9257 ei->csum_bytes = 0;
9258 ei->index_cnt = (u64)-1;
9259 ei->dir_index = 0;
9260 ei->last_unlink_trans = 0;
9261 ei->last_log_commit = 0;
9262
9263 spin_lock_init(&ei->lock);
9264 ei->outstanding_extents = 0;
9265 if (sb->s_magic != BTRFS_TEST_MAGIC)
9266 btrfs_init_metadata_block_rsv(fs_info, &ei->block_rsv,
9267 BTRFS_BLOCK_RSV_DELALLOC);
9268 ei->runtime_flags = 0;
9269 ei->prop_compress = BTRFS_COMPRESS_NONE;
9270 ei->defrag_compress = BTRFS_COMPRESS_NONE;
9271
9272 ei->delayed_node = NULL;
9273
9274 ei->i_otime.tv_sec = 0;
9275 ei->i_otime.tv_nsec = 0;
9276
9277 inode = &ei->vfs_inode;
9278 extent_map_tree_init(&ei->extent_tree);
9279 extent_io_tree_init(fs_info, &ei->io_tree, IO_TREE_INODE_IO, inode);
9280 extent_io_tree_init(fs_info, &ei->io_failure_tree,
9281 IO_TREE_INODE_IO_FAILURE, inode);
9282 ei->io_tree.track_uptodate = true;
9283 ei->io_failure_tree.track_uptodate = true;
9284 atomic_set(&ei->sync_writers, 0);
9285 mutex_init(&ei->log_mutex);
9286 mutex_init(&ei->delalloc_mutex);
9287 btrfs_ordered_inode_tree_init(&ei->ordered_tree);
9288 INIT_LIST_HEAD(&ei->delalloc_inodes);
9289 INIT_LIST_HEAD(&ei->delayed_iput);
9290 RB_CLEAR_NODE(&ei->rb_node);
9291 init_rwsem(&ei->dio_sem);
9292
9293 return inode;
9294}
9295
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/*
 * Sanity-test teardown: drop the whole extent map cache of @inode and hand
 * the btrfs inode straight back to the slab cache.
 */
void btrfs_test_destroy_inode(struct inode *inode)
{
	struct btrfs_inode *binode = BTRFS_I(inode);

	btrfs_drop_extent_cache(binode, 0, (u64)-1, 0);
	kmem_cache_free(btrfs_inode_cachep, binode);
}
#endif
9303
9304void btrfs_free_inode(struct inode *inode)
9305{
9306 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
9307}
9308
9309void btrfs_destroy_inode(struct inode *inode)
9310{
9311 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
9312 struct btrfs_ordered_extent *ordered;
9313 struct btrfs_root *root = BTRFS_I(inode)->root;
9314
9315 WARN_ON(!hlist_empty(&inode->i_dentry));
9316 WARN_ON(inode->i_data.nrpages);
9317 WARN_ON(BTRFS_I(inode)->block_rsv.reserved);
9318 WARN_ON(BTRFS_I(inode)->block_rsv.size);
9319 WARN_ON(BTRFS_I(inode)->outstanding_extents);
9320 WARN_ON(BTRFS_I(inode)->delalloc_bytes);
9321 WARN_ON(BTRFS_I(inode)->new_delalloc_bytes);
9322 WARN_ON(BTRFS_I(inode)->csum_bytes);
9323 WARN_ON(BTRFS_I(inode)->defrag_bytes);
9324
9325
9326
9327
9328
9329
9330 if (!root)
9331 return;
9332
9333 while (1) {
9334 ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
9335 if (!ordered)
9336 break;
9337 else {
9338 btrfs_err(fs_info,
9339 "found ordered extent %llu %llu on inode cleanup",
9340 ordered->file_offset, ordered->len);
9341 btrfs_remove_ordered_extent(inode, ordered);
9342 btrfs_put_ordered_extent(ordered);
9343 btrfs_put_ordered_extent(ordered);
9344 }
9345 }
9346 btrfs_qgroup_check_reserved_leak(inode);
9347 inode_tree_del(inode);
9348 btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0);
9349}
9350
9351int btrfs_drop_inode(struct inode *inode)
9352{
9353 struct btrfs_root *root = BTRFS_I(inode)->root;
9354
9355 if (root == NULL)
9356 return 1;
9357
9358
9359 if (btrfs_root_refs(&root->root_item) == 0)
9360 return 1;
9361 else
9362 return generic_drop_inode(inode);
9363}
9364
9365static void init_once(void *foo)
9366{
9367 struct btrfs_inode *ei = (struct btrfs_inode *) foo;
9368
9369 inode_init_once(&ei->vfs_inode);
9370}
9371
9372void __cold btrfs_destroy_cachep(void)
9373{
9374
9375
9376
9377
9378 rcu_barrier();
9379 kmem_cache_destroy(btrfs_inode_cachep);
9380 kmem_cache_destroy(btrfs_trans_handle_cachep);
9381 kmem_cache_destroy(btrfs_path_cachep);
9382 kmem_cache_destroy(btrfs_free_space_cachep);
9383}
9384
9385int __init btrfs_init_cachep(void)
9386{
9387 btrfs_inode_cachep = kmem_cache_create("btrfs_inode",
9388 sizeof(struct btrfs_inode), 0,
9389 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT,
9390 init_once);
9391 if (!btrfs_inode_cachep)
9392 goto fail;
9393
9394 btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle",
9395 sizeof(struct btrfs_trans_handle), 0,
9396 SLAB_TEMPORARY | SLAB_MEM_SPREAD, NULL);
9397 if (!btrfs_trans_handle_cachep)
9398 goto fail;
9399
9400 btrfs_path_cachep = kmem_cache_create("btrfs_path",
9401 sizeof(struct btrfs_path), 0,
9402 SLAB_MEM_SPREAD, NULL);
9403 if (!btrfs_path_cachep)
9404 goto fail;
9405
9406 btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space",
9407 sizeof(struct btrfs_free_space), 0,
9408 SLAB_MEM_SPREAD, NULL);
9409 if (!btrfs_free_space_cachep)
9410 goto fail;
9411
9412 return 0;
9413fail:
9414 btrfs_destroy_cachep();
9415 return -ENOMEM;
9416}
9417
9418static int btrfs_getattr(const struct path *path, struct kstat *stat,
9419 u32 request_mask, unsigned int flags)
9420{
9421 u64 delalloc_bytes;
9422 struct inode *inode = d_inode(path->dentry);
9423 u32 blocksize = inode->i_sb->s_blocksize;
9424 u32 bi_flags = BTRFS_I(inode)->flags;
9425
9426 stat->result_mask |= STATX_BTIME;
9427 stat->btime.tv_sec = BTRFS_I(inode)->i_otime.tv_sec;
9428 stat->btime.tv_nsec = BTRFS_I(inode)->i_otime.tv_nsec;
9429 if (bi_flags & BTRFS_INODE_APPEND)
9430 stat->attributes |= STATX_ATTR_APPEND;
9431 if (bi_flags & BTRFS_INODE_COMPRESS)
9432 stat->attributes |= STATX_ATTR_COMPRESSED;
9433 if (bi_flags & BTRFS_INODE_IMMUTABLE)
9434 stat->attributes |= STATX_ATTR_IMMUTABLE;
9435 if (bi_flags & BTRFS_INODE_NODUMP)
9436 stat->attributes |= STATX_ATTR_NODUMP;
9437
9438 stat->attributes_mask |= (STATX_ATTR_APPEND |
9439 STATX_ATTR_COMPRESSED |
9440 STATX_ATTR_IMMUTABLE |
9441 STATX_ATTR_NODUMP);
9442
9443 generic_fillattr(inode, stat);
9444 stat->dev = BTRFS_I(inode)->root->anon_dev;
9445
9446 spin_lock(&BTRFS_I(inode)->lock);
9447 delalloc_bytes = BTRFS_I(inode)->new_delalloc_bytes;
9448 spin_unlock(&BTRFS_I(inode)->lock);
9449 stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) +
9450 ALIGN(delalloc_bytes, blocksize)) >> 9;
9451 return 0;
9452}
9453
9454static int btrfs_rename_exchange(struct inode *old_dir,
9455 struct dentry *old_dentry,
9456 struct inode *new_dir,
9457 struct dentry *new_dentry)
9458{
9459 struct btrfs_fs_info *fs_info = btrfs_sb(old_dir->i_sb);
9460 struct btrfs_trans_handle *trans;
9461 struct btrfs_root *root = BTRFS_I(old_dir)->root;
9462 struct btrfs_root *dest = BTRFS_I(new_dir)->root;
9463 struct inode *new_inode = new_dentry->d_inode;
9464 struct inode *old_inode = old_dentry->d_inode;
9465 struct timespec64 ctime = current_time(old_inode);
9466 struct dentry *parent;
9467 u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
9468 u64 new_ino = btrfs_ino(BTRFS_I(new_inode));
9469 u64 old_idx = 0;
9470 u64 new_idx = 0;
9471 u64 root_objectid;
9472 int ret;
9473 bool root_log_pinned = false;
9474 bool dest_log_pinned = false;
9475 struct btrfs_log_ctx ctx_root;
9476 struct btrfs_log_ctx ctx_dest;
9477 bool sync_log_root = false;
9478 bool sync_log_dest = false;
9479 bool commit_transaction = false;
9480
9481
9482 if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
9483 return -EXDEV;
9484
9485 btrfs_init_log_ctx(&ctx_root, old_inode);
9486 btrfs_init_log_ctx(&ctx_dest, new_inode);
9487
9488
9489 if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
9490 down_read(&fs_info->subvol_sem);
9491 if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
9492 down_read(&fs_info->subvol_sem);
9493
9494
9495
9496
9497
9498
9499
9500
9501
9502 trans = btrfs_start_transaction(root, 12);
9503 if (IS_ERR(trans)) {
9504 ret = PTR_ERR(trans);
9505 goto out_notrans;
9506 }
9507
9508
9509
9510
9511
9512 ret = btrfs_set_inode_index(BTRFS_I(new_dir), &old_idx);
9513 if (ret)
9514 goto out_fail;
9515 ret = btrfs_set_inode_index(BTRFS_I(old_dir), &new_idx);
9516 if (ret)
9517 goto out_fail;
9518
9519 BTRFS_I(old_inode)->dir_index = 0ULL;
9520 BTRFS_I(new_inode)->dir_index = 0ULL;
9521
9522
9523 if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
9524
9525 btrfs_set_log_full_commit(trans);
9526 } else {
9527 btrfs_pin_log_trans(root);
9528 root_log_pinned = true;
9529 ret = btrfs_insert_inode_ref(trans, dest,
9530 new_dentry->d_name.name,
9531 new_dentry->d_name.len,
9532 old_ino,
9533 btrfs_ino(BTRFS_I(new_dir)),
9534 old_idx);
9535 if (ret)
9536 goto out_fail;
9537 }
9538
9539
9540 if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
9541
9542 btrfs_set_log_full_commit(trans);
9543 } else {
9544 btrfs_pin_log_trans(dest);
9545 dest_log_pinned = true;
9546 ret = btrfs_insert_inode_ref(trans, root,
9547 old_dentry->d_name.name,
9548 old_dentry->d_name.len,
9549 new_ino,
9550 btrfs_ino(BTRFS_I(old_dir)),
9551 new_idx);
9552 if (ret)
9553 goto out_fail;
9554 }
9555
9556
9557 inode_inc_iversion(old_dir);
9558 inode_inc_iversion(new_dir);
9559 inode_inc_iversion(old_inode);
9560 inode_inc_iversion(new_inode);
9561 old_dir->i_ctime = old_dir->i_mtime = ctime;
9562 new_dir->i_ctime = new_dir->i_mtime = ctime;
9563 old_inode->i_ctime = ctime;
9564 new_inode->i_ctime = ctime;
9565
9566 if (old_dentry->d_parent != new_dentry->d_parent) {
9567 btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
9568 BTRFS_I(old_inode), 1);
9569 btrfs_record_unlink_dir(trans, BTRFS_I(new_dir),
9570 BTRFS_I(new_inode), 1);
9571 }
9572
9573
9574 if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
9575 root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
9576 ret = btrfs_unlink_subvol(trans, old_dir, root_objectid,
9577 old_dentry->d_name.name,
9578 old_dentry->d_name.len);
9579 } else {
9580 ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),
9581 BTRFS_I(old_dentry->d_inode),
9582 old_dentry->d_name.name,
9583 old_dentry->d_name.len);
9584 if (!ret)
9585 ret = btrfs_update_inode(trans, root, old_inode);
9586 }
9587 if (ret) {
9588 btrfs_abort_transaction(trans, ret);
9589 goto out_fail;
9590 }
9591
9592
9593 if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
9594 root_objectid = BTRFS_I(new_inode)->root->root_key.objectid;
9595 ret = btrfs_unlink_subvol(trans, new_dir, root_objectid,
9596 new_dentry->d_name.name,
9597 new_dentry->d_name.len);
9598 } else {
9599 ret = __btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir),
9600 BTRFS_I(new_dentry->d_inode),
9601 new_dentry->d_name.name,
9602 new_dentry->d_name.len);
9603 if (!ret)
9604 ret = btrfs_update_inode(trans, dest, new_inode);
9605 }
9606 if (ret) {
9607 btrfs_abort_transaction(trans, ret);
9608 goto out_fail;
9609 }
9610
9611 ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode),
9612 new_dentry->d_name.name,
9613 new_dentry->d_name.len, 0, old_idx);
9614 if (ret) {
9615 btrfs_abort_transaction(trans, ret);
9616 goto out_fail;
9617 }
9618
9619 ret = btrfs_add_link(trans, BTRFS_I(old_dir), BTRFS_I(new_inode),
9620 old_dentry->d_name.name,
9621 old_dentry->d_name.len, 0, new_idx);
9622 if (ret) {
9623 btrfs_abort_transaction(trans, ret);
9624 goto out_fail;
9625 }
9626
9627 if (old_inode->i_nlink == 1)
9628 BTRFS_I(old_inode)->dir_index = old_idx;
9629 if (new_inode->i_nlink == 1)
9630 BTRFS_I(new_inode)->dir_index = new_idx;
9631
9632 if (root_log_pinned) {
9633 parent = new_dentry->d_parent;
9634 ret = btrfs_log_new_name(trans, BTRFS_I(old_inode),
9635 BTRFS_I(old_dir), parent,
9636 false, &ctx_root);
9637 if (ret == BTRFS_NEED_LOG_SYNC)
9638 sync_log_root = true;
9639 else if (ret == BTRFS_NEED_TRANS_COMMIT)
9640 commit_transaction = true;
9641 ret = 0;
9642 btrfs_end_log_trans(root);
9643 root_log_pinned = false;
9644 }
9645 if (dest_log_pinned) {
9646 if (!commit_transaction) {
9647 parent = old_dentry->d_parent;
9648 ret = btrfs_log_new_name(trans, BTRFS_I(new_inode),
9649 BTRFS_I(new_dir), parent,
9650 false, &ctx_dest);
9651 if (ret == BTRFS_NEED_LOG_SYNC)
9652 sync_log_dest = true;
9653 else if (ret == BTRFS_NEED_TRANS_COMMIT)
9654 commit_transaction = true;
9655 ret = 0;
9656 }
9657 btrfs_end_log_trans(dest);
9658 dest_log_pinned = false;
9659 }
9660out_fail:
9661
9662
9663
9664
9665
9666
9667
9668
9669
9670
9671
9672 if (ret && (root_log_pinned || dest_log_pinned)) {
9673 if (btrfs_inode_in_log(BTRFS_I(old_dir), fs_info->generation) ||
9674 btrfs_inode_in_log(BTRFS_I(new_dir), fs_info->generation) ||
9675 btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
9676 (new_inode &&
9677 btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation)))
9678 btrfs_set_log_full_commit(trans);
9679
9680 if (root_log_pinned) {
9681 btrfs_end_log_trans(root);
9682 root_log_pinned = false;
9683 }
9684 if (dest_log_pinned) {
9685 btrfs_end_log_trans(dest);
9686 dest_log_pinned = false;
9687 }
9688 }
9689 if (!ret && sync_log_root && !commit_transaction) {
9690 ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root,
9691 &ctx_root);
9692 if (ret)
9693 commit_transaction = true;
9694 }
9695 if (!ret && sync_log_dest && !commit_transaction) {
9696 ret = btrfs_sync_log(trans, BTRFS_I(new_inode)->root,
9697 &ctx_dest);
9698 if (ret)
9699 commit_transaction = true;
9700 }
9701 if (commit_transaction) {
9702 ret = btrfs_commit_transaction(trans);
9703 } else {
9704 int ret2;
9705
9706 ret2 = btrfs_end_transaction(trans);
9707 ret = ret ? ret : ret2;
9708 }
9709out_notrans:
9710 if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
9711 up_read(&fs_info->subvol_sem);
9712 if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
9713 up_read(&fs_info->subvol_sem);
9714
9715 return ret;
9716}
9717
9718static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans,
9719 struct btrfs_root *root,
9720 struct inode *dir,
9721 struct dentry *dentry)
9722{
9723 int ret;
9724 struct inode *inode;
9725 u64 objectid;
9726 u64 index;
9727
9728 ret = btrfs_find_free_ino(root, &objectid);
9729 if (ret)
9730 return ret;
9731
9732 inode = btrfs_new_inode(trans, root, dir,
9733 dentry->d_name.name,
9734 dentry->d_name.len,
9735 btrfs_ino(BTRFS_I(dir)),
9736 objectid,
9737 S_IFCHR | WHITEOUT_MODE,
9738 &index);
9739
9740 if (IS_ERR(inode)) {
9741 ret = PTR_ERR(inode);
9742 return ret;
9743 }
9744
9745 inode->i_op = &btrfs_special_inode_operations;
9746 init_special_inode(inode, inode->i_mode,
9747 WHITEOUT_DEV);
9748
9749 ret = btrfs_init_inode_security(trans, inode, dir,
9750 &dentry->d_name);
9751 if (ret)
9752 goto out;
9753
9754 ret = btrfs_add_nondir(trans, BTRFS_I(dir), dentry,
9755 BTRFS_I(inode), 0, index);
9756 if (ret)
9757 goto out;
9758
9759 ret = btrfs_update_inode(trans, root, inode);
9760out:
9761 unlock_new_inode(inode);
9762 if (ret)
9763 inode_dec_link_count(inode);
9764 iput(inode);
9765
9766 return ret;
9767}
9768
9769static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
9770 struct inode *new_dir, struct dentry *new_dentry,
9771 unsigned int flags)
9772{
9773 struct btrfs_fs_info *fs_info = btrfs_sb(old_dir->i_sb);
9774 struct btrfs_trans_handle *trans;
9775 unsigned int trans_num_items;
9776 struct btrfs_root *root = BTRFS_I(old_dir)->root;
9777 struct btrfs_root *dest = BTRFS_I(new_dir)->root;
9778 struct inode *new_inode = d_inode(new_dentry);
9779 struct inode *old_inode = d_inode(old_dentry);
9780 u64 index = 0;
9781 u64 root_objectid;
9782 int ret;
9783 u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
9784 bool log_pinned = false;
9785 struct btrfs_log_ctx ctx;
9786 bool sync_log = false;
9787 bool commit_transaction = false;
9788
9789 if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
9790 return -EPERM;
9791
9792
9793 if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
9794 return -EXDEV;
9795
9796 if (old_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID ||
9797 (new_inode && btrfs_ino(BTRFS_I(new_inode)) == BTRFS_FIRST_FREE_OBJECTID))
9798 return -ENOTEMPTY;
9799
9800 if (S_ISDIR(old_inode->i_mode) && new_inode &&
9801 new_inode->i_size > BTRFS_EMPTY_DIR_SIZE)
9802 return -ENOTEMPTY;
9803
9804
9805
9806 ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino,
9807 new_dentry->d_name.name,
9808 new_dentry->d_name.len);
9809
9810 if (ret) {
9811 if (ret == -EEXIST) {
9812
9813
9814 if (WARN_ON(!new_inode)) {
9815 return ret;
9816 }
9817 } else {
9818
9819 return ret;
9820 }
9821 }
9822 ret = 0;
9823
9824
9825
9826
9827
9828 if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size)
9829 filemap_flush(old_inode->i_mapping);
9830
9831
9832 if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
9833 down_read(&fs_info->subvol_sem);
9834
9835
9836
9837
9838
9839
9840
9841
9842
9843
9844
9845 trans_num_items = 11;
9846 if (flags & RENAME_WHITEOUT)
9847 trans_num_items += 5;
9848 trans = btrfs_start_transaction(root, trans_num_items);
9849 if (IS_ERR(trans)) {
9850 ret = PTR_ERR(trans);
9851 goto out_notrans;
9852 }
9853
9854 if (dest != root)
9855 btrfs_record_root_in_trans(trans, dest);
9856
9857 ret = btrfs_set_inode_index(BTRFS_I(new_dir), &index);
9858 if (ret)
9859 goto out_fail;
9860
9861 BTRFS_I(old_inode)->dir_index = 0ULL;
9862 if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
9863
9864 btrfs_set_log_full_commit(trans);
9865 } else {
9866 btrfs_pin_log_trans(root);
9867 log_pinned = true;
9868 ret = btrfs_insert_inode_ref(trans, dest,
9869 new_dentry->d_name.name,
9870 new_dentry->d_name.len,
9871 old_ino,
9872 btrfs_ino(BTRFS_I(new_dir)), index);
9873 if (ret)
9874 goto out_fail;
9875 }
9876
9877 inode_inc_iversion(old_dir);
9878 inode_inc_iversion(new_dir);
9879 inode_inc_iversion(old_inode);
9880 old_dir->i_ctime = old_dir->i_mtime =
9881 new_dir->i_ctime = new_dir->i_mtime =
9882 old_inode->i_ctime = current_time(old_dir);
9883
9884 if (old_dentry->d_parent != new_dentry->d_parent)
9885 btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
9886 BTRFS_I(old_inode), 1);
9887
9888 if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
9889 root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
9890 ret = btrfs_unlink_subvol(trans, old_dir, root_objectid,
9891 old_dentry->d_name.name,
9892 old_dentry->d_name.len);
9893 } else {
9894 ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),
9895 BTRFS_I(d_inode(old_dentry)),
9896 old_dentry->d_name.name,
9897 old_dentry->d_name.len);
9898 if (!ret)
9899 ret = btrfs_update_inode(trans, root, old_inode);
9900 }
9901 if (ret) {
9902 btrfs_abort_transaction(trans, ret);
9903 goto out_fail;
9904 }
9905
9906 if (new_inode) {
9907 inode_inc_iversion(new_inode);
9908 new_inode->i_ctime = current_time(new_inode);
9909 if (unlikely(btrfs_ino(BTRFS_I(new_inode)) ==
9910 BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
9911 root_objectid = BTRFS_I(new_inode)->location.objectid;
9912 ret = btrfs_unlink_subvol(trans, new_dir, root_objectid,
9913 new_dentry->d_name.name,
9914 new_dentry->d_name.len);
9915 BUG_ON(new_inode->i_nlink == 0);
9916 } else {
9917 ret = btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir),
9918 BTRFS_I(d_inode(new_dentry)),
9919 new_dentry->d_name.name,
9920 new_dentry->d_name.len);
9921 }
9922 if (!ret && new_inode->i_nlink == 0)
9923 ret = btrfs_orphan_add(trans,
9924 BTRFS_I(d_inode(new_dentry)));
9925 if (ret) {
9926 btrfs_abort_transaction(trans, ret);
9927 goto out_fail;
9928 }
9929 }
9930
9931 ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode),
9932 new_dentry->d_name.name,
9933 new_dentry->d_name.len, 0, index);
9934 if (ret) {
9935 btrfs_abort_transaction(trans, ret);
9936 goto out_fail;
9937 }
9938
9939 if (old_inode->i_nlink == 1)
9940 BTRFS_I(old_inode)->dir_index = index;
9941
9942 if (log_pinned) {
9943 struct dentry *parent = new_dentry->d_parent;
9944
9945 btrfs_init_log_ctx(&ctx, old_inode);
9946 ret = btrfs_log_new_name(trans, BTRFS_I(old_inode),
9947 BTRFS_I(old_dir), parent,
9948 false, &ctx);
9949 if (ret == BTRFS_NEED_LOG_SYNC)
9950 sync_log = true;
9951 else if (ret == BTRFS_NEED_TRANS_COMMIT)
9952 commit_transaction = true;
9953 ret = 0;
9954 btrfs_end_log_trans(root);
9955 log_pinned = false;
9956 }
9957
9958 if (flags & RENAME_WHITEOUT) {
9959 ret = btrfs_whiteout_for_rename(trans, root, old_dir,
9960 old_dentry);
9961
9962 if (ret) {
9963 btrfs_abort_transaction(trans, ret);
9964 goto out_fail;
9965 }
9966 }
9967out_fail:
9968
9969
9970
9971
9972
9973
9974
9975
9976
9977
9978
9979 if (ret && log_pinned) {
9980 if (btrfs_inode_in_log(BTRFS_I(old_dir), fs_info->generation) ||
9981 btrfs_inode_in_log(BTRFS_I(new_dir), fs_info->generation) ||
9982 btrfs_inode_in_log(BTRFS_I(old_inode), fs_info->generation) ||
9983 (new_inode &&
9984 btrfs_inode_in_log(BTRFS_I(new_inode), fs_info->generation)))
9985 btrfs_set_log_full_commit(trans);
9986
9987 btrfs_end_log_trans(root);
9988 log_pinned = false;
9989 }
9990 if (!ret && sync_log) {
9991 ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root, &ctx);
9992 if (ret)
9993 commit_transaction = true;
9994 }
9995 if (commit_transaction) {
9996 ret = btrfs_commit_transaction(trans);
9997 } else {
9998 int ret2;
9999
10000 ret2 = btrfs_end_transaction(trans);
10001 ret = ret ? ret : ret2;
10002 }
10003out_notrans:
10004 if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
10005 up_read(&fs_info->subvol_sem);
10006
10007 return ret;
10008}
10009
10010static int btrfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
10011 struct inode *new_dir, struct dentry *new_dentry,
10012 unsigned int flags)
10013{
10014 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
10015 return -EINVAL;
10016
10017 if (flags & RENAME_EXCHANGE)
10018 return btrfs_rename_exchange(old_dir, old_dentry, new_dir,
10019 new_dentry);
10020
10021 return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
10022}
10023
/*
 * One queued request to flush the delalloc pages of a single inode; see
 * btrfs_alloc_delalloc_work() and btrfs_run_delalloc_work().
 */
struct btrfs_delalloc_work {
	/* Inode to flush; the worker drops this reference with iput(). */
	struct inode *inode;
	/* Completed by the worker once the flush has been issued. */
	struct completion completion;
	/* Links the request into the submitter's local list of works. */
	struct list_head list;
	/* Work item queued on the flush workers. */
	struct btrfs_work work;
};
10030
10031static void btrfs_run_delalloc_work(struct btrfs_work *work)
10032{
10033 struct btrfs_delalloc_work *delalloc_work;
10034 struct inode *inode;
10035
10036 delalloc_work = container_of(work, struct btrfs_delalloc_work,
10037 work);
10038 inode = delalloc_work->inode;
10039 filemap_flush(inode->i_mapping);
10040 if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
10041 &BTRFS_I(inode)->runtime_flags))
10042 filemap_flush(inode->i_mapping);
10043
10044 iput(inode);
10045 complete(&delalloc_work->completion);
10046}
10047
10048static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode)
10049{
10050 struct btrfs_delalloc_work *work;
10051
10052 work = kmalloc(sizeof(*work), GFP_NOFS);
10053 if (!work)
10054 return NULL;
10055
10056 init_completion(&work->completion);
10057 INIT_LIST_HEAD(&work->list);
10058 work->inode = inode;
10059 btrfs_init_work(&work->work, btrfs_flush_delalloc_helper,
10060 btrfs_run_delalloc_work, NULL, NULL);
10061
10062 return work;
10063}
10064
10065
10066
10067
10068
/*
 * Queue flush work for the inodes on @root's delalloc list and wait for all
 * queued works to complete.
 *
 * @root:     root whose delalloc inodes are flushed
 * @nr:       stop after queueing this many inodes; -1 means no limit
 * @snapshot: when true, set BTRFS_INODE_SNAPSHOT_FLUSH on each inode before
 *            queueing it
 *
 * Returns the number of inodes queued, or -ENOMEM if a work item could not
 * be allocated (works queued before the failure are still waited for).
 */
static int start_delalloc_inodes(struct btrfs_root *root, int nr, bool snapshot)
{
	struct btrfs_inode *binode;
	struct inode *inode;
	struct btrfs_delalloc_work *work, *next;
	struct list_head works;
	struct list_head splice;
	int ret = 0;

	INIT_LIST_HEAD(&works);
	INIT_LIST_HEAD(&splice);

	mutex_lock(&root->delalloc_mutex);
	spin_lock(&root->delalloc_lock);
	/* Work from a private copy of the list. */
	list_splice_init(&root->delalloc_inodes, &splice);
	while (!list_empty(&splice)) {
		binode = list_entry(splice.next, struct btrfs_inode,
				    delalloc_inodes);

		/* Put the inode back on the root's list before handling it. */
		list_move_tail(&binode->delalloc_inodes,
			       &root->delalloc_inodes);
		inode = igrab(&binode->vfs_inode);
		if (!inode) {
			/* Could not get a reference: skip this inode. */
			cond_resched_lock(&root->delalloc_lock);
			continue;
		}
		/* The spinlock is dropped while allocating and queueing. */
		spin_unlock(&root->delalloc_lock);

		if (snapshot)
			set_bit(BTRFS_INODE_SNAPSHOT_FLUSH,
				&binode->runtime_flags);
		work = btrfs_alloc_delalloc_work(inode);
		if (!work) {
			iput(inode);
			ret = -ENOMEM;
			goto out;
		}
		list_add_tail(&work->list, &works);
		btrfs_queue_work(root->fs_info->flush_workers,
				 &work->work);
		ret++;
		/* Jumps to out with delalloc_lock NOT held, as out expects. */
		if (nr != -1 && ret >= nr)
			goto out;
		cond_resched();
		spin_lock(&root->delalloc_lock);
	}
	spin_unlock(&root->delalloc_lock);

out:
	/* Wait for every queued work and free it. */
	list_for_each_entry_safe(work, next, &works, list) {
		list_del_init(&work->list);
		wait_for_completion(&work->completion);
		kfree(work);
	}

	/* Return inodes we did not get to back to the root's list. */
	if (!list_empty(&splice)) {
		spin_lock(&root->delalloc_lock);
		list_splice_tail(&splice, &root->delalloc_inodes);
		spin_unlock(&root->delalloc_lock);
	}
	mutex_unlock(&root->delalloc_mutex);
	return ret;
}
10132
10133int btrfs_start_delalloc_snapshot(struct btrfs_root *root)
10134{
10135 struct btrfs_fs_info *fs_info = root->fs_info;
10136 int ret;
10137
10138 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
10139 return -EROFS;
10140
10141 ret = start_delalloc_inodes(root, -1, true);
10142 if (ret > 0)
10143 ret = 0;
10144 return ret;
10145}
10146
/*
 * Flush delalloc inodes across all roots on fs_info->delalloc_roots.
 *
 * @fs_info: filesystem to flush
 * @nr:      upper bound on the number of inodes to flush in total; -1 means
 *           no limit
 *
 * Returns -EROFS if the filesystem is in the error state, a negative errno
 * from start_delalloc_inodes() on failure, and 0 otherwise.
 */
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int nr)
{
	struct btrfs_root *root;
	struct list_head splice;
	int ret;

	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
		return -EROFS;

	INIT_LIST_HEAD(&splice);

	mutex_lock(&fs_info->delalloc_root_mutex);
	spin_lock(&fs_info->delalloc_root_lock);
	/* Work from a private copy of the list of roots. */
	list_splice_init(&fs_info->delalloc_roots, &splice);
	while (!list_empty(&splice) && nr) {
		root = list_first_entry(&splice, struct btrfs_root,
					delalloc_root);
		/* Hold a reference on the root while the lock is dropped. */
		root = btrfs_grab_fs_root(root);
		BUG_ON(!root);
		list_move_tail(&root->delalloc_root,
			       &fs_info->delalloc_roots);
		spin_unlock(&fs_info->delalloc_root_lock);

		ret = start_delalloc_inodes(root, nr, false);
		btrfs_put_fs_root(root);
		if (ret < 0)
			goto out;

		/* Charge the inodes just flushed against the budget. */
		if (nr != -1) {
			nr -= ret;
			WARN_ON(nr < 0);
		}
		spin_lock(&fs_info->delalloc_root_lock);
	}
	spin_unlock(&fs_info->delalloc_root_lock);

	ret = 0;
out:
	/* Return roots we did not get to back to the global list. */
	if (!list_empty(&splice)) {
		spin_lock(&fs_info->delalloc_root_lock);
		list_splice_tail(&splice, &fs_info->delalloc_roots);
		spin_unlock(&fs_info->delalloc_root_lock);
	}
	mutex_unlock(&fs_info->delalloc_root_mutex);
	return ret;
}
10193
/*
 * Create the symbolic link @dentry in @dir pointing at @symname.
 *
 * The target string is stored as an inline file extent of the new inode, so
 * its length is limited to BTRFS_MAX_INLINE_DATA_SIZE() (-ENAMETOOLONG
 * otherwise).
 *
 * Returns 0 on success or a negative errno.
 */
static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
			 const char *symname)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct inode *inode = NULL;
	int err;
	u64 objectid;
	u64 index = 0;
	int name_len;
	int datasize;
	unsigned long ptr;
	struct btrfs_file_extent_item *ei;
	struct extent_buffer *leaf;

	name_len = strlen(symname);
	if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(fs_info))
		return -ENAMETOOLONG;

	/*
	 * Reserve 7 items.
	 * NOTE(review): presumably inode item + ref, two dir items, the
	 * parent inode update, the inline extent and a security xattr -
	 * confirm the breakdown.
	 */
	trans = btrfs_start_transaction(root, 7);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	err = btrfs_find_free_ino(root, &objectid);
	if (err)
		goto out_unlock;

	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
				dentry->d_name.len, btrfs_ino(BTRFS_I(dir)),
				objectid, S_IFLNK|S_IRWXUGO, &index);
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
		/* Keep the cleanup at out_unlock from touching an ERR_PTR. */
		inode = NULL;
		goto out_unlock;
	}

	/*
	 * Install file operations before the security hook runs; i_op is
	 * switched to the symlink operations further down.
	 * NOTE(review): presumably an LSM may inspect the ops vector during
	 * initialisation - confirm.
	 */
	inode->i_fop = &btrfs_file_operations;
	inode->i_op = &btrfs_file_inode_operations;
	inode->i_mapping->a_ops = &btrfs_aops;
	BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;

	err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
	if (err)
		goto out_unlock;

	path = btrfs_alloc_path();
	if (!path) {
		err = -ENOMEM;
		goto out_unlock;
	}
	/* Insert an inline extent item at file offset 0 for the target. */
	key.objectid = btrfs_ino(BTRFS_I(inode));
	key.offset = 0;
	key.type = BTRFS_EXTENT_DATA_KEY;
	datasize = btrfs_file_extent_calc_inline_size(name_len);
	err = btrfs_insert_empty_item(trans, root, path, &key,
				      datasize);
	if (err) {
		btrfs_free_path(path);
		goto out_unlock;
	}
	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);
	btrfs_set_file_extent_generation(leaf, ei, trans->transid);
	btrfs_set_file_extent_type(leaf, ei,
				   BTRFS_FILE_EXTENT_INLINE);
	btrfs_set_file_extent_encryption(leaf, ei, 0);
	btrfs_set_file_extent_compression(leaf, ei, 0);
	btrfs_set_file_extent_other_encoding(leaf, ei, 0);
	btrfs_set_file_extent_ram_bytes(leaf, ei, name_len);

	/* Copy the target string (without a trailing NUL) into the item. */
	ptr = btrfs_file_extent_inline_start(ei);
	write_extent_buffer(leaf, symname, ptr, name_len);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_free_path(path);

	inode->i_op = &btrfs_symlink_inode_operations;
	inode_nohighmem(inode);
	inode_set_bytes(inode, name_len);
	btrfs_i_size_write(BTRFS_I(inode), name_len);
	err = btrfs_update_inode(trans, root, inode);
	/*
	 * Adding the directory entry is deliberately the last step, so that
	 * no earlier failure has to undo it.
	 */
	if (!err)
		err = btrfs_add_nondir(trans, BTRFS_I(dir), dentry,
				BTRFS_I(inode), 0, index);
	if (err)
		goto out_unlock;

	d_instantiate_new(dentry, inode);

out_unlock:
	btrfs_end_transaction(trans);
	/* On failure drop the link count and discard the new inode. */
	if (err && inode) {
		inode_dec_link_count(inode);
		discard_new_inode(inode);
	}
	btrfs_btree_balance_dirty(fs_info);
	return err;
}
10313
/*
 * Preallocate extents for the file range [start, start + num_bytes) of @inode
 * and record them as BTRFS_FILE_EXTENT_PREALLOC file extents.
 *
 * @inode:      inode the range belongs to
 * @mode:       fallocate mode bits; only FALLOC_FL_KEEP_SIZE is examined here
 * @start:      file offset at which preallocation begins
 * @num_bytes:  number of bytes still to be preallocated
 * @min_size:   lower bound applied to each allocation request and also passed
 *              to btrfs_reserve_extent()
 * @actual_len: cap used when i_size is extended past its current value
 * @alloc_hint: in/out hint for the allocator; updated to point just past the
 *              most recently allocated extent
 * @trans:      transaction handle to use, or NULL to have this function start
 *              and end one transaction per allocated extent
 *
 * Returns 0 on success or a negative error code. If the loop stops before the
 * end of the range, the data space reserved for the remaining part of the
 * range is released before returning.
 */
static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
				       u64 start, u64 num_bytes, u64 min_size,
				       loff_t actual_len, u64 *alloc_hint,
				       struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	struct extent_map *em;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_key ins;
	u64 cur_offset = start;
	u64 i_size;
	u64 cur_bytes;
	u64 last_alloc = (u64)-1;
	int ret = 0;
	bool own_trans = true;
	u64 end = start + num_bytes - 1;

	/* A caller-supplied handle is used as is and never ended here. */
	if (trans)
		own_trans = false;
	while (num_bytes > 0) {
		if (own_trans) {
			trans = btrfs_start_transaction(root, 3);
			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
				break;
			}
		}

		/* Request at most 256M per pass, but never less than min_size. */
		cur_bytes = min_t(u64, num_bytes, SZ_256M);
		cur_bytes = max(cur_bytes, min_size);

		/*
		 * Do not ask for more than the allocator handed out on the
		 * previous pass. last_alloc starts at (u64)-1, so the first
		 * pass is not constrained by it.
		 */
		cur_bytes = min(cur_bytes, last_alloc);
		ret = btrfs_reserve_extent(root, cur_bytes, cur_bytes,
				min_size, 0, *alloc_hint, &ins, 1, 0);
		if (ret) {
			if (own_trans)
				btrfs_end_transaction(trans);
			break;
		}
		btrfs_dec_block_group_reservations(fs_info, ins.objectid);

		/* ins.objectid is the extent start, ins.offset its length. */
		last_alloc = ins.offset;
		ret = insert_reserved_file_extent(trans, inode,
						  cur_offset, ins.objectid,
						  ins.offset, ins.offset,
						  ins.offset, 0, 0, 0,
						  BTRFS_FILE_EXTENT_PREALLOC);
		if (ret) {
			/* Give the extent back before aborting the transaction. */
			btrfs_free_reserved_extent(fs_info, ins.objectid,
						   ins.offset, 0);
			btrfs_abort_transaction(trans, ret);
			if (own_trans)
				btrfs_end_transaction(trans);
			break;
		}

		/* Drop cached mappings that overlap the new extent. */
		btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset,
					cur_offset + ins.offset -1, 0);

		em = alloc_extent_map();
		if (!em) {
			/*
			 * No memory for an extent map: skip caching this extent
			 * and flag the inode instead.
			 * NOTE(review): presumably the flag makes a later fsync
			 * avoid relying on the missing extent map - confirm.
			 */
			set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
				&BTRFS_I(inode)->runtime_flags);
			goto next;
		}

		em->start = cur_offset;
		em->orig_start = cur_offset;
		em->len = ins.offset;
		em->block_start = ins.objectid;
		em->block_len = ins.offset;
		em->orig_block_len = ins.offset;
		em->ram_bytes = ins.offset;
		em->bdev = fs_info->fs_devices->latest_bdev;
		set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
		em->generation = trans->transid;

		/*
		 * Retry the insertion, dropping overlapping cached mappings
		 * each time, until it stops reporting -EEXIST.
		 */
		while (1) {
			write_lock(&em_tree->lock);
			ret = add_extent_mapping(em_tree, em, 1);
			write_unlock(&em_tree->lock);
			if (ret != -EEXIST)
				break;
			btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset,
						cur_offset + ins.offset - 1,
						0);
		}
		free_extent_map(em);
next:
		/* Advance past the extent that was just inserted. */
		num_bytes -= ins.offset;
		cur_offset += ins.offset;
		*alloc_hint = ins.objectid + ins.offset;

		inode_inc_iversion(inode);
		inode->i_ctime = current_time(inode);
		BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
		/*
		 * Unless the caller asked to keep the size, grow i_size to
		 * min(cur_offset, actual_len) when that is past the current
		 * i_size.
		 */
		if (!(mode & FALLOC_FL_KEEP_SIZE) &&
		    (actual_len > inode->i_size) &&
		    (cur_offset > inode->i_size)) {
			if (cur_offset > actual_len)
				i_size = actual_len;
			else
				i_size = cur_offset;
			i_size_write(inode, i_size);
			btrfs_ordered_update_i_size(inode, i_size, NULL);
		}

		/* This overwrites any result left over from add_extent_mapping(). */
		ret = btrfs_update_inode(trans, root, inode);

		if (ret) {
			btrfs_abort_transaction(trans, ret);
			if (own_trans)
				btrfs_end_transaction(trans);
			break;
		}

		if (own_trans)
			btrfs_end_transaction(trans);
	}
	/* Release the data reservation for whatever was not allocated. */
	if (cur_offset < end)
		btrfs_free_reserved_data_space(inode, NULL, cur_offset,
			end - cur_offset + 1);
	return ret;
}
10444
10445int btrfs_prealloc_file_range(struct inode *inode, int mode,
10446 u64 start, u64 num_bytes, u64 min_size,
10447 loff_t actual_len, u64 *alloc_hint)
10448{
10449 return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
10450 min_size, actual_len, alloc_hint,
10451 NULL);
10452}
10453
10454int btrfs_prealloc_file_range_trans(struct inode *inode,
10455 struct btrfs_trans_handle *trans, int mode,
10456 u64 start, u64 num_bytes, u64 min_size,
10457 loff_t actual_len, u64 *alloc_hint)
10458{
10459 return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
10460 min_size, actual_len, alloc_hint, trans);
10461}
10462
/*
 * set_page_dirty callback for btrfs_aops: hands the page straight to
 * __set_page_dirty_nobuffers() and returns its result.
 */
static int btrfs_set_page_dirty(struct page *page)
{
	int ret;

	ret = __set_page_dirty_nobuffers(page);
	return ret;
}
10467
10468static int btrfs_permission(struct inode *inode, int mask)
10469{
10470 struct btrfs_root *root = BTRFS_I(inode)->root;
10471 umode_t mode = inode->i_mode;
10472
10473 if (mask & MAY_WRITE &&
10474 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
10475 if (btrfs_root_readonly(root))
10476 return -EROFS;
10477 if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY)
10478 return -EACCES;
10479 }
10480 return generic_permission(inode, mask);
10481}
10482
10483static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
10484{
10485 struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
10486 struct btrfs_trans_handle *trans;
10487 struct btrfs_root *root = BTRFS_I(dir)->root;
10488 struct inode *inode = NULL;
10489 u64 objectid;
10490 u64 index;
10491 int ret = 0;
10492
10493
10494
10495
10496 trans = btrfs_start_transaction(root, 5);
10497 if (IS_ERR(trans))
10498 return PTR_ERR(trans);
10499
10500 ret = btrfs_find_free_ino(root, &objectid);
10501 if (ret)
10502 goto out;
10503
10504 inode = btrfs_new_inode(trans, root, dir, NULL, 0,
10505 btrfs_ino(BTRFS_I(dir)), objectid, mode, &index);
10506 if (IS_ERR(inode)) {
10507 ret = PTR_ERR(inode);
10508 inode = NULL;
10509 goto out;
10510 }
10511
10512 inode->i_fop = &btrfs_file_operations;
10513 inode->i_op = &btrfs_file_inode_operations;
10514
10515 inode->i_mapping->a_ops = &btrfs_aops;
10516 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
10517
10518 ret = btrfs_init_inode_security(trans, inode, dir, NULL);
10519 if (ret)
10520 goto out;
10521
10522 ret = btrfs_update_inode(trans, root, inode);
10523 if (ret)
10524 goto out;
10525 ret = btrfs_orphan_add(trans, BTRFS_I(inode));
10526 if (ret)
10527 goto out;
10528
10529
10530
10531
10532
10533
10534
10535
10536 set_nlink(inode, 1);
10537 d_tmpfile(dentry, inode);
10538 unlock_new_inode(inode);
10539 mark_inode_dirty(inode);
10540out:
10541 btrfs_end_transaction(trans);
10542 if (ret && inode)
10543 discard_new_inode(inode);
10544 btrfs_btree_balance_dirty(fs_info);
10545 return ret;
10546}
10547
10548void btrfs_set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
10549{
10550 struct inode *inode = tree->private_data;
10551 unsigned long index = start >> PAGE_SHIFT;
10552 unsigned long end_index = end >> PAGE_SHIFT;
10553 struct page *page;
10554
10555 while (index <= end_index) {
10556 page = find_get_page(inode->i_mapping, index);
10557 ASSERT(page);
10558 set_page_writeback(page);
10559 put_page(page);
10560 index++;
10561 }
10562}
10563
10564#ifdef CONFIG_SWAP
10565
10566
10567
10568
10569
10570static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr,
10571 bool is_block_group)
10572{
10573 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
10574 struct btrfs_swapfile_pin *sp, *entry;
10575 struct rb_node **p;
10576 struct rb_node *parent = NULL;
10577
10578 sp = kmalloc(sizeof(*sp), GFP_NOFS);
10579 if (!sp)
10580 return -ENOMEM;
10581 sp->ptr = ptr;
10582 sp->inode = inode;
10583 sp->is_block_group = is_block_group;
10584
10585 spin_lock(&fs_info->swapfile_pins_lock);
10586 p = &fs_info->swapfile_pins.rb_node;
10587 while (*p) {
10588 parent = *p;
10589 entry = rb_entry(parent, struct btrfs_swapfile_pin, node);
10590 if (sp->ptr < entry->ptr ||
10591 (sp->ptr == entry->ptr && sp->inode < entry->inode)) {
10592 p = &(*p)->rb_left;
10593 } else if (sp->ptr > entry->ptr ||
10594 (sp->ptr == entry->ptr && sp->inode > entry->inode)) {
10595 p = &(*p)->rb_right;
10596 } else {
10597 spin_unlock(&fs_info->swapfile_pins_lock);
10598 kfree(sp);
10599 return 1;
10600 }
10601 }
10602 rb_link_node(&sp->node, parent, p);
10603 rb_insert_color(&sp->node, &fs_info->swapfile_pins);
10604 spin_unlock(&fs_info->swapfile_pins_lock);
10605 return 0;
10606}
10607
10608
10609static void btrfs_free_swapfile_pins(struct inode *inode)
10610{
10611 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
10612 struct btrfs_swapfile_pin *sp;
10613 struct rb_node *node, *next;
10614
10615 spin_lock(&fs_info->swapfile_pins_lock);
10616 node = rb_first(&fs_info->swapfile_pins);
10617 while (node) {
10618 next = rb_next(node);
10619 sp = rb_entry(node, struct btrfs_swapfile_pin, node);
10620 if (sp->inode == inode) {
10621 rb_erase(&sp->node, &fs_info->swapfile_pins);
10622 if (sp->is_block_group)
10623 btrfs_put_block_group(sp->ptr);
10624 kfree(sp);
10625 }
10626 node = next;
10627 }
10628 spin_unlock(&fs_info->swapfile_pins_lock);
10629}
10630
/*
 * Accumulator used while btrfs_swap_activate() walks a swapfile: it holds the
 * run of extents currently being merged plus the totals reported so far.
 */
struct btrfs_swap_info {
	/* File offset at which the pending run starts. */
	u64 start;
	/* Physical byte address of the pending run. */
	u64 block_start;
	/* Length in bytes of the pending run; 0 means no run is pending. */
	u64 block_len;
	/* Lowest physical page number seen so far (used to compute the span). */
	u64 lowest_ppage;
	/* Highest physical page number seen so far (used to compute the span). */
	u64 highest_ppage;
	/* Total number of pages handed to add_swap_extent() so far. */
	unsigned long nr_pages;
	/* Sum of the values returned by add_swap_extent() so far. */
	int nr_extents;
};
10640
10641static int btrfs_add_swap_extent(struct swap_info_struct *sis,
10642 struct btrfs_swap_info *bsi)
10643{
10644 unsigned long nr_pages;
10645 u64 first_ppage, first_ppage_reported, next_ppage;
10646 int ret;
10647
10648 first_ppage = ALIGN(bsi->block_start, PAGE_SIZE) >> PAGE_SHIFT;
10649 next_ppage = ALIGN_DOWN(bsi->block_start + bsi->block_len,
10650 PAGE_SIZE) >> PAGE_SHIFT;
10651
10652 if (first_ppage >= next_ppage)
10653 return 0;
10654 nr_pages = next_ppage - first_ppage;
10655
10656 first_ppage_reported = first_ppage;
10657 if (bsi->start == 0)
10658 first_ppage_reported++;
10659 if (bsi->lowest_ppage > first_ppage_reported)
10660 bsi->lowest_ppage = first_ppage_reported;
10661 if (bsi->highest_ppage < (next_ppage - 1))
10662 bsi->highest_ppage = next_ppage - 1;
10663
10664 ret = add_swap_extent(sis, bsi->nr_pages, nr_pages, first_ppage);
10665 if (ret < 0)
10666 return ret;
10667 bsi->nr_extents += ret;
10668 bsi->nr_pages += nr_pages;
10669 return 0;
10670}
10671
10672static void btrfs_swap_deactivate(struct file *file)
10673{
10674 struct inode *inode = file_inode(file);
10675
10676 btrfs_free_swapfile_pins(inode);
10677 atomic_dec(&BTRFS_I(inode)->root->nr_swapfiles);
10678}
10679
/*
 * swap_activate callback: validate that @file can be used as a swapfile, pin
 * the device and block groups backing it, and report its physical extents to
 * the swap code.
 *
 * @sis:  swap info to fill in (extents, bdev, max, pages, highest_bit)
 * @file: the swapfile
 * @span: set to the number of physical pages between the lowest and highest
 *        page reported, inclusive
 *
 * Returns the number of swap extents added on success or a negative error
 * code. On failure after the swapfile counter was raised, the pins taken so
 * far are released through btrfs_swap_deactivate().
 */
static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
			       sector_t *span)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_state *cached_state = NULL;
	struct extent_map *em = NULL;
	struct btrfs_device *device = NULL;
	struct btrfs_swap_info bsi = {
		.lowest_ppage = (sector_t)-1ULL,
	};
	int ret = 0;
	u64 isize;
	u64 start;

	/* Wait for all ordered extents of the file before inspecting it. */
	ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
	if (ret)
		return ret;

	/*
	 * Inode-level requirements: not compressed, and both NODATACOW and
	 * NODATASUM set.
	 */
	if (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS) {
		btrfs_warn(fs_info, "swapfile must not be compressed");
		return -EINVAL;
	}
	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)) {
		btrfs_warn(fs_info, "swapfile must not be copy-on-write");
		return -EINVAL;
	}
	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
		btrfs_warn(fs_info, "swapfile must not be checksummed");
		return -EINVAL;
	}

	/*
	 * Claim the filesystem-wide exclusive operation flag while the extents
	 * are mapped; it is cleared again before returning.
	 * NOTE(review): presumably this keeps balance and device operations
	 * from relocating extents during the walk - confirm.
	 */
	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
		btrfs_warn(fs_info,
	   "cannot activate swapfile while exclusive operation is running");
		return -EBUSY;
	}

	/*
	 * Count this swapfile on its root before walking the extents. The
	 * counter is dropped again by btrfs_swap_deactivate().
	 */
	atomic_inc(&BTRFS_I(inode)->root->nr_swapfiles);

	/* Only whole sectors of the file are considered. */
	isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);

	lock_extent_bits(io_tree, 0, isize - 1, &cached_state);
	start = 0;
	while (start < isize) {
		u64 logical_block_start, physical_block_start;
		struct btrfs_block_group_cache *bg;
		u64 len = isize - start;

		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0);
		if (IS_ERR(em)) {
			ret = PTR_ERR(em);
			goto out;
		}

		/* Holes, inline extents and compressed extents are rejected. */
		if (em->block_start == EXTENT_MAP_HOLE) {
			btrfs_warn(fs_info, "swapfile must not have holes");
			ret = -EINVAL;
			goto out;
		}
		if (em->block_start == EXTENT_MAP_INLINE) {
			btrfs_warn(fs_info, "swapfile must not be inline");
			ret = -EINVAL;
			goto out;
		}
		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
			btrfs_warn(fs_info, "swapfile must not be compressed");
			ret = -EINVAL;
			goto out;
		}

		/* Logical address of @start, and the length left in this extent. */
		logical_block_start = em->block_start + (start - em->start);
		len = min(len, em->len - (start - em->start));
		free_extent_map(em);
		em = NULL;

		/*
		 * A positive result means the range can be written without COW;
		 * zero means it cannot, which is not acceptable for swap.
		 */
		ret = can_nocow_extent(inode, start, &len, NULL, NULL, NULL);
		if (ret < 0) {
			goto out;
		} else if (ret) {
			ret = 0;
		} else {
			btrfs_warn(fs_info,
				   "swapfile must not be copy-on-write");
			ret = -EINVAL;
			goto out;
		}

		/* From here on, em refers to the chunk mapping. */
		em = btrfs_get_chunk_map(fs_info, logical_block_start, len);
		if (IS_ERR(em)) {
			ret = PTR_ERR(em);
			goto out;
		}

		/* Any profile bit set means the chunk is not "single". */
		if (em->map_lookup->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
			btrfs_warn(fs_info,
				   "swapfile must have single data profile");
			ret = -EINVAL;
			goto out;
		}

		/*
		 * The first extent decides which device is used and pins it.
		 * Every later extent must live on that same device.
		 */
		if (device == NULL) {
			device = em->map_lookup->stripes[0].dev;
			ret = btrfs_add_swapfile_pin(inode, device, false);
			/* 1 means the pin already existed, which is fine. */
			if (ret == 1)
				ret = 0;
			else if (ret)
				goto out;
		} else if (device != em->map_lookup->stripes[0].dev) {
			btrfs_warn(fs_info, "swapfile must be on one device");
			ret = -EINVAL;
			goto out;
		}

		/* Translate to a physical address and clamp len to the chunk. */
		physical_block_start = (em->map_lookup->stripes[0].physical +
					(logical_block_start - em->start));
		len = min(len, em->len - (logical_block_start - em->start));
		free_extent_map(em);
		em = NULL;

		bg = btrfs_lookup_block_group(fs_info, logical_block_start);
		if (!bg) {
			btrfs_warn(fs_info,
			   "could not find block group containing swapfile");
			ret = -EINVAL;
			goto out;
		}

		/*
		 * A newly inserted pin keeps the block group reference; it is
		 * put when the pin is freed. If no pin was inserted (already
		 * pinned, or error), drop the reference right away.
		 */
		ret = btrfs_add_swapfile_pin(inode, bg, true);
		if (ret) {
			btrfs_put_block_group(bg);
			if (ret == 1)
				ret = 0;
			else
				goto out;
		}

		/*
		 * Extend the pending run when this piece is physically
		 * contiguous with it. Otherwise flush the pending run and
		 * start a new one here.
		 */
		if (bsi.block_len &&
		    bsi.block_start + bsi.block_len == physical_block_start) {
			bsi.block_len += len;
		} else {
			if (bsi.block_len) {
				ret = btrfs_add_swap_extent(sis, &bsi);
				if (ret)
					goto out;
			}
			bsi.start = start;
			bsi.block_start = physical_block_start;
			bsi.block_len = len;
		}

		start += len;
	}

	/* Flush the last pending run. */
	if (bsi.block_len)
		ret = btrfs_add_swap_extent(sis, &bsi);

out:
	/* em may be NULL, an error pointer, or a mapping still held. */
	if (!IS_ERR_OR_NULL(em))
		free_extent_map(em);

	unlock_extent_cached(io_tree, 0, isize - 1, &cached_state);

	/* On failure, undo the pins and the nr_swapfiles increment. */
	if (ret)
		btrfs_swap_deactivate(file);

	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);

	if (ret)
		return ret;

	if (device)
		sis->bdev = device->bdev;
	*span = bsi.highest_ppage - bsi.lowest_ppage + 1;
	sis->max = bsi.nr_pages;
	sis->pages = bsi.nr_pages - 1;
	sis->highest_bit = bsi.nr_pages - 1;
	return bsi.nr_extents;
}
10889#else
/* Stub used when CONFIG_SWAP is not set: there is nothing to release. */
static void btrfs_swap_deactivate(struct file *file)
{
}
10893
/*
 * Stub used when CONFIG_SWAP is not set: swapfile activation always fails
 * with -EOPNOTSUPP.
 */
static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
			       sector_t *span)
{
	return -EOPNOTSUPP;
}
10899#endif
10900
/*
 * Inode operations for directories: lookup plus the callbacks that create,
 * link, rename and remove entries, alongside the attribute, xattr, ACL and
 * permission handlers.
 */
static const struct inode_operations btrfs_dir_inode_operations = {
	.getattr	= btrfs_getattr,
	.lookup		= btrfs_lookup,
	.create		= btrfs_create,
	.unlink		= btrfs_unlink,
	.link		= btrfs_link,
	.mkdir		= btrfs_mkdir,
	.rmdir		= btrfs_rmdir,
	.rename		= btrfs_rename2,
	.symlink	= btrfs_symlink,
	.setattr	= btrfs_setattr,
	.mknod		= btrfs_mknod,
	.listxattr	= btrfs_listxattr,
	.permission	= btrfs_permission,
	.get_acl	= btrfs_get_acl,
	.set_acl	= btrfs_set_acl,
	.update_time	= btrfs_update_time,
	.tmpfile	= btrfs_tmpfile,
};
/*
 * Reduced directory inode operations offering only lookup, permission checks
 * and timestamp updates; no callback that creates or removes entries.
 * NOTE(review): going by the name, presumably used for read-only
 * directories - confirm where it is assigned.
 */
static const struct inode_operations btrfs_dir_ro_inode_operations = {
	.lookup		= btrfs_lookup,
	.permission	= btrfs_permission,
	.update_time	= btrfs_update_time,
};
10925
/*
 * File operations for open directories. Directory listing goes through
 * iterate_shared; read() is routed to generic_read_dir. The compat ioctl
 * entry is only present when CONFIG_COMPAT is set.
 */
static const struct file_operations btrfs_dir_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= generic_read_dir,
	.iterate_shared	= btrfs_real_readdir,
	.open		= btrfs_opendir,
	.unlocked_ioctl	= btrfs_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= btrfs_compat_ioctl,
#endif
	.release        = btrfs_release_file,
	.fsync		= btrfs_sync_file,
};
10938
/*
 * extent_io callbacks for data inodes; installed into an inode's io_tree.ops
 * when the inode is set up (for example in btrfs_tmpfile()).
 */
static const struct extent_io_ops btrfs_extent_io_ops = {

	.submit_bio_hook = btrfs_submit_bio_hook,
	.readpage_end_io_hook = btrfs_readpage_end_io_hook,
};
10944
10945
10946
10947
10948
10949
10950
10951
10952
10953
10954
10955
10956
/*
 * Address space operations for file data. No bmap callback is provided;
 * swapfiles are supported through the swap_activate and swap_deactivate
 * callbacks.
 */
static const struct address_space_operations btrfs_aops = {
	.readpage	= btrfs_readpage,
	.writepage	= btrfs_writepage,
	.writepages	= btrfs_writepages,
	.readpages	= btrfs_readpages,
	.direct_IO	= btrfs_direct_IO,
	.invalidatepage = btrfs_invalidatepage,
	.releasepage	= btrfs_releasepage,
	.set_page_dirty	= btrfs_set_page_dirty,
	.error_remove_page = generic_error_remove_page,
	.swap_activate	= btrfs_swap_activate,
	.swap_deactivate = btrfs_swap_deactivate,
};
10970
/*
 * Inode operations for regular files (also assigned to tmpfile inodes):
 * attribute, xattr, ACL and permission handlers plus fiemap.
 */
static const struct inode_operations btrfs_file_inode_operations = {
	.getattr	= btrfs_getattr,
	.setattr	= btrfs_setattr,
	.listxattr      = btrfs_listxattr,
	.permission	= btrfs_permission,
	.fiemap		= btrfs_fiemap,
	.get_acl	= btrfs_get_acl,
	.set_acl	= btrfs_set_acl,
	.update_time	= btrfs_update_time,
};
/*
 * Inode operations table with the attribute, xattr, ACL and permission
 * handlers only (no fiemap, no lookup).
 * NOTE(review): going by the name, presumably used for special files such as
 * device nodes, FIFOs and sockets - confirm where it is assigned.
 */
static const struct inode_operations btrfs_special_inode_operations = {
	.getattr	= btrfs_getattr,
	.setattr	= btrfs_setattr,
	.permission	= btrfs_permission,
	.listxattr	= btrfs_listxattr,
	.get_acl	= btrfs_get_acl,
	.set_acl	= btrfs_set_acl,
	.update_time	= btrfs_update_time,
};
/*
 * Inode operations for symbolic links. The link target is resolved through
 * the generic page_get_link() helper; no ACL callbacks are provided.
 */
static const struct inode_operations btrfs_symlink_inode_operations = {
	.get_link	= page_get_link,
	.getattr	= btrfs_getattr,
	.setattr	= btrfs_setattr,
	.permission	= btrfs_permission,
	.listxattr	= btrfs_listxattr,
	.update_time	= btrfs_update_time,
};
10998
/*
 * Dentry operations for btrfs; only d_delete is overridden. Not static, so it
 * is visible to other compilation units.
 */
const struct dentry_operations btrfs_dentry_operations = {
	.d_delete	= btrfs_dentry_delete,
};
11002