1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19#include <linux/kernel.h>
20#include <linux/bio.h>
21#include <linux/buffer_head.h>
22#include <linux/file.h>
23#include <linux/fs.h>
24#include <linux/pagemap.h>
25#include <linux/highmem.h>
26#include <linux/time.h>
27#include <linux/init.h>
28#include <linux/string.h>
29#include <linux/backing-dev.h>
30#include <linux/mpage.h>
31#include <linux/swap.h>
32#include <linux/writeback.h>
33#include <linux/statfs.h>
34#include <linux/compat.h>
35#include <linux/aio.h>
36#include <linux/bit_spinlock.h>
37#include <linux/xattr.h>
38#include <linux/posix_acl.h>
39#include <linux/falloc.h>
40#include <linux/slab.h>
41#include <linux/ratelimit.h>
42#include <linux/mount.h>
43#include <linux/btrfs.h>
44#include <linux/blkdev.h>
45#include <linux/posix_acl_xattr.h>
46#include <linux/uio.h>
47#include "ctree.h"
48#include "disk-io.h"
49#include "transaction.h"
50#include "btrfs_inode.h"
51#include "print-tree.h"
52#include "ordered-data.h"
53#include "xattr.h"
54#include "tree-log.h"
55#include "volumes.h"
56#include "compression.h"
57#include "locking.h"
58#include "free-space-cache.h"
59#include "inode-map.h"
60#include "backref.h"
61#include "hash.h"
62#include "props.h"
63#include "qgroup.h"
64#include "dedupe.h"
65
/*
 * Arguments threaded through the iget5_locked() test/set callbacks when
 * looking up a btrfs inode.
 */
struct btrfs_iget_args {
	struct btrfs_key *location;	/* key of the inode item to find */
	struct btrfs_root *root;	/* subvolume root the inode belongs to */
};
70
71static const struct inode_operations_wrapper btrfs_dir_inode_operations;
72static const struct inode_operations btrfs_symlink_inode_operations;
73static const struct inode_operations btrfs_dir_ro_inode_operations;
74static const struct inode_operations btrfs_special_inode_operations;
75static const struct inode_operations btrfs_file_inode_operations;
76static const struct address_space_operations btrfs_aops;
77static const struct address_space_operations btrfs_symlink_aops;
78static const struct file_operations btrfs_dir_file_operations;
79static const struct extent_io_ops btrfs_extent_io_ops;
80
/* slab caches for the frequently allocated btrfs objects */
static struct kmem_cache *btrfs_inode_cachep;
struct kmem_cache *btrfs_trans_handle_cachep;
struct kmem_cache *btrfs_transaction_cachep;
struct kmem_cache *btrfs_path_cachep;
struct kmem_cache *btrfs_free_space_cachep;
86
#define S_SHIFT 12
/* map inode mode type bits (S_IFMT >> S_SHIFT) to btrfs dir entry types */
static const unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
	[S_IFREG >> S_SHIFT]	= BTRFS_FT_REG_FILE,
	[S_IFDIR >> S_SHIFT]	= BTRFS_FT_DIR,
	[S_IFCHR >> S_SHIFT]	= BTRFS_FT_CHRDEV,
	[S_IFBLK >> S_SHIFT]	= BTRFS_FT_BLKDEV,
	[S_IFIFO >> S_SHIFT]	= BTRFS_FT_FIFO,
	[S_IFSOCK >> S_SHIFT]	= BTRFS_FT_SOCK,
	[S_IFLNK >> S_SHIFT]	= BTRFS_FT_SYMLINK,
};
97
98static int btrfs_setsize(struct inode *inode, struct iattr *attr);
99static int btrfs_truncate(struct inode *inode);
100static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
101static noinline int cow_file_range(struct inode *inode,
102 struct page *locked_page,
103 u64 start, u64 end, u64 delalloc_end,
104 int *page_started, unsigned long *nr_written,
105 int unlock, struct btrfs_dedupe_hash *hash);
106static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
107 u64 len, u64 orig_start,
108 u64 block_start, u64 block_len,
109 u64 orig_block_len, u64 ram_bytes,
110 int type);
111
112static int btrfs_dirty_inode(struct inode *inode);
113
114#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/* sanity-test helper: attach the btrfs extent_io ops to a test inode */
void btrfs_test_inode_set_ops(struct inode *inode)
{
	BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
}
119#endif
120
/*
 * Initialize the security bits of a freshly created inode: inherit the
 * ACLs from @dir, then set up the security xattrs.  Returns 0 on
 * success or the first negative errno encountered.
 */
static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
				     struct inode *inode, struct inode *dir,
				     const struct qstr *qstr)
{
	int ret;

	ret = btrfs_init_acl(trans, inode, dir);
	if (ret)
		return ret;

	return btrfs_xattr_security_init(trans, inode, dir, qstr);
}
132
133
134
135
136
137
/*
 * Copy file data for the range [start, start + size) into an inline file
 * extent item in the btree.  If the data was compressed, the bytes are
 * taken from @compressed_pages instead of the page cache.
 *
 * When @extent_inserted is non-zero the caller already inserted an empty
 * item at path->slots[0] (via __btrfs_drop_extents); otherwise the item is
 * inserted here.  Returns 0 on success or a negative errno.
 */
static int insert_inline_extent(struct btrfs_trans_handle *trans,
				struct btrfs_path *path, int extent_inserted,
				struct btrfs_root *root, struct inode *inode,
				u64 start, size_t size, size_t compressed_size,
				int compress_type,
				struct page **compressed_pages)
{
	struct extent_buffer *leaf;
	struct page *page = NULL;
	char *kaddr;
	unsigned long ptr;
	struct btrfs_file_extent_item *ei;
	int err = 0;
	int ret;
	size_t cur_size = size;
	unsigned long offset;

	/* the item stores the compressed byte count when compression ran */
	if (compressed_size && compressed_pages)
		cur_size = compressed_size;

	inode_add_bytes(inode, size);

	if (!extent_inserted) {
		struct btrfs_key key;
		size_t datasize;

		key.objectid = btrfs_ino(inode);
		key.offset = start;
		key.type = BTRFS_EXTENT_DATA_KEY;

		datasize = btrfs_file_extent_calc_inline_size(cur_size);
		path->leave_spinning = 1;
		ret = btrfs_insert_empty_item(trans, root, path, &key,
					      datasize);
		if (ret) {
			err = ret;
			goto fail;
		}
	}
	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);
	btrfs_set_file_extent_generation(leaf, ei, trans->transid);
	btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE);
	btrfs_set_file_extent_encryption(leaf, ei, 0);
	btrfs_set_file_extent_other_encoding(leaf, ei, 0);
	/* ram_bytes is always the uncompressed length */
	btrfs_set_file_extent_ram_bytes(leaf, ei, size);
	ptr = btrfs_file_extent_inline_start(ei);

	if (compress_type != BTRFS_COMPRESS_NONE) {
		struct page *cpage;
		int i = 0;
		/* copy the compressed pages into the item, page by page */
		while (compressed_size > 0) {
			cpage = compressed_pages[i];
			cur_size = min_t(unsigned long, compressed_size,
					 PAGE_CACHE_SIZE);

			kaddr = kmap_atomic(cpage);
			write_extent_buffer(leaf, kaddr, ptr, cur_size);
			kunmap_atomic(kaddr);

			i++;
			ptr += cur_size;
			compressed_size -= cur_size;
		}
		btrfs_set_file_extent_compression(leaf, ei,
						  compress_type);
	} else {
		/*
		 * uncompressed data: the inline range lives inside a single
		 * page of the page cache
		 */
		page = find_get_page(inode->i_mapping,
				     start >> PAGE_CACHE_SHIFT);
		btrfs_set_file_extent_compression(leaf, ei, 0);
		kaddr = kmap_atomic(page);
		offset = start & (PAGE_CACHE_SIZE - 1);
		write_extent_buffer(leaf, kaddr + offset, ptr, size);
		kunmap_atomic(kaddr);
		page_cache_release(page);
	}
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	/*
	 * The inline data now covers everything up to i_size, so bump
	 * disk_i_size and write the updated inode item back.
	 * NOTE(review): presumably the caller holds the page lock for this
	 * range so i_size cannot grow under us — confirm against callers.
	 */
	BTRFS_I(inode)->disk_i_size = inode->i_size;
	ret = btrfs_update_inode(trans, root, inode);

	return ret;
fail:
	return err;
}
234
235
236
237
238
239
240
/*
 * Conditionally insert an inline extent for [start, end].  Performs all the
 * checks required to make sure the data is small enough to be stored inline.
 *
 * Returns 0 if the data was inlined, 1 if it does not qualify (caller must
 * fall back to a regular extent), or a negative errno.
 */
static noinline int cow_file_range_inline(struct btrfs_root *root,
					  struct inode *inode, u64 start,
					  u64 end, size_t compressed_size,
					  int compress_type,
					  struct page **compressed_pages)
{
	struct btrfs_trans_handle *trans;
	u64 isize = i_size_read(inode);
	u64 actual_end = min(end + 1, isize);
	u64 inline_len = actual_end - start;
	u64 aligned_end = ALIGN(end, root->sectorsize);
	u64 data_len = inline_len;
	int ret;
	struct btrfs_path *path;
	int extent_inserted = 0;
	u32 extent_item_size;

	if (compressed_size)
		data_len = compressed_size;

	/*
	 * An inline extent must start at file offset 0, cover the tail of
	 * the file, and fit within both the leaf and the max_inline mount
	 * option; otherwise bail out and let the caller COW normally.
	 */
	if (start > 0 ||
	    actual_end > root->sectorsize ||
	    data_len > BTRFS_MAX_INLINE_DATA_SIZE(root) ||
	    (!compressed_size &&
	    (actual_end & (root->sectorsize - 1)) == 0) ||
	    end + 1 < isize ||
	    data_len > root->fs_info->max_inline) {
		return 1;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		return PTR_ERR(trans);
	}
	trans->block_rsv = &root->fs_info->delalloc_block_rsv;

	if (compressed_size && compressed_pages)
		extent_item_size = btrfs_file_extent_calc_inline_size(
		   compressed_size);
	else
		extent_item_size = btrfs_file_extent_calc_inline_size(
		    inline_len);

	/* drop overlapping extents; may also pre-insert the empty item */
	ret = __btrfs_drop_extents(trans, root, inode, path,
				   start, aligned_end, NULL,
				   1, 1, extent_item_size, &extent_inserted);
	if (ret) {
		btrfs_abort_transaction(trans, root, ret);
		goto out;
	}

	if (isize > actual_end)
		inline_len = min_t(u64, isize, actual_end);
	ret = insert_inline_extent(trans, path, extent_inserted,
				   root, inode, start,
				   inline_len, compressed_size,
				   compress_type, compressed_pages);
	if (ret && ret != -ENOSPC) {
		btrfs_abort_transaction(trans, root, ret);
		goto out;
	} else if (ret == -ENOSPC) {
		/* no leaf room after all: fall back to a regular extent */
		ret = 1;
		goto out;
	}

	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
	btrfs_delalloc_release_metadata(inode, end + 1 - start);
	btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
out:
	/*
	 * Release the qgroup data reservation for the range; inline extents
	 * always start at offset 0 and span at most one page.
	 */
	btrfs_qgroup_free_data(inode, 0, PAGE_CACHE_SIZE);
	btrfs_free_path(path);
	btrfs_end_transaction(trans, root);
	return ret;
}
326
/*
 * One unit of output produced by compress_file_range(), queued on
 * async_cow->extents and later written out by submit_compressed_extents().
 */
struct async_extent {
	u64 start;			/* file offset of the range */
	u64 ram_size;			/* uncompressed length */
	u64 compressed_size;		/* bytes after compression */
	struct page **pages;		/* compressed pages; NULL = uncompressed */
	unsigned long nr_pages;		/* entries in @pages */
	int compress_type;		/* BTRFS_COMPRESS_* used */
	struct list_head list;		/* link on async_cow->extents */
};
336
/*
 * Per-chunk work item for async delalloc: compression runs in
 * async_cow_start() and the results are submitted in async_cow_submit().
 */
struct async_cow {
	struct inode *inode;		/* inode being written (igrab'd) */
	struct btrfs_root *root;	/* root of @inode */
	struct page *locked_page;	/* page locked by the caller */
	u64 start;			/* start of the delalloc chunk */
	u64 end;			/* inclusive end of the chunk */
	struct list_head extents;	/* list of struct async_extent */
	struct btrfs_work work;		/* workqueue item */
};
346
347static noinline int add_async_extent(struct async_cow *cow,
348 u64 start, u64 ram_size,
349 u64 compressed_size,
350 struct page **pages,
351 unsigned long nr_pages,
352 int compress_type)
353{
354 struct async_extent *async_extent;
355
356 async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS);
357 BUG_ON(!async_extent);
358 async_extent->start = start;
359 async_extent->ram_size = ram_size;
360 async_extent->compressed_size = compressed_size;
361 async_extent->pages = pages;
362 async_extent->nr_pages = nr_pages;
363 async_extent->compress_type = compress_type;
364 list_add_tail(&async_extent->list, &cow->extents);
365 return 0;
366}
367
368static inline int inode_need_compress(struct inode *inode)
369{
370 struct btrfs_root *root = BTRFS_I(inode)->root;
371
372
373 if (btrfs_test_opt(root->fs_info, FORCE_COMPRESS))
374 return 1;
375
376 if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS)
377 return 0;
378 if (btrfs_test_opt(root->fs_info, COMPRESS) ||
379 BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS ||
380 BTRFS_I(inode)->force_compress)
381 return 1;
382 return 0;
383}
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
/*
 * Phase one of async writeback: try to compress the delalloc range
 * [start, end].  Each successfully compressed (or deliberately
 * uncompressed) chunk is queued on @async_cow as an async_extent for
 * submit_compressed_extents() to write out later; *num_added counts how
 * many were queued.  Small tails may instead be stored as inline extents
 * directly from here.
 */
static noinline void compress_file_range(struct inode *inode,
					struct page *locked_page,
					u64 start, u64 end,
					struct async_cow *async_cow,
					int *num_added)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 num_bytes;
	u64 blocksize = root->sectorsize;
	u64 actual_end;
	u64 isize = i_size_read(inode);
	int ret = 0;
	struct page **pages = NULL;
	unsigned long nr_pages;
	unsigned long nr_pages_ret = 0;
	unsigned long total_compressed = 0;
	unsigned long total_in = 0;
	unsigned long max_compressed = SZ_128K;
	unsigned long max_uncompressed = SZ_128K;
	int i;
	int will_compress;
	int compress_type = root->fs_info->compress_type;
	int redirty = 0;

	/* small write inside of i_size: queue the inode for defrag */
	if ((end - start + 1) < SZ_16K &&
	    (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
		btrfs_add_inode_defrag(NULL, inode);

	actual_end = min_t(u64, isize, end + 1);
again:
	will_compress = 0;
	nr_pages = (end >> PAGE_CACHE_SHIFT) - (start >> PAGE_CACHE_SHIFT) + 1;
	/* compression works on at most 128K of input at a time */
	nr_pages = min_t(unsigned long, nr_pages, SZ_128K / PAGE_CACHE_SIZE);

	/* nothing to do for a range entirely beyond i_size */
	if (actual_end <= start)
		goto cleanup_and_bail_uncompressed;

	total_compressed = actual_end - start;

	/*
	 * skip compression for a tiny block unless it is the tail of the
	 * file (which may still become an inline extent below)
	 */
	if (total_compressed <= blocksize &&
	   (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
		goto cleanup_and_bail_uncompressed;

	total_compressed = min(total_compressed, max_uncompressed);
	num_bytes = ALIGN(end - start + 1, blocksize);
	num_bytes = max(blocksize, num_bytes);
	total_in = 0;
	ret = 0;

	if (inode_need_compress(inode)) {
		WARN_ON(pages);
		pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
		if (!pages) {
			/* allocation failed: just write the range as-is */
			goto cont;
		}

		if (BTRFS_I(inode)->force_compress)
			compress_type = BTRFS_I(inode)->force_compress;

		/*
		 * clear the dirty bits so the pages are not written out
		 * from under us while we compress; @redirty records that
		 * we must restore them if compression is abandoned
		 */
		extent_range_clear_dirty_for_io(inode, start, end);
		redirty = 1;
		ret = btrfs_compress_pages(compress_type,
					   inode->i_mapping, start,
					   total_compressed, pages,
					   nr_pages, &nr_pages_ret,
					   &total_in,
					   &total_compressed,
					   max_compressed);

		if (!ret) {
			unsigned long offset = total_compressed &
				(PAGE_CACHE_SIZE - 1);
			struct page *page = pages[nr_pages_ret - 1];
			char *kaddr;

			/* zero the tail of the last compressed page */
			if (offset) {
				kaddr = kmap_atomic(page);
				memset(kaddr + offset, 0,
				       PAGE_CACHE_SIZE - offset);
				kunmap_atomic(kaddr);
			}
			will_compress = 1;
		}
	}
cont:
	if (start == 0) {
		/* the range starts at offset 0: try an inline extent */
		if (ret || total_in < (actual_end - start)) {
			/* compression failed or didn't cover everything */
			ret = cow_file_range_inline(root, inode, start, end,
						    0, 0, NULL);
		} else {
			/* try inlining the compressed bytes */
			ret = cow_file_range_inline(root, inode, start, end,
						    total_compressed,
						    compress_type, pages);
		}
		if (ret <= 0) {
			unsigned long clear_flags = EXTENT_DELALLOC |
				EXTENT_DEFRAG;
			unsigned long page_error_op;

			clear_flags |= (ret < 0) ? EXTENT_DO_ACCOUNTING : 0;
			page_error_op = ret < 0 ? PAGE_SET_ERROR : 0;

			/*
			 * inline succeeded (ret == 0) or hard-failed
			 * (ret < 0): either way the IO is done, so unlock
			 * and finish writeback on the whole range
			 */
			extent_clear_unlock_delalloc(inode, start, end, end,
						     NULL, clear_flags,
						     PAGE_UNLOCK |
						     PAGE_CLEAR_DIRTY |
						     PAGE_SET_WRITEBACK |
						     page_error_op |
						     PAGE_END_WRITEBACK);
			btrfs_free_reserved_data_space_noquota(inode, start,
						end - start + 1);
			goto free_pages_out;
		}
	}

	if (will_compress) {
		/*
		 * the disk extent is sector aligned; round the compressed
		 * size up before comparing against the input size
		 */
		total_compressed = ALIGN(total_compressed, blocksize);

		/*
		 * total_in covers whole input pages; only keep the
		 * compressed copy if it is strictly smaller
		 */
		total_in = ALIGN(total_in, PAGE_CACHE_SIZE);
		if (total_compressed >= total_in) {
			will_compress = 0;
		} else {
			num_bytes = total_in;
			*num_added += 1;

			/* queue the compressed chunk for phase two */
			add_async_extent(async_cow, start, num_bytes,
					 total_compressed, pages, nr_pages_ret,
					 compress_type);

			if (start + num_bytes < end) {
				/* more input left: compress the next chunk */
				start += num_bytes;
				pages = NULL;
				cond_resched();
				goto again;
			}
			return;
		}
	}
	if (pages) {
		/* compression was not a win: release the compressed pages */
		for (i = 0; i < nr_pages_ret; i++) {
			WARN_ON(pages[i]->mapping);
			page_cache_release(pages[i]);
		}
		kfree(pages);
		pages = NULL;
		total_compressed = 0;
		nr_pages_ret = 0;

		/* remember this inode doesn't compress well */
		if (!btrfs_test_opt(root->fs_info, FORCE_COMPRESS) &&
		    !(BTRFS_I(inode)->force_compress)) {
			BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
		}
	}
cleanup_and_bail_uncompressed:
	/*
	 * the caller expects locked_page to be dirtied if it falls in this
	 * range, since cow_file_range() will redirty everything else
	 */
	if (page_offset(locked_page) >= start &&
	    page_offset(locked_page) <= end)
		__set_page_dirty_nobuffers(locked_page);

	/* restore the dirty bits we cleared before compressing */
	if (redirty)
		extent_range_redirty_for_io(inode, start, end);
	add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0,
			 BTRFS_COMPRESS_NONE);
	*num_added += 1;

	return;

free_pages_out:
	for (i = 0; i < nr_pages_ret; i++) {
		WARN_ON(pages[i]->mapping);
		page_cache_release(pages[i]);
	}
	kfree(pages);
}
654
655static void free_async_extent_pages(struct async_extent *async_extent)
656{
657 int i;
658
659 if (!async_extent->pages)
660 return;
661
662 for (i = 0; i < async_extent->nr_pages; i++) {
663 WARN_ON(async_extent->pages[i]->mapping);
664 page_cache_release(async_extent->pages[i]);
665 }
666 kfree(async_extent->pages);
667 async_extent->nr_pages = 0;
668 async_extent->pages = NULL;
669}
670
671
672
673
674
675
676
/*
 * Phase two of async writeback: walk the async_extents queued by
 * compress_file_range(), allocate disk space for each, create the
 * extent maps and ordered extents, and submit the compressed bios.
 * Extents with no compressed pages fall back to the regular
 * cow_file_range() path.
 */
static noinline void submit_compressed_extents(struct inode *inode,
					       struct async_cow *async_cow)
{
	struct async_extent *async_extent;
	u64 alloc_hint = 0;
	struct btrfs_key ins;
	struct extent_map *em;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	struct extent_io_tree *io_tree;
	int ret = 0;

again:
	while (!list_empty(&async_cow->extents)) {
		async_extent = list_entry(async_cow->extents.next,
					  struct async_extent, list);
		list_del(&async_extent->list);

		io_tree = &BTRFS_I(inode)->io_tree;

retry:
		/* no compressed pages: write the range uncompressed */
		if (!async_extent->pages) {
			int page_started = 0;
			unsigned long nr_written = 0;

			lock_extent(io_tree, async_extent->start,
				    async_extent->start +
				    async_extent->ram_size - 1);

			/* allocate blocks and set up the ordered extent */
			ret = cow_file_range(inode, async_cow->locked_page,
					     async_extent->start,
					     async_extent->start +
					     async_extent->ram_size - 1,
					     async_extent->start +
					     async_extent->ram_size - 1,
					     &page_started, &nr_written, 0,
					     NULL);

			/*
			 * if page_started, cow_file_range() inserted an
			 * inline extent and handled all unlocking and IO;
			 * otherwise the pages still need to be written out
			 */
			if (!page_started && !ret)
				extent_write_locked_range(io_tree,
						  inode, async_extent->start,
						  async_extent->start +
						  async_extent->ram_size - 1,
						  btrfs_get_extent,
						  WB_SYNC_ALL);
			else if (ret)
				unlock_page(async_cow->locked_page);
			kfree(async_extent);
			cond_resched();
			continue;
		}

		lock_extent(io_tree, async_extent->start,
			    async_extent->start + async_extent->ram_size - 1);

		ret = btrfs_reserve_extent(root, async_extent->ram_size,
					   async_extent->compressed_size,
					   async_extent->compressed_size,
					   0, alloc_hint, &ins, 1, 1);
		if (ret) {
			free_async_extent_pages(async_extent);

			if (ret == -ENOSPC) {
				unlock_extent(io_tree, async_extent->start,
					      async_extent->start +
					      async_extent->ram_size - 1);

				/*
				 * out of space for the compressed copy:
				 * redirty the pages so a later pass can
				 * write this extent uncompressed (the
				 * pages were freed above, so retry takes
				 * the !async_extent->pages branch)
				 */
				extent_range_redirty_for_io(inode,
						async_extent->start,
						async_extent->start +
						async_extent->ram_size - 1);

				goto retry;
			}
			goto out_free;
		}

		/*
		 * here we're doing allocation and writeback of the
		 * compressed pages
		 */
		btrfs_drop_extent_cache(inode, async_extent->start,
					async_extent->start +
					async_extent->ram_size - 1, 0);

		em = alloc_extent_map();
		if (!em) {
			ret = -ENOMEM;
			goto out_free_reserve;
		}
		em->start = async_extent->start;
		em->len = async_extent->ram_size;
		em->orig_start = em->start;
		em->mod_start = em->start;
		em->mod_len = em->len;

		em->block_start = ins.objectid;
		em->block_len = ins.offset;
		em->orig_block_len = ins.offset;
		em->ram_bytes = async_extent->ram_size;
		em->bdev = root->fs_info->fs_devices->latest_bdev;
		em->compress_type = async_extent->compress_type;
		set_bit(EXTENT_FLAG_PINNED, &em->flags);
		set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
		em->generation = -1;

		/* insert the map, dropping any stale overlap on -EEXIST */
		while (1) {
			write_lock(&em_tree->lock);
			ret = add_extent_mapping(em_tree, em, 1);
			write_unlock(&em_tree->lock);
			if (ret != -EEXIST) {
				free_extent_map(em);
				break;
			}
			btrfs_drop_extent_cache(inode, async_extent->start,
						async_extent->start +
						async_extent->ram_size - 1, 0);
		}

		if (ret)
			goto out_free_reserve;

		ret = btrfs_add_ordered_extent_compress(inode,
						async_extent->start,
						ins.objectid,
						async_extent->ram_size,
						ins.offset,
						BTRFS_ORDERED_COMPRESSED,
						async_extent->compress_type);
		if (ret) {
			btrfs_drop_extent_cache(inode, async_extent->start,
						async_extent->start +
						async_extent->ram_size - 1, 0);
			goto out_free_reserve;
		}
		btrfs_dec_block_group_reservations(root->fs_info, ins.objectid);

		/*
		 * clear dirty, set writeback and unlock the pages before
		 * submitting the compressed bio
		 */
		extent_clear_unlock_delalloc(inode, async_extent->start,
				async_extent->start +
				async_extent->ram_size - 1,
				async_extent->start +
				async_extent->ram_size - 1,
				NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
				PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
				PAGE_SET_WRITEBACK);
		ret = btrfs_submit_compressed_write(inode,
				    async_extent->start,
				    async_extent->ram_size,
				    ins.objectid,
				    ins.offset, async_extent->pages,
				    async_extent->nr_pages);
		if (ret) {
			struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
			struct page *p = async_extent->pages[0];
			const u64 start = async_extent->start;
			const u64 end = start + async_extent->ram_size - 1;

			/*
			 * submission failed after writeback was set: run the
			 * end_io hook manually to finish the ordered extent
			 * and mark the range as errored
			 */
			p->mapping = inode->i_mapping;
			tree->ops->writepage_end_io_hook(p, start, end,
							 NULL, 0);
			p->mapping = NULL;
			extent_clear_unlock_delalloc(inode, start, end, end,
						     NULL, 0,
						     PAGE_END_WRITEBACK |
						     PAGE_SET_ERROR);
			free_async_extent_pages(async_extent);
		}
		alloc_hint = ins.objectid + ins.offset;
		kfree(async_extent);
		cond_resched();
	}
	return;
out_free_reserve:
	btrfs_dec_block_group_reservations(root->fs_info, ins.objectid);
	btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
out_free:
	extent_clear_unlock_delalloc(inode, async_extent->start,
				     async_extent->start +
				     async_extent->ram_size - 1,
				     async_extent->start +
				     async_extent->ram_size - 1,
				     NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
				     EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
				     PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
				     PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK |
				     PAGE_SET_ERROR);
	free_async_extent_pages(async_extent);
	kfree(async_extent);
	/* keep processing the remaining extents on the list */
	goto again;
}
885
886static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
887 u64 num_bytes)
888{
889 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
890 struct extent_map *em;
891 u64 alloc_hint = 0;
892
893 read_lock(&em_tree->lock);
894 em = search_extent_mapping(em_tree, start, num_bytes);
895 if (em) {
896
897
898
899
900
901 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
902 free_extent_map(em);
903 em = search_extent_mapping(em_tree, 0, 0);
904 if (em && em->block_start < EXTENT_MAP_LAST_BYTE)
905 alloc_hint = em->block_start;
906 if (em)
907 free_extent_map(em);
908 } else {
909 alloc_hint = em->block_start;
910 free_extent_map(em);
911 }
912 }
913 read_unlock(&em_tree->lock);
914
915 return alloc_hint;
916}
917
918
919
920
921
922
923
924
925
926
927
928
929
930
/*
 * COW the delalloc range [start, end]: allocate disk extents, insert the
 * extent maps and ordered extents, and progressively unlock/clear the
 * pages as each allocation lands.  A range starting at offset 0 is first
 * tried as an inline extent.
 *
 * @unlock controls whether the pages are unlocked as we go; @hash is the
 * (unused here) dedupe hash slot.  On success *page_started/*nr_written
 * report what was handled; returns 0 or a negative errno.
 */
static noinline int cow_file_range(struct inode *inode,
				   struct page *locked_page,
				   u64 start, u64 end, u64 delalloc_end,
				   int *page_started, unsigned long *nr_written,
				   int unlock, struct btrfs_dedupe_hash *hash)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 alloc_hint = 0;
	u64 num_bytes;
	unsigned long ram_size;
	u64 disk_num_bytes;
	u64 cur_alloc_size;
	u64 blocksize = root->sectorsize;
	struct btrfs_key ins;
	struct extent_map *em;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	int ret = 0;

	/* free space inodes must never come through this path */
	if (btrfs_is_free_space_inode(inode)) {
		WARN_ON_ONCE(1);
		ret = -EINVAL;
		goto out_unlock;
	}

	num_bytes = ALIGN(end - start + 1, blocksize);
	num_bytes = max(blocksize, num_bytes);
	disk_num_bytes = num_bytes;

	/* small write inside of i_size: queue the inode for defrag */
	if (num_bytes < SZ_64K &&
	    (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
		btrfs_add_inode_defrag(NULL, inode);

	if (start == 0) {
		/* try to make an inline extent for the head of the file */
		ret = cow_file_range_inline(root, inode, start, end, 0, 0,
					    NULL);
		if (ret == 0) {
			/* inlined: IO is complete, unlock everything */
			extent_clear_unlock_delalloc(inode, start, end,
				     delalloc_end, NULL,
				     EXTENT_LOCKED | EXTENT_DELALLOC |
				     EXTENT_DEFRAG, PAGE_UNLOCK |
				     PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
				     PAGE_END_WRITEBACK);
			btrfs_free_reserved_data_space_noquota(inode, start,
						end - start + 1);
			*nr_written = *nr_written +
			     (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
			*page_started = 1;
			goto out;
		} else if (ret < 0) {
			goto out_unlock;
		}
	}

	BUG_ON(disk_num_bytes >
	       btrfs_super_total_bytes(root->fs_info->super_copy));

	alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
	btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);

	/* allocate in pieces; each iteration covers one reserved extent */
	while (disk_num_bytes > 0) {
		unsigned long op;

		cur_alloc_size = disk_num_bytes;
		ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
					   root->sectorsize, 0, alloc_hint,
					   &ins, 1, 1);
		if (ret < 0)
			goto out_unlock;

		em = alloc_extent_map();
		if (!em) {
			ret = -ENOMEM;
			goto out_reserve;
		}
		em->start = start;
		em->orig_start = em->start;
		ram_size = ins.offset;
		em->len = ins.offset;
		em->mod_start = em->start;
		em->mod_len = em->len;

		em->block_start = ins.objectid;
		em->block_len = ins.offset;
		em->orig_block_len = ins.offset;
		em->ram_bytes = ram_size;
		em->bdev = root->fs_info->fs_devices->latest_bdev;
		set_bit(EXTENT_FLAG_PINNED, &em->flags);
		em->generation = -1;

		/* insert the map, dropping any stale overlap on -EEXIST */
		while (1) {
			write_lock(&em_tree->lock);
			ret = add_extent_mapping(em_tree, em, 1);
			write_unlock(&em_tree->lock);
			if (ret != -EEXIST) {
				free_extent_map(em);
				break;
			}
			btrfs_drop_extent_cache(inode, start,
						start + ram_size - 1, 0);
		}
		if (ret)
			goto out_reserve;

		cur_alloc_size = ins.offset;
		ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
					       ram_size, cur_alloc_size, 0);
		if (ret)
			goto out_drop_extent_cache;

		if (root->root_key.objectid ==
		    BTRFS_DATA_RELOC_TREE_OBJECTID) {
			/* relocation: carry the csums over to the new extent */
			ret = btrfs_reloc_clone_csums(inode, start,
						      cur_alloc_size);
			if (ret)
				goto out_drop_extent_cache;
		}

		btrfs_dec_block_group_reservations(root->fs_info, ins.objectid);

		if (disk_num_bytes < cur_alloc_size)
			break;

		/*
		 * Unlock the pages covered by this allocation; Private2 is
		 * left set so releasepage knows an ordered extent exists.
		 * Only unlock the whole pages if the caller asked (see
		 * @unlock); locked_page is always handled by the caller.
		 */
		op = unlock ? PAGE_UNLOCK : 0;
		op |= PAGE_SET_PRIVATE2;

		extent_clear_unlock_delalloc(inode, start,
					     start + ram_size - 1,
					     delalloc_end, locked_page,
					     EXTENT_LOCKED | EXTENT_DELALLOC,
					     op);
		disk_num_bytes -= cur_alloc_size;
		num_bytes -= cur_alloc_size;
		alloc_hint = ins.objectid + ins.offset;
		start += cur_alloc_size;
	}
out:
	return ret;

out_drop_extent_cache:
	btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0);
out_reserve:
	btrfs_dec_block_group_reservations(root->fs_info, ins.objectid);
	btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
out_unlock:
	/* on error, unlock and finish writeback on the untouched remainder */
	extent_clear_unlock_delalloc(inode, start, end, delalloc_end,
				     locked_page,
				     EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
				     EXTENT_DELALLOC | EXTENT_DEFRAG,
				     PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
				     PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
	goto out;
}
1092
1093
1094
1095
1096static noinline void async_cow_start(struct btrfs_work *work)
1097{
1098 struct async_cow *async_cow;
1099 int num_added = 0;
1100 async_cow = container_of(work, struct async_cow, work);
1101
1102 compress_file_range(async_cow->inode, async_cow->locked_page,
1103 async_cow->start, async_cow->end, async_cow,
1104 &num_added);
1105 if (num_added == 0) {
1106 btrfs_add_delayed_iput(async_cow->inode);
1107 async_cow->inode = NULL;
1108 }
1109}
1110
1111
1112
1113
/*
 * Work callback (ordered phase): account the pages of this finished chunk
 * against the global async delalloc counter and submit its compressed
 * extents to disk.
 */
static noinline void async_cow_submit(struct btrfs_work *work)
{
	struct async_cow *async_cow;
	struct btrfs_root *root;
	unsigned long nr_pages;

	async_cow = container_of(work, struct async_cow, work);

	root = async_cow->root;
	nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >>
		PAGE_CACHE_SHIFT;

	/*
	 * subtract our pages and wake anyone throttled in
	 * cow_file_range_async() once we drop below the limit
	 */
	if (atomic_sub_return(nr_pages, &root->fs_info->async_delalloc_pages) <
	    5 * SZ_1M &&
	    waitqueue_active(&root->fs_info->async_submit_wait))
		wake_up(&root->fs_info->async_submit_wait);

	/* inode may have been released by async_cow_start() */
	if (async_cow->inode)
		submit_compressed_extents(async_cow->inode, async_cow);
}
1137
1138static noinline void async_cow_free(struct btrfs_work *work)
1139{
1140 struct async_cow *async_cow;
1141 async_cow = container_of(work, struct async_cow, work);
1142 if (async_cow->inode)
1143 btrfs_add_delayed_iput(async_cow->inode);
1144 kfree(async_cow);
1145}
1146
/*
 * Split the delalloc range [start, end] into chunks and queue each one on
 * the delalloc workqueue for asynchronous compression and submission.
 * Throttles when too many async delalloc pages are already in flight.
 * Always returns 0 and sets *page_started.
 */
static int cow_file_range_async(struct inode *inode, struct page *locked_page,
				u64 start, u64 end, int *page_started,
				unsigned long *nr_written)
{
	struct async_cow *async_cow;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	unsigned long nr_pages;
	u64 cur_end;
	int limit = 10 * SZ_1M;

	clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
			 1, 0, NULL, GFP_NOFS);
	while (start < end) {
		async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
		BUG_ON(!async_cow); /* -ENOMEM */
		async_cow->inode = igrab(inode);
		async_cow->root = root;
		async_cow->locked_page = locked_page;
		async_cow->start = start;

		/* one big chunk if compression is off, else 512K pieces */
		if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
		    !btrfs_test_opt(root->fs_info, FORCE_COMPRESS))
			cur_end = end;
		else
			cur_end = min(end, start + SZ_512K - 1);

		async_cow->end = cur_end;
		INIT_LIST_HEAD(&async_cow->extents);

		btrfs_init_work(&async_cow->work,
				btrfs_delalloc_helper,
				async_cow_start, async_cow_submit,
				async_cow_free);

		nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >>
			PAGE_CACHE_SHIFT;
		atomic_add(nr_pages, &root->fs_info->async_delalloc_pages);

		btrfs_queue_work(root->fs_info->delalloc_workers,
				 &async_cow->work);

		/* throttle: wait until in-flight pages drop below the limit */
		if (atomic_read(&root->fs_info->async_delalloc_pages) > limit) {
			wait_event(root->fs_info->async_submit_wait,
			   (atomic_read(&root->fs_info->async_delalloc_pages) <
			    limit));
		}

		/* a drain is in progress: wait for everything to finish */
		while (atomic_read(&root->fs_info->async_submit_draining) &&
		      atomic_read(&root->fs_info->async_delalloc_pages)) {
			wait_event(root->fs_info->async_submit_wait,
			  (atomic_read(&root->fs_info->async_delalloc_pages) ==
			   0));
		}

		*nr_written += nr_pages;
		start = cur_end + 1;
	}
	*page_started = 1;
	return 0;
}
1207
1208static noinline int csum_exist_in_range(struct btrfs_root *root,
1209 u64 bytenr, u64 num_bytes)
1210{
1211 int ret;
1212 struct btrfs_ordered_sum *sums;
1213 LIST_HEAD(list);
1214
1215 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, bytenr,
1216 bytenr + num_bytes - 1, &list, 0);
1217 if (ret == 0 && list_empty(&list))
1218 return 0;
1219
1220 while (!list_empty(&list)) {
1221 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
1222 list_del(&sums->list);
1223 kfree(sums);
1224 }
1225 return 1;
1226}
1227
1228
1229
1230
1231
1232
1233
1234
1235static noinline int run_delalloc_nocow(struct inode *inode,
1236 struct page *locked_page,
1237 u64 start, u64 end, int *page_started, int force,
1238 unsigned long *nr_written)
1239{
1240 struct btrfs_root *root = BTRFS_I(inode)->root;
1241 struct btrfs_trans_handle *trans;
1242 struct extent_buffer *leaf;
1243 struct btrfs_path *path;
1244 struct btrfs_file_extent_item *fi;
1245 struct btrfs_key found_key;
1246 u64 cow_start;
1247 u64 cur_offset;
1248 u64 extent_end;
1249 u64 extent_offset;
1250 u64 disk_bytenr;
1251 u64 num_bytes;
1252 u64 disk_num_bytes;
1253 u64 ram_bytes;
1254 int extent_type;
1255 int ret, err;
1256 int type;
1257 int nocow;
1258 int check_prev = 1;
1259 bool nolock;
1260 u64 ino = btrfs_ino(inode);
1261
1262 path = btrfs_alloc_path();
1263 if (!path) {
1264 extent_clear_unlock_delalloc(inode, start, end, end,
1265 locked_page,
1266 EXTENT_LOCKED | EXTENT_DELALLOC |
1267 EXTENT_DO_ACCOUNTING |
1268 EXTENT_DEFRAG, PAGE_UNLOCK |
1269 PAGE_CLEAR_DIRTY |
1270 PAGE_SET_WRITEBACK |
1271 PAGE_END_WRITEBACK);
1272 return -ENOMEM;
1273 }
1274
1275 nolock = btrfs_is_free_space_inode(inode);
1276
1277 if (nolock)
1278 trans = btrfs_join_transaction_nolock(root);
1279 else
1280 trans = btrfs_join_transaction(root);
1281
1282 if (IS_ERR(trans)) {
1283 extent_clear_unlock_delalloc(inode, start, end, end,
1284 locked_page,
1285 EXTENT_LOCKED | EXTENT_DELALLOC |
1286 EXTENT_DO_ACCOUNTING |
1287 EXTENT_DEFRAG, PAGE_UNLOCK |
1288 PAGE_CLEAR_DIRTY |
1289 PAGE_SET_WRITEBACK |
1290 PAGE_END_WRITEBACK);
1291 btrfs_free_path(path);
1292 return PTR_ERR(trans);
1293 }
1294
1295 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1296
1297 cow_start = (u64)-1;
1298 cur_offset = start;
1299 while (1) {
1300 ret = btrfs_lookup_file_extent(trans, root, path, ino,
1301 cur_offset, 0);
1302 if (ret < 0)
1303 goto error;
1304 if (ret > 0 && path->slots[0] > 0 && check_prev) {
1305 leaf = path->nodes[0];
1306 btrfs_item_key_to_cpu(leaf, &found_key,
1307 path->slots[0] - 1);
1308 if (found_key.objectid == ino &&
1309 found_key.type == BTRFS_EXTENT_DATA_KEY)
1310 path->slots[0]--;
1311 }
1312 check_prev = 0;
1313next_slot:
1314 leaf = path->nodes[0];
1315 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
1316 ret = btrfs_next_leaf(root, path);
1317 if (ret < 0)
1318 goto error;
1319 if (ret > 0)
1320 break;
1321 leaf = path->nodes[0];
1322 }
1323
1324 nocow = 0;
1325 disk_bytenr = 0;
1326 num_bytes = 0;
1327 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1328
1329 if (found_key.objectid > ino)
1330 break;
1331 if (WARN_ON_ONCE(found_key.objectid < ino) ||
1332 found_key.type < BTRFS_EXTENT_DATA_KEY) {
1333 path->slots[0]++;
1334 goto next_slot;
1335 }
1336 if (found_key.type > BTRFS_EXTENT_DATA_KEY ||
1337 found_key.offset > end)
1338 break;
1339
1340 if (found_key.offset > cur_offset) {
1341 extent_end = found_key.offset;
1342 extent_type = 0;
1343 goto out_check;
1344 }
1345
1346 fi = btrfs_item_ptr(leaf, path->slots[0],
1347 struct btrfs_file_extent_item);
1348 extent_type = btrfs_file_extent_type(leaf, fi);
1349
1350 ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
1351 if (extent_type == BTRFS_FILE_EXTENT_REG ||
1352 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1353 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1354 extent_offset = btrfs_file_extent_offset(leaf, fi);
1355 extent_end = found_key.offset +
1356 btrfs_file_extent_num_bytes(leaf, fi);
1357 disk_num_bytes =
1358 btrfs_file_extent_disk_num_bytes(leaf, fi);
1359 if (extent_end <= start) {
1360 path->slots[0]++;
1361 goto next_slot;
1362 }
1363 if (disk_bytenr == 0)
1364 goto out_check;
1365 if (btrfs_file_extent_compression(leaf, fi) ||
1366 btrfs_file_extent_encryption(leaf, fi) ||
1367 btrfs_file_extent_other_encoding(leaf, fi))
1368 goto out_check;
1369 if (extent_type == BTRFS_FILE_EXTENT_REG && !force)
1370 goto out_check;
1371 if (btrfs_extent_readonly(root, disk_bytenr))
1372 goto out_check;
1373 if (btrfs_cross_ref_exist(trans, root, ino,
1374 found_key.offset -
1375 extent_offset, disk_bytenr))
1376 goto out_check;
1377 disk_bytenr += extent_offset;
1378 disk_bytenr += cur_offset - found_key.offset;
1379 num_bytes = min(end + 1, extent_end) - cur_offset;
1380
1381
1382
1383
1384 if (!nolock) {
1385 err = btrfs_start_write_no_snapshoting(root);
1386 if (!err)
1387 goto out_check;
1388 }
1389
1390
1391
1392
1393
1394 if (csum_exist_in_range(root, disk_bytenr, num_bytes))
1395 goto out_check;
1396 if (!btrfs_inc_nocow_writers(root->fs_info,
1397 disk_bytenr))
1398 goto out_check;
1399 nocow = 1;
1400 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1401 extent_end = found_key.offset +
1402 btrfs_file_extent_inline_len(leaf,
1403 path->slots[0], fi);
1404 extent_end = ALIGN(extent_end, root->sectorsize);
1405 } else {
1406 BUG_ON(1);
1407 }
1408out_check:
1409 if (extent_end <= start) {
1410 path->slots[0]++;
1411 if (!nolock && nocow)
1412 btrfs_end_write_no_snapshoting(root);
1413 if (nocow)
1414 btrfs_dec_nocow_writers(root->fs_info,
1415 disk_bytenr);
1416 goto next_slot;
1417 }
1418 if (!nocow) {
1419 if (cow_start == (u64)-1)
1420 cow_start = cur_offset;
1421 cur_offset = extent_end;
1422 if (cur_offset > end)
1423 break;
1424 path->slots[0]++;
1425 goto next_slot;
1426 }
1427
1428 btrfs_release_path(path);
1429 if (cow_start != (u64)-1) {
1430 ret = cow_file_range(inode, locked_page,
1431 cow_start, found_key.offset - 1,
1432 end, page_started, nr_written, 1,
1433 NULL);
1434 if (ret) {
1435 if (!nolock && nocow)
1436 btrfs_end_write_no_snapshoting(root);
1437 if (nocow)
1438 btrfs_dec_nocow_writers(root->fs_info,
1439 disk_bytenr);
1440 goto error;
1441 }
1442 cow_start = (u64)-1;
1443 }
1444
1445 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1446 struct extent_map *em;
1447 struct extent_map_tree *em_tree;
1448 em_tree = &BTRFS_I(inode)->extent_tree;
1449 em = alloc_extent_map();
1450 BUG_ON(!em);
1451 em->start = cur_offset;
1452 em->orig_start = found_key.offset - extent_offset;
1453 em->len = num_bytes;
1454 em->block_len = num_bytes;
1455 em->block_start = disk_bytenr;
1456 em->orig_block_len = disk_num_bytes;
1457 em->ram_bytes = ram_bytes;
1458 em->bdev = root->fs_info->fs_devices->latest_bdev;
1459 em->mod_start = em->start;
1460 em->mod_len = em->len;
1461 set_bit(EXTENT_FLAG_PINNED, &em->flags);
1462 set_bit(EXTENT_FLAG_FILLING, &em->flags);
1463 em->generation = -1;
1464 while (1) {
1465 write_lock(&em_tree->lock);
1466 ret = add_extent_mapping(em_tree, em, 1);
1467 write_unlock(&em_tree->lock);
1468 if (ret != -EEXIST) {
1469 free_extent_map(em);
1470 break;
1471 }
1472 btrfs_drop_extent_cache(inode, em->start,
1473 em->start + em->len - 1, 0);
1474 }
1475 type = BTRFS_ORDERED_PREALLOC;
1476 } else {
1477 type = BTRFS_ORDERED_NOCOW;
1478 }
1479
1480 ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr,
1481 num_bytes, num_bytes, type);
1482 if (nocow)
1483 btrfs_dec_nocow_writers(root->fs_info, disk_bytenr);
1484 BUG_ON(ret);
1485
1486 if (root->root_key.objectid ==
1487 BTRFS_DATA_RELOC_TREE_OBJECTID) {
1488 ret = btrfs_reloc_clone_csums(inode, cur_offset,
1489 num_bytes);
1490 if (ret) {
1491 if (!nolock && nocow)
1492 btrfs_end_write_no_snapshoting(root);
1493 goto error;
1494 }
1495 }
1496
1497 extent_clear_unlock_delalloc(inode, cur_offset,
1498 cur_offset + num_bytes - 1, end,
1499 locked_page, EXTENT_LOCKED |
1500 EXTENT_DELALLOC |
1501 EXTENT_CLEAR_DATA_RESV,
1502 PAGE_UNLOCK | PAGE_SET_PRIVATE2);
1503
1504 if (!nolock && nocow)
1505 btrfs_end_write_no_snapshoting(root);
1506 cur_offset = extent_end;
1507 if (cur_offset > end)
1508 break;
1509 }
1510 btrfs_release_path(path);
1511
1512 if (cur_offset <= end && cow_start == (u64)-1) {
1513 cow_start = cur_offset;
1514 cur_offset = end;
1515 }
1516
1517 if (cow_start != (u64)-1) {
1518 ret = cow_file_range(inode, locked_page, cow_start, end, end,
1519 page_started, nr_written, 1, NULL);
1520 if (ret)
1521 goto error;
1522 }
1523
1524error:
1525 err = btrfs_end_transaction(trans, root);
1526 if (!ret)
1527 ret = err;
1528
1529 if (ret && cur_offset < end)
1530 extent_clear_unlock_delalloc(inode, cur_offset, end, end,
1531 locked_page, EXTENT_LOCKED |
1532 EXTENT_DELALLOC | EXTENT_DEFRAG |
1533 EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
1534 PAGE_CLEAR_DIRTY |
1535 PAGE_SET_WRITEBACK |
1536 PAGE_END_WRITEBACK);
1537 btrfs_free_path(path);
1538 return ret;
1539}
1540
1541static inline int need_force_cow(struct inode *inode, u64 start, u64 end)
1542{
1543
1544 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
1545 !(BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC))
1546 return 0;
1547
1548
1549
1550
1551
1552
1553 if (BTRFS_I(inode)->defrag_bytes &&
1554 test_range_bit(&BTRFS_I(inode)->io_tree, start, end,
1555 EXTENT_DEFRAG, 0, NULL))
1556 return 1;
1557
1558 return 0;
1559}
1560
1561
1562
1563
1564static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1565 u64 start, u64 end, int *page_started,
1566 unsigned long *nr_written)
1567{
1568 int ret;
1569 int force_cow = need_force_cow(inode, start, end);
1570
1571 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW && !force_cow) {
1572 ret = run_delalloc_nocow(inode, locked_page, start, end,
1573 page_started, 1, nr_written);
1574 } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) {
1575 ret = run_delalloc_nocow(inode, locked_page, start, end,
1576 page_started, 0, nr_written);
1577 } else if (!inode_need_compress(inode)) {
1578 ret = cow_file_range(inode, locked_page, start, end, end,
1579 page_started, nr_written, 1, NULL);
1580 } else {
1581 set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
1582 &BTRFS_I(inode)->runtime_flags);
1583 ret = cow_file_range_async(inode, locked_page, start, end,
1584 page_started, nr_written);
1585 }
1586 return ret;
1587}
1588
/*
 * Called when a delalloc extent state is split in two.  Splitting a
 * range can increase the number of metadata extents we will need
 * (outstanding_extents), since each piece is accounted in units of
 * BTRFS_MAX_EXTENT_SIZE.  Bump the counter only when the split really
 * produces more accounting units than the original range had.
 */
static void btrfs_split_extent_hook(struct inode *inode,
				    struct extent_state *orig, u64 split)
{
	u64 size;

	/* only delalloc ranges are accounted */
	if (!(orig->state & EXTENT_DELALLOC))
		return;

	size = orig->end - orig->start + 1;
	if (size > BTRFS_MAX_EXTENT_SIZE) {
		u64 num_extents;
		u64 new_size;

		/*
		 * Count the accounting units of the two halves (each
		 * rounded up to BTRFS_MAX_EXTENT_SIZE) and compare with
		 * what the unsplit range already accounted for.
		 */
		new_size = orig->end - split + 1;
		num_extents = div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
					BTRFS_MAX_EXTENT_SIZE);
		new_size = split - orig->start;
		num_extents += div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
					BTRFS_MAX_EXTENT_SIZE);
		/* no extra unit needed: the counter is already correct */
		if (div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
			      BTRFS_MAX_EXTENT_SIZE) >= num_extents)
			return;
	}

	spin_lock(&BTRFS_I(inode)->lock);
	BTRFS_I(inode)->outstanding_extents++;
	spin_unlock(&BTRFS_I(inode)->lock);
}
1622
1623
1624
1625
1626
1627
1628
/*
 * Called when two adjacent extent states are merged.  Merging two
 * delalloc ranges may reduce the number of BTRFS_MAX_EXTENT_SIZE
 * accounting units, in which case outstanding_extents is decremented.
 */
static void btrfs_merge_extent_hook(struct inode *inode,
				    struct extent_state *new,
				    struct extent_state *other)
{
	u64 new_size, old_size;
	u64 num_extents;

	/* not delalloc: nothing is accounted, nothing to adjust */
	if (!(other->state & EXTENT_DELALLOC))
		return;

	/* size of the combined range, regardless of which side is "new" */
	if (new->start > other->start)
		new_size = new->end - other->start + 1;
	else
		new_size = other->end - new->start + 1;

	/* small result: two units always collapse into one */
	if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
		spin_lock(&BTRFS_I(inode)->lock);
		BTRFS_I(inode)->outstanding_extents--;
		spin_unlock(&BTRFS_I(inode)->lock);
		return;
	}

	/*
	 * For large ranges, compare how many accounting units the two
	 * pieces held before the merge with how many the merged range
	 * needs; only drop the counter when a unit is actually saved.
	 */
	old_size = other->end - other->start + 1;
	num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
				BTRFS_MAX_EXTENT_SIZE);
	old_size = new->end - new->start + 1;
	num_extents += div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
				 BTRFS_MAX_EXTENT_SIZE);

	if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
		      BTRFS_MAX_EXTENT_SIZE) >= num_extents)
		return;

	spin_lock(&BTRFS_I(inode)->lock);
	BTRFS_I(inode)->outstanding_extents--;
	spin_unlock(&BTRFS_I(inode)->lock);
}
1686
/*
 * Link @inode into its root's list of inodes with pending delalloc,
 * and, when it is the first such inode, link the root into the fs-wide
 * delalloc_roots list.  No-op if the inode is already listed.  Note the
 * nested lock order: delalloc_lock outside delalloc_root_lock.
 */
static void btrfs_add_delalloc_inodes(struct btrfs_root *root,
				      struct inode *inode)
{
	spin_lock(&root->delalloc_lock);
	if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
		list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
			      &root->delalloc_inodes);
		set_bit(BTRFS_INODE_IN_DELALLOC_LIST,
			&BTRFS_I(inode)->runtime_flags);
		root->nr_delalloc_inodes++;
		if (root->nr_delalloc_inodes == 1) {
			/* first delalloc inode: publish the root fs-wide */
			spin_lock(&root->fs_info->delalloc_root_lock);
			BUG_ON(!list_empty(&root->delalloc_root));
			list_add_tail(&root->delalloc_root,
				      &root->fs_info->delalloc_roots);
			spin_unlock(&root->fs_info->delalloc_root_lock);
		}
	}
	spin_unlock(&root->delalloc_lock);
}
1707
/*
 * Inverse of btrfs_add_delalloc_inodes(): unlink @inode from the root's
 * delalloc list, and when that empties the list, unlink the root from
 * the fs-wide delalloc_roots list.  Same nested lock order as the add
 * path (delalloc_lock outside delalloc_root_lock).
 */
static void btrfs_del_delalloc_inode(struct btrfs_root *root,
				     struct inode *inode)
{
	spin_lock(&root->delalloc_lock);
	if (!list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
		list_del_init(&BTRFS_I(inode)->delalloc_inodes);
		clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
			  &BTRFS_I(inode)->runtime_flags);
		root->nr_delalloc_inodes--;
		if (!root->nr_delalloc_inodes) {
			/* last delalloc inode gone: drop the root too */
			spin_lock(&root->fs_info->delalloc_root_lock);
			BUG_ON(list_empty(&root->delalloc_root));
			list_del_init(&root->delalloc_root);
			spin_unlock(&root->fs_info->delalloc_root_lock);
		}
	}
	spin_unlock(&root->delalloc_lock);
}
1726
1727
1728
1729
1730
1731
/*
 * Extent io-tree hook: runs when bits are set on a range.  When a range
 * first becomes delalloc, account the bytes against the inode and the
 * fs-wide counters and put the inode on the delalloc list.
 */
static void btrfs_set_bit_hook(struct inode *inode,
			       struct extent_state *state, unsigned *bits)
{
	/* EXTENT_DEFRAG is only meaningful together with EXTENT_DELALLOC */
	if ((*bits & EXTENT_DEFRAG) && !(*bits & EXTENT_DELALLOC))
		WARN_ON(1);

	/* only act on the transition into the delalloc state */
	if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
		struct btrfs_root *root = BTRFS_I(inode)->root;
		u64 len = state->end + 1 - state->start;
		/* the free space inode stays off the delalloc list */
		bool do_list = !btrfs_is_free_space_inode(inode);

		if (*bits & EXTENT_FIRST_DELALLOC) {
			/* caller already accounted this extent; clear flag */
			*bits &= ~EXTENT_FIRST_DELALLOC;
		} else {
			spin_lock(&BTRFS_I(inode)->lock);
			BTRFS_I(inode)->outstanding_extents++;
			spin_unlock(&BTRFS_I(inode)->lock);
		}

		/* sanity-test filesystems have no real counters to update */
		if (btrfs_is_testing(root->fs_info))
			return;

		__percpu_counter_add(&root->fs_info->delalloc_bytes, len,
				     root->fs_info->delalloc_batch);
		spin_lock(&BTRFS_I(inode)->lock);
		BTRFS_I(inode)->delalloc_bytes += len;
		if (*bits & EXTENT_DEFRAG)
			BTRFS_I(inode)->defrag_bytes += len;
		if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
					 &BTRFS_I(inode)->runtime_flags))
			btrfs_add_delalloc_inodes(root, inode);
		spin_unlock(&BTRFS_I(inode)->lock);
	}
}
1772
1773
1774
1775
/*
 * Extent io-tree hook: runs when bits are cleared on a range.  Undoes
 * the accounting done by btrfs_set_bit_hook() when a range leaves the
 * delalloc state: outstanding extents, reserved metadata/data space,
 * per-inode and fs-wide byte counters, and the delalloc inode list.
 */
static void btrfs_clear_bit_hook(struct inode *inode,
				 struct extent_state *state,
				 unsigned *bits)
{
	u64 len = state->end + 1 - state->start;
	/* number of BTRFS_MAX_EXTENT_SIZE accounting units in the range */
	u64 num_extents = div64_u64(len + BTRFS_MAX_EXTENT_SIZE -1,
				    BTRFS_MAX_EXTENT_SIZE);

	spin_lock(&BTRFS_I(inode)->lock);
	if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG))
		BTRFS_I(inode)->defrag_bytes -= len;
	spin_unlock(&BTRFS_I(inode)->lock);

	/* only act on the transition out of the delalloc state */
	if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
		struct btrfs_root *root = BTRFS_I(inode)->root;
		bool do_list = !btrfs_is_free_space_inode(inode);

		if (*bits & EXTENT_FIRST_DELALLOC) {
			/* caller handles the extent accounting itself */
			*bits &= ~EXTENT_FIRST_DELALLOC;
		} else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
			spin_lock(&BTRFS_I(inode)->lock);
			BTRFS_I(inode)->outstanding_extents -= num_extents;
			spin_unlock(&BTRFS_I(inode)->lock);
		}

		/*
		 * EXTENT_DO_ACCOUNTING means the range is being torn down
		 * without completing the write, so the reserved metadata
		 * must be given back (except for the tree_root, which does
		 * not reserve through this path).
		 */
		if (*bits & EXTENT_DO_ACCOUNTING &&
		    root != root->fs_info->tree_root)
			btrfs_delalloc_release_metadata(inode, len);

		/* sanity-test filesystems skip the real counters below */
		if (btrfs_is_testing(root->fs_info))
			return;

		if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
		    && do_list && !(state->state & EXTENT_NORESERVE)
		    && (*bits & (EXTENT_DO_ACCOUNTING |
		    EXTENT_CLEAR_DATA_RESV)))
			btrfs_free_reserved_data_space_noquota(inode,
					state->start, len);

		__percpu_counter_add(&root->fs_info->delalloc_bytes, -len,
				     root->fs_info->delalloc_batch);
		spin_lock(&BTRFS_I(inode)->lock);
		BTRFS_I(inode)->delalloc_bytes -= len;
		if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
		    test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
			     &BTRFS_I(inode)->runtime_flags))
			btrfs_del_delalloc_inode(root, inode);
		spin_unlock(&BTRFS_I(inode)->lock);
	}
}
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
1847 size_t size, struct bio *bio,
1848 unsigned long bio_flags)
1849{
1850 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
1851 u64 logical = (u64)bio->bi_sector << 9;
1852 u64 length = 0;
1853 u64 map_length;
1854 int ret;
1855
1856 if (bio_flags & EXTENT_BIO_COMPRESSED)
1857 return 0;
1858
1859 length = bio->bi_size;
1860 map_length = length;
1861 ret = btrfs_map_block(root->fs_info, rw, logical,
1862 &map_length, NULL, 0);
1863 if (ret < 0)
1864 return ret;
1865 if (map_length < length + size)
1866 return 1;
1867 return 0;
1868}
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878static int __btrfs_submit_bio_start(struct inode *inode, int rw,
1879 struct bio *bio, int mirror_num,
1880 unsigned long bio_flags,
1881 u64 bio_offset)
1882{
1883 struct btrfs_root *root = BTRFS_I(inode)->root;
1884 int ret = 0;
1885
1886 ret = btrfs_csum_one_bio(root, inode, bio, 0, 0);
1887 BUG_ON(ret);
1888 return 0;
1889}
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
1900 int mirror_num, unsigned long bio_flags,
1901 u64 bio_offset)
1902{
1903 struct btrfs_root *root = BTRFS_I(inode)->root;
1904 int ret;
1905
1906 ret = btrfs_map_bio(root, rw, bio, mirror_num, 1);
1907 if (ret)
1908 bio_endio(bio, ret);
1909 return ret;
1910}
1911
1912
1913
1914
1915
/*
 * Main data bio submission hook.  Reads get an end-io workqueue and, if
 * needed, checksum lookup (or the compressed read path).  Writes either
 * go through the async checksumming workqueue or are checksummed inline
 * before being mapped to the device.
 */
static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
			  int mirror_num, unsigned long bio_flags,
			  u64 bio_offset)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
	int ret = 0;
	int skip_sum;
	/* only go async while nobody is doing synchronous writes */
	int async = !atomic_read(&BTRFS_I(inode)->sync_writers);

	skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;

	if (btrfs_is_free_space_inode(inode))
		metadata = BTRFS_WQ_ENDIO_FREE_SPACE;

	if (!(rw & REQ_WRITE)) {
		/* read: finish in a workqueue so end-io may block */
		ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata);
		if (ret)
			goto out;

		if (bio_flags & EXTENT_BIO_COMPRESSED) {
			ret = btrfs_submit_compressed_read(inode, bio,
							   mirror_num,
							   bio_flags);
			goto out;
		} else if (!skip_sum) {
			/* attach expected csums for verification on end-io */
			ret = btrfs_lookup_bio_sums(root, inode, bio, NULL);
			if (ret)
				goto out;
		}
		goto mapit;
	} else if (async && !skip_sum) {
		/* relocation writes have their csums cloned separately */
		if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
			goto mapit;
		/* csum in __btrfs_submit_bio_start, map in ..._bio_done */
		ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
				   inode, rw, bio, mirror_num,
				   bio_flags, bio_offset,
				   __btrfs_submit_bio_start,
				   __btrfs_submit_bio_done);
		goto out;
	} else if (!skip_sum) {
		ret = btrfs_csum_one_bio(root, inode, bio, 0, 0);
		if (ret)
			goto out;
	}

mapit:
	ret = btrfs_map_bio(root, rw, bio, mirror_num, 0);

out:
	if (ret < 0)
		bio_endio(bio, ret);
	return ret;
}
1972
1973
1974
1975
1976
/*
 * Insert the checksums collected for an ordered extent into the csum
 * tree.  adding_csums tells the reservation code these insertions use
 * already-reserved delalloc metadata.  Always returns 0.
 *
 * NOTE(review): the return value of btrfs_csum_file_blocks() is
 * discarded here, so an insertion failure is silent -- verify whether
 * that is intended or whether it should abort the transaction.
 */
static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
			     struct inode *inode, u64 file_offset,
			     struct list_head *list)
{
	struct btrfs_ordered_sum *sum;

	list_for_each_entry(sum, list, list) {
		trans->adding_csums = 1;
		btrfs_csum_file_blocks(trans,
		       BTRFS_I(inode)->root->fs_info->csum_root, sum);
		trans->adding_csums = 0;
	}
	return 0;
}
1991
/*
 * Mark [start, end] as delalloc in the inode's io_tree.  @end must be
 * the last byte of the range, hence the warning when it sits exactly on
 * a page boundary.  @dedupe is accepted for interface symmetry but is
 * not used in this path.
 */
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
			      struct extent_state **cached_state, int dedupe)
{
	WARN_ON((end & (PAGE_CACHE_SIZE - 1)) == 0);
	return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
				   cached_state);
}
1999
2000
/* Deferred work item re-dirtying a page whose delalloc state was lost. */
struct btrfs_writepage_fixup {
	struct page *page;	/* page to fix up; holds a reference */
	struct btrfs_work work;	/* queued on fs_info->fixup_workers */
};
2005
/*
 * Worker for btrfs_writepage_start_hook(): re-establish delalloc state
 * for a dirty page that lost it.  Waits out any ordered extent covering
 * the page, reserves space, and re-dirties the page so a later
 * writepage pass handles it normally.  Consumes the fixup and its page
 * reference.
 */
static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
{
	struct btrfs_writepage_fixup *fixup;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	struct page *page;
	struct inode *inode;
	u64 page_start;
	u64 page_end;
	int ret;

	fixup = container_of(work, struct btrfs_writepage_fixup, work);
	page = fixup->page;
again:
	lock_page(page);
	if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
		/* page was truncated/cleaned meanwhile; nothing to do */
		ClearPageChecked(page);
		goto out_page;
	}

	inode = page->mapping->host;
	page_start = page_offset(page);
	page_end = page_offset(page) + PAGE_CACHE_SIZE - 1;

	lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end,
			 &cached_state);

	/* already ordered? writepage will see Private2 and handle it */
	if (PagePrivate2(page))
		goto out;

	ordered = btrfs_lookup_ordered_range(inode, page_start,
					PAGE_CACHE_SIZE);
	if (ordered) {
		/* drop locks, wait for the ordered extent, then retry */
		unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start,
				     page_end, &cached_state, GFP_NOFS);
		unlock_page(page);
		btrfs_start_ordered_extent(inode, ordered, 1);
		btrfs_put_ordered_extent(ordered);
		goto again;
	}

	ret = btrfs_delalloc_reserve_space(inode, page_start,
					   PAGE_CACHE_SIZE);
	if (ret) {
		/* no space: report the error on the mapping and give up */
		mapping_set_error(page->mapping, ret);
		end_extent_writepage(page, ret, page_start, page_end);
		ClearPageChecked(page);
		goto out;
	}

	/* NOTE(review): return value ignored here -- confirm intended */
	btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state,
				  0);
	ClearPageChecked(page);
	set_page_dirty(page);
out:
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
			     &cached_state, GFP_NOFS);
out_page:
	unlock_page(page);
	page_cache_release(page);
	kfree(fixup);
}
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
/*
 * Called at the start of writepage for a dirty page.  If the page is
 * properly covered by an ordered extent (Private2 set) return 0 and let
 * writeback proceed.  Otherwise queue a fixup work item to restore the
 * page's delalloc state and return -EBUSY so this writepage attempt is
 * abandoned (-EAGAIN when a fixup is already pending or cannot be
 * allocated).
 */
static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
{
	struct inode *inode = page->mapping->host;
	struct btrfs_writepage_fixup *fixup;
	struct btrfs_root *root = BTRFS_I(inode)->root;

	/* Private2 set means an ordered extent covers this page: all good */
	if (TestClearPagePrivate2(page))
		return 0;

	/* PageChecked: a fixup for this page is already queued */
	if (PageChecked(page))
		return -EAGAIN;

	fixup = kzalloc(sizeof(*fixup), GFP_NOFS);
	if (!fixup)
		return -EAGAIN;

	SetPageChecked(page);
	page_cache_get(page);	/* reference released by the worker */
	btrfs_init_work(&fixup->work, btrfs_fixup_helper,
			btrfs_writepage_fixup_worker, NULL, NULL);
	fixup->page = page;
	btrfs_queue_work(root->fs_info->fixup_workers, &fixup->work);
	return -EBUSY;
}
2106
/*
 * Insert a file extent item for space that was already reserved and
 * written: drop any old extents in [file_pos, file_pos + num_bytes),
 * insert the new item (reusing the slot freed by the drop when
 * possible), take a reference on the data extent, and release the
 * qgroup data reservation for the range.
 *
 * Returns 0 on success or a negative errno.
 */
static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
				       struct inode *inode, u64 file_pos,
				       u64 disk_bytenr, u64 disk_num_bytes,
				       u64 num_bytes, u64 ram_bytes,
				       u8 compression, u8 encryption,
				       u16 other_encoding, int extent_type)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_file_extent_item *fi;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key ins;
	int extent_inserted = 0;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/*
	 * Drop the old extents covering the range; when the drop leaves
	 * room in the leaf, the new item is inserted in the same pass
	 * (extent_inserted reports that).
	 */
	ret = __btrfs_drop_extents(trans, root, inode, path, file_pos,
				   file_pos + num_bytes, NULL, 0,
				   1, sizeof(*fi), &extent_inserted);
	if (ret)
		goto out;

	if (!extent_inserted) {
		ins.objectid = btrfs_ino(inode);
		ins.offset = file_pos;
		ins.type = BTRFS_EXTENT_DATA_KEY;

		path->leave_spinning = 1;
		ret = btrfs_insert_empty_item(trans, root, path, &ins,
					      sizeof(*fi));
		if (ret)
			goto out;
	}
	leaf = path->nodes[0];
	fi = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);
	btrfs_set_file_extent_generation(leaf, fi, trans->transid);
	btrfs_set_file_extent_type(leaf, fi, extent_type);
	btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr);
	btrfs_set_file_extent_disk_num_bytes(leaf, fi, disk_num_bytes);
	btrfs_set_file_extent_offset(leaf, fi, 0);
	btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
	btrfs_set_file_extent_ram_bytes(leaf, fi, ram_bytes);
	btrfs_set_file_extent_compression(leaf, fi, compression);
	btrfs_set_file_extent_encryption(leaf, fi, encryption);
	btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding);

	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	inode_add_bytes(inode, num_bytes);

	/* reference the data extent from this file position */
	ins.objectid = disk_bytenr;
	ins.offset = disk_num_bytes;
	ins.type = BTRFS_EXTENT_ITEM_KEY;
	ret = btrfs_alloc_reserved_file_extent(trans, root,
					root->root_key.objectid,
					btrfs_ino(inode), file_pos,
					ram_bytes, &ins);
	/* the data is on disk now; hand the reservation over to qgroups */
	btrfs_qgroup_release_data(inode, file_pos, ram_bytes);
out:
	btrfs_free_path(path);

	return ret;
}
2188
2189
/* One backref to relink after a snapshot-aware defrag of an extent. */
struct sa_defrag_extent_backref {
	struct rb_node node;		/* linked into new_sa_defrag_extent.root */
	struct old_sa_defrag_extent *old;	/* the old extent this refers to */
	u64 root_id;			/* root holding the reference */
	u64 inum;			/* inode holding the reference */
	u64 file_pos;			/* file offset of the referencing extent */
	u64 extent_offset;		/* offset into the referenced extent */
	u64 num_bytes;
	u64 generation;
};
2200
/* An old (pre-defrag) extent whose references need relinking. */
struct old_sa_defrag_extent {
	struct list_head list;		/* linked into new_sa_defrag_extent.head */
	struct new_sa_defrag_extent *new;	/* replacement extent */

	u64 extent_offset;
	u64 bytenr;			/* old extent's disk byte number */
	u64 offset;
	u64 len;
	int count;			/* backrefs recorded for this old extent */
};
2211
/* The freshly written (post-defrag) extent plus relink bookkeeping. */
struct new_sa_defrag_extent {
	struct rb_root root;		/* sa_defrag_extent_backref tree */
	struct list_head head;		/* old_sa_defrag_extent list */
	struct btrfs_path *path;	/* shared path for the backref walk */
	struct inode *inode;		/* inode the defrag was run on */
	u64 file_pos;
	u64 len;
	u64 bytenr;			/* new extent's disk byte number */
	u64 disk_len;
	u8 compress_type;
};
2223
2224static int backref_comp(struct sa_defrag_extent_backref *b1,
2225 struct sa_defrag_extent_backref *b2)
2226{
2227 if (b1->root_id < b2->root_id)
2228 return -1;
2229 else if (b1->root_id > b2->root_id)
2230 return 1;
2231
2232 if (b1->inum < b2->inum)
2233 return -1;
2234 else if (b1->inum > b2->inum)
2235 return 1;
2236
2237 if (b1->file_pos < b2->file_pos)
2238 return -1;
2239 else if (b1->file_pos > b2->file_pos)
2240 return 1;
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254 return 0;
2255}
2256
2257static void backref_insert(struct rb_root *root,
2258 struct sa_defrag_extent_backref *backref)
2259{
2260 struct rb_node **p = &root->rb_node;
2261 struct rb_node *parent = NULL;
2262 struct sa_defrag_extent_backref *entry;
2263 int ret;
2264
2265 while (*p) {
2266 parent = *p;
2267 entry = rb_entry(parent, struct sa_defrag_extent_backref, node);
2268
2269 ret = backref_comp(backref, entry);
2270 if (ret < 0)
2271 p = &(*p)->rb_left;
2272 else
2273 p = &(*p)->rb_right;
2274 }
2275
2276 rb_link_node(&backref->node, parent, p);
2277 rb_insert_color(&backref->node, root);
2278}
2279
2280
2281
2282
/*
 * iterate_inodes_from_logical() callback: for one (root, inode, offset)
 * reference to an old extent, locate the matching file extent item and
 * record it as a sa_defrag_extent_backref on the new extent.  Returns 0
 * on success or when the reference should be skipped (own inode, root
 * gone), a negative errno otherwise.
 */
static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
				       void *ctx)
{
	struct btrfs_file_extent_item *extent;
	struct btrfs_fs_info *fs_info;
	struct old_sa_defrag_extent *old = ctx;
	struct new_sa_defrag_extent *new = old->new;
	struct btrfs_path *path = new->path;
	struct btrfs_key key;
	struct btrfs_root *root;
	struct sa_defrag_extent_backref *backref;
	struct extent_buffer *leaf;
	struct inode *inode = new->inode;
	int slot;
	int ret;
	u64 extent_offset;
	u64 num_bytes;

	/* the defragged inode itself is relinked separately; skip it */
	if (BTRFS_I(inode)->root->root_key.objectid == root_id &&
	    inum == btrfs_ino(inode))
		return 0;

	key.objectid = root_id;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	fs_info = BTRFS_I(inode)->root->fs_info;
	root = btrfs_read_fs_root_no_name(fs_info, &key);
	if (IS_ERR(root)) {
		if (PTR_ERR(root) == -ENOENT)
			return 0;	/* root deleted meanwhile: skip */
		WARN_ON(1);
		pr_debug("inum=%llu, offset=%llu, root_id=%llu\n",
			 inum, offset, root_id);
		return PTR_ERR(root);
	}

	key.objectid = inum;
	key.type = BTRFS_EXTENT_DATA_KEY;
	/* clamp absurdly large offsets so the search key stays sane */
	if (offset > (u64)-1 << 32)
		key.offset = 0;
	else
		key.offset = offset;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (WARN_ON(ret < 0))
		return ret;
	ret = 0;

	/* walk forward until the extent item that references old->bytenr */
	while (1) {
		cond_resched();

		leaf = path->nodes[0];
		slot = path->slots[0];

		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0) {
				goto out;
			} else if (ret > 0) {
				ret = 0;
				goto out;	/* ran off the tree: no match */
			}
			continue;
		}

		/* advance now; the current item is examined via 'slot' */
		path->slots[0]++;

		btrfs_item_key_to_cpu(leaf, &key, slot);

		if (key.objectid > inum)
			goto out;

		if (key.objectid < inum || key.type != BTRFS_EXTENT_DATA_KEY)
			continue;

		extent = btrfs_item_ptr(leaf, slot,
					struct btrfs_file_extent_item);

		if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr)
			continue;

		/* only the item at exactly the reported offset matches */
		if (key.offset != offset)
			continue;

		extent_offset = btrfs_file_extent_offset(leaf, extent);
		num_bytes = btrfs_file_extent_num_bytes(leaf, extent);

		/* must actually overlap the old extent's defragged range */
		if (extent_offset >= old->extent_offset + old->offset +
		    old->len || extent_offset + num_bytes <=
		    old->extent_offset + old->offset)
			continue;
		break;
	}

	backref = kmalloc(sizeof(*backref), GFP_NOFS);
	if (!backref) {
		/*
		 * NOTE(review): returns -ENOENT (not -ENOMEM) on allocation
		 * failure; the caller treats -ENOENT as non-fatal, so OOM
		 * here silently drops the backref -- confirm intended.
		 */
		ret = -ENOENT;
		goto out;
	}

	backref->root_id = root_id;
	backref->inum = inum;
	backref->file_pos = offset;
	backref->num_bytes = num_bytes;
	backref->extent_offset = extent_offset;
	backref->generation = btrfs_file_extent_generation(leaf, extent);
	backref->old = old;
	backref_insert(&new->root, backref);
	old->count++;
out:
	btrfs_release_path(path);
	WARN_ON(ret);
	return ret;
}
2403
/*
 * Collect backrefs for every old extent of a snapshot-aware defrag by
 * walking all inodes referencing each old extent's logical address.
 * Old extents that gained no backrefs are dropped.  Returns true when
 * at least one old extent with backrefs remains, false on error or when
 * there is nothing left to relink.
 */
static noinline bool record_extent_backrefs(struct btrfs_path *path,
				   struct new_sa_defrag_extent *new)
{
	struct btrfs_fs_info *fs_info = BTRFS_I(new->inode)->root->fs_info;
	struct old_sa_defrag_extent *old, *tmp;
	int ret;

	new->path = path;	/* shared by the record_one_backref calls */

	list_for_each_entry_safe(old, tmp, &new->head, list) {
		ret = iterate_inodes_from_logical(old->bytenr +
						  old->extent_offset, fs_info,
						  path, record_one_backref,
						  old);
		/* -ENOENT is non-fatal: the reference vanished meanwhile */
		if (ret < 0 && ret != -ENOENT)
			return false;

		/* no backrefs recorded: nothing to relink for this one */
		if (!old->count) {
			list_del(&old->list);
			kfree(old);
		}
	}

	if (list_empty(&new->head))
		return false;

	return true;
}
2433
2434static int relink_is_mergable(struct extent_buffer *leaf,
2435 struct btrfs_file_extent_item *fi,
2436 struct new_sa_defrag_extent *new)
2437{
2438 if (btrfs_file_extent_disk_bytenr(leaf, fi) != new->bytenr)
2439 return 0;
2440
2441 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
2442 return 0;
2443
2444 if (btrfs_file_extent_compression(leaf, fi) != new->compress_type)
2445 return 0;
2446
2447 if (btrfs_file_extent_encryption(leaf, fi) ||
2448 btrfs_file_extent_other_encoding(leaf, fi))
2449 return 0;
2450
2451 return 1;
2452}
2453
2454
2455
2456
2457static noinline int relink_extent_backref(struct btrfs_path *path,
2458 struct sa_defrag_extent_backref *prev,
2459 struct sa_defrag_extent_backref *backref)
2460{
2461 struct btrfs_file_extent_item *extent;
2462 struct btrfs_file_extent_item *item;
2463 struct btrfs_ordered_extent *ordered;
2464 struct btrfs_trans_handle *trans;
2465 struct btrfs_fs_info *fs_info;
2466 struct btrfs_root *root;
2467 struct btrfs_key key;
2468 struct extent_buffer *leaf;
2469 struct old_sa_defrag_extent *old = backref->old;
2470 struct new_sa_defrag_extent *new = old->new;
2471 struct inode *src_inode = new->inode;
2472 struct inode *inode;
2473 struct extent_state *cached = NULL;
2474 int ret = 0;
2475 u64 start;
2476 u64 len;
2477 u64 lock_start;
2478 u64 lock_end;
2479 bool merge = false;
2480 int index;
2481
2482 if (prev && prev->root_id == backref->root_id &&
2483 prev->inum == backref->inum &&
2484 prev->file_pos + prev->num_bytes == backref->file_pos)
2485 merge = true;
2486
2487
2488 key.objectid = backref->root_id;
2489 key.type = BTRFS_ROOT_ITEM_KEY;
2490 key.offset = (u64)-1;
2491
2492 fs_info = BTRFS_I(src_inode)->root->fs_info;
2493 index = srcu_read_lock(&fs_info->subvol_srcu);
2494
2495 root = btrfs_read_fs_root_no_name(fs_info, &key);
2496 if (IS_ERR(root)) {
2497 srcu_read_unlock(&fs_info->subvol_srcu, index);
2498 if (PTR_ERR(root) == -ENOENT)
2499 return 0;
2500 return PTR_ERR(root);
2501 }
2502
2503 if (btrfs_root_readonly(root)) {
2504 srcu_read_unlock(&fs_info->subvol_srcu, index);
2505 return 0;
2506 }
2507
2508
2509 key.objectid = backref->inum;
2510 key.type = BTRFS_INODE_ITEM_KEY;
2511 key.offset = 0;
2512
2513 inode = btrfs_iget(fs_info->sb, &key, root, NULL);
2514 if (IS_ERR(inode)) {
2515 srcu_read_unlock(&fs_info->subvol_srcu, index);
2516 return 0;
2517 }
2518
2519 srcu_read_unlock(&fs_info->subvol_srcu, index);
2520
2521
2522 lock_start = backref->file_pos;
2523 lock_end = backref->file_pos + backref->num_bytes - 1;
2524 lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
2525 &cached);
2526
2527 ordered = btrfs_lookup_first_ordered_extent(inode, lock_end);
2528 if (ordered) {
2529 btrfs_put_ordered_extent(ordered);
2530 goto out_unlock;
2531 }
2532
2533 trans = btrfs_join_transaction(root);
2534 if (IS_ERR(trans)) {
2535 ret = PTR_ERR(trans);
2536 goto out_unlock;
2537 }
2538
2539 key.objectid = backref->inum;
2540 key.type = BTRFS_EXTENT_DATA_KEY;
2541 key.offset = backref->file_pos;
2542
2543 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2544 if (ret < 0) {
2545 goto out_free_path;
2546 } else if (ret > 0) {
2547 ret = 0;
2548 goto out_free_path;
2549 }
2550
2551 extent = btrfs_item_ptr(path->nodes[0], path->slots[0],
2552 struct btrfs_file_extent_item);
2553
2554 if (btrfs_file_extent_generation(path->nodes[0], extent) !=
2555 backref->generation)
2556 goto out_free_path;
2557
2558 btrfs_release_path(path);
2559
2560 start = backref->file_pos;
2561 if (backref->extent_offset < old->extent_offset + old->offset)
2562 start += old->extent_offset + old->offset -
2563 backref->extent_offset;
2564
2565 len = min(backref->extent_offset + backref->num_bytes,
2566 old->extent_offset + old->offset + old->len);
2567 len -= max(backref->extent_offset, old->extent_offset + old->offset);
2568
2569 ret = btrfs_drop_extents(trans, root, inode, start,
2570 start + len, 1);
2571 if (ret)
2572 goto out_free_path;
2573again:
2574 key.objectid = btrfs_ino(inode);
2575 key.type = BTRFS_EXTENT_DATA_KEY;
2576 key.offset = start;
2577
2578 path->leave_spinning = 1;
2579 if (merge) {
2580 struct btrfs_file_extent_item *fi;
2581 u64 extent_len;
2582 struct btrfs_key found_key;
2583
2584 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2585 if (ret < 0)
2586 goto out_free_path;
2587
2588 path->slots[0]--;
2589 leaf = path->nodes[0];
2590 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2591
2592 fi = btrfs_item_ptr(leaf, path->slots[0],
2593 struct btrfs_file_extent_item);
2594 extent_len = btrfs_file_extent_num_bytes(leaf, fi);
2595
2596 if (extent_len + found_key.offset == start &&
2597 relink_is_mergable(leaf, fi, new)) {
2598 btrfs_set_file_extent_num_bytes(leaf, fi,
2599 extent_len + len);
2600 btrfs_mark_buffer_dirty(leaf);
2601 inode_add_bytes(inode, len);
2602
2603 ret = 1;
2604 goto out_free_path;
2605 } else {
2606 merge = false;
2607 btrfs_release_path(path);
2608 goto again;
2609 }
2610 }
2611
2612 ret = btrfs_insert_empty_item(trans, root, path, &key,
2613 sizeof(*extent));
2614 if (ret) {
2615 btrfs_abort_transaction(trans, root, ret);
2616 goto out_free_path;
2617 }
2618
2619 leaf = path->nodes[0];
2620 item = btrfs_item_ptr(leaf, path->slots[0],
2621 struct btrfs_file_extent_item);
2622 btrfs_set_file_extent_disk_bytenr(leaf, item, new->bytenr);
2623 btrfs_set_file_extent_disk_num_bytes(leaf, item, new->disk_len);
2624 btrfs_set_file_extent_offset(leaf, item, start - new->file_pos);
2625 btrfs_set_file_extent_num_bytes(leaf, item, len);
2626 btrfs_set_file_extent_ram_bytes(leaf, item, new->len);
2627 btrfs_set_file_extent_generation(leaf, item, trans->transid);
2628 btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
2629 btrfs_set_file_extent_compression(leaf, item, new->compress_type);
2630 btrfs_set_file_extent_encryption(leaf, item, 0);
2631 btrfs_set_file_extent_other_encoding(leaf, item, 0);
2632
2633 btrfs_mark_buffer_dirty(leaf);
2634 inode_add_bytes(inode, len);
2635 btrfs_release_path(path);
2636
2637 ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
2638 new->disk_len, 0,
2639 backref->root_id, backref->inum,
2640 new->file_pos);
2641 if (ret) {
2642 btrfs_abort_transaction(trans, root, ret);
2643 goto out_free_path;
2644 }
2645
2646 ret = 1;
2647out_free_path:
2648 btrfs_release_path(path);
2649 path->leave_spinning = 0;
2650 btrfs_end_transaction(trans, root);
2651out_unlock:
2652 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
2653 &cached, GFP_NOFS);
2654 iput(inode);
2655 return ret;
2656}
2657
2658static void free_sa_defrag_extent(struct new_sa_defrag_extent *new)
2659{
2660 struct old_sa_defrag_extent *old, *tmp;
2661
2662 if (!new)
2663 return;
2664
2665 list_for_each_entry_safe(old, tmp, &new->head, list) {
2666 kfree(old);
2667 }
2668 kfree(new);
2669}
2670
/*
 * Second phase of snapshot-aware defrag: collect all backrefs for the
 * defragged extent, then try to relink each referencing file extent to
 * the new location.  Consumes @new (always freed via the out label) and
 * drops the defrag_running count taken in record_old_file_extents().
 */
static void relink_file_extents(struct new_sa_defrag_extent *new)
{
	struct btrfs_path *path;
	struct sa_defrag_extent_backref *backref;
	struct sa_defrag_extent_backref *prev = NULL;
	struct inode *inode;
	struct btrfs_root *root;
	struct rb_node *node;
	int ret;

	inode = new->inode;
	root = BTRFS_I(inode)->root;

	path = btrfs_alloc_path();
	if (!path)
		return;

	if (!record_extent_backrefs(path, new)) {
		btrfs_free_path(path);
		goto out;
	}
	btrfs_release_path(path);

	/* drain the rbtree of backrefs in sorted order */
	while (1) {
		node = rb_first(&new->root);
		if (!node)
			break;
		rb_erase(node, &new->root);

		backref = rb_entry(node, struct sa_defrag_extent_backref, node);

		/*
		 * relink_extent_backref() returns 1 when it inserted a file
		 * extent that a following, adjacent backref may be merged
		 * with; keep that backref around as @prev for the next pass,
		 * otherwise free it now.
		 */
		ret = relink_extent_backref(path, prev, backref);
		WARN_ON(ret < 0);

		kfree(prev);

		if (ret == 1)
			prev = backref;
		else
			prev = NULL;
		cond_resched();
	}
	kfree(prev);

	btrfs_free_path(path);
out:
	free_sa_defrag_extent(new);

	/* balance the atomic_inc done in record_old_file_extents() */
	atomic_dec(&root->fs_info->defrag_running);
	wake_up(&root->fs_info->transaction_wait);
}
2722
/*
 * First phase of snapshot-aware defrag: build a record of all the old
 * file extents that overlap the range covered by @ordered, so they can
 * later be relinked to the freshly written extent.  Returns the new
 * record (with fs_info->defrag_running bumped) or NULL on failure.
 */
static struct new_sa_defrag_extent *
record_old_file_extents(struct inode *inode,
			struct btrfs_ordered_extent *ordered)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct old_sa_defrag_extent *old;
	struct new_sa_defrag_extent *new;
	int ret;

	new = kmalloc(sizeof(*new), GFP_NOFS);
	if (!new)
		return NULL;

	/* describe the freshly written extent */
	new->inode = inode;
	new->file_pos = ordered->file_offset;
	new->len = ordered->len;
	new->bytenr = ordered->start;
	new->disk_len = ordered->disk_len;
	new->compress_type = ordered->compress_type;
	new->root = RB_ROOT;
	INIT_LIST_HEAD(&new->head);

	path = btrfs_alloc_path();
	if (!path)
		goto out_kfree;

	key.objectid = btrfs_ino(inode);
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = new->file_pos;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out_free_path;
	/* step back one slot so we also see an extent starting before us */
	if (ret > 0 && path->slots[0] > 0)
		path->slots[0]--;

	/* find out all the old extents overlapping the ordered range */
	while (1) {
		struct btrfs_file_extent_item *extent;
		struct extent_buffer *l;
		int slot;
		u64 num_bytes;
		u64 offset;
		u64 end;
		u64 disk_bytenr;
		u64 extent_offset;

		l = path->nodes[0];
		slot = path->slots[0];

		if (slot >= btrfs_header_nritems(l)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out_free_path;
			else if (ret > 0)
				break;
			continue;
		}

		btrfs_item_key_to_cpu(l, &key, slot);

		if (key.objectid != btrfs_ino(inode))
			break;
		if (key.type != BTRFS_EXTENT_DATA_KEY)
			break;
		/* past the end of the defragged range, we're done */
		if (key.offset >= new->file_pos + new->len)
			break;

		extent = btrfs_item_ptr(l, slot, struct btrfs_file_extent_item);

		num_bytes = btrfs_file_extent_num_bytes(l, extent);
		/* ends before our range, not interesting */
		if (key.offset + num_bytes < new->file_pos)
			goto next;

		/* skip holes */
		disk_bytenr = btrfs_file_extent_disk_bytenr(l, extent);
		if (!disk_bytenr)
			goto next;

		extent_offset = btrfs_file_extent_offset(l, extent);

		old = kmalloc(sizeof(*old), GFP_NOFS);
		if (!old)
			goto out_free_path;

		/* clamp the old extent to the overlap with our range */
		offset = max(new->file_pos, key.offset);
		end = min(new->file_pos + new->len, key.offset + num_bytes);

		old->bytenr = disk_bytenr;
		old->extent_offset = extent_offset;
		old->offset = offset - key.offset;
		old->len = end - offset;
		old->new = new;
		old->count = 0;
		list_add_tail(&old->list, &new->head);
next:
		path->slots[0]++;
		cond_resched();
	}

	btrfs_free_path(path);
	/* dropped again by relink_file_extents() */
	atomic_inc(&root->fs_info->defrag_running);

	return new;

out_free_path:
	btrfs_free_path(path);
out_kfree:
	free_sa_defrag_extent(new);
	return NULL;
}
2835
2836static void btrfs_release_delalloc_bytes(struct btrfs_root *root,
2837 u64 start, u64 len)
2838{
2839 struct btrfs_block_group_cache *cache;
2840
2841 cache = btrfs_lookup_block_group(root->fs_info, start);
2842 ASSERT(cache);
2843
2844 spin_lock(&cache->lock);
2845 cache->delalloc_bytes -= len;
2846 spin_unlock(&cache->lock);
2847
2848 btrfs_put_block_group(cache);
2849}
2850
2851
2852
2853
2854
/*
 * As ordered data IO finishes, this gets called so we can finish an
 * ordered extent whose range of bytes in the file is fully written:
 * insert/update the file extent item, csums and inode item, and drop
 * the reservations that were held for the write.
 */
static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
{
	struct inode *inode = ordered_extent->inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_trans_handle *trans = NULL;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_state *cached_state = NULL;
	struct new_sa_defrag_extent *new = NULL;
	int compress_type = 0;
	int ret = 0;
	u64 logical_len = ordered_extent->len;
	bool nolock;
	bool truncated = false;

	nolock = btrfs_is_free_space_inode(inode);

	if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) {
		ret = -EIO;
		goto out;
	}

	btrfs_free_io_failure_record(inode, ordered_extent->file_offset,
				     ordered_extent->file_offset +
				     ordered_extent->len - 1);

	if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
		truncated = true;
		logical_len = ordered_extent->truncated_len;
		/* truncated the entire extent, don't bother adding */
		if (!logical_len)
			goto out;
	}

	if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
		BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */

		/*
		 * A NOCOW write does not allocate new data space, but qgroup
		 * space was still reserved for the range at write time; as
		 * NOCOW won't create a new delayed ref, release that
		 * reservation now and just update the inode item.
		 */
		btrfs_qgroup_free_data(inode, ordered_extent->file_offset,
				       ordered_extent->len);
		btrfs_ordered_update_i_size(inode, 0, ordered_extent);
		if (nolock)
			trans = btrfs_join_transaction_nolock(root);
		else
			trans = btrfs_join_transaction(root);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			trans = NULL;
			goto out;
		}
		trans->block_rsv = &root->fs_info->delalloc_block_rsv;
		ret = btrfs_update_inode_fallback(trans, root, inode);
		if (ret) /* -ENOMEM or corruption */
			btrfs_abort_transaction(trans, root, ret);
		goto out;
	}

	lock_extent_bits(io_tree, ordered_extent->file_offset,
			 ordered_extent->file_offset + ordered_extent->len - 1,
			 &cached_state);

	ret = test_range_bit(io_tree, ordered_extent->file_offset,
			ordered_extent->file_offset + ordered_extent->len - 1,
			EXTENT_DEFRAG, 1, cached_state);
	if (ret) {
		u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
		/*
		 * Snapshot-aware defrag is deliberately disabled here (the
		 * "0 &&"), matching upstream which turned it off for
		 * correctness reasons; the EXTENT_DEFRAG bit is still
		 * cleared below.
		 */
		if (0 && last_snapshot >= BTRFS_I(inode)->generation)
			/* the extent may be shared with a snapshot */
			new = record_old_file_extents(inode, ordered_extent);

		clear_extent_bit(io_tree, ordered_extent->file_offset,
			ordered_extent->file_offset + ordered_extent->len - 1,
			EXTENT_DEFRAG, 0, 0, &cached_state, GFP_NOFS);
	}

	if (nolock)
		trans = btrfs_join_transaction_nolock(root);
	else
		trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		trans = NULL;
		goto out_unlock;
	}

	trans->block_rsv = &root->fs_info->delalloc_block_rsv;

	if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
		compress_type = ordered_extent->compress_type;
	if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
		/* writes into preallocated space are never compressed */
		BUG_ON(compress_type);
		ret = btrfs_mark_extent_written(trans, inode,
						ordered_extent->file_offset,
						ordered_extent->file_offset +
						logical_len);
	} else {
		BUG_ON(root == root->fs_info->tree_root);
		ret = insert_reserved_file_extent(trans, inode,
						ordered_extent->file_offset,
						ordered_extent->start,
						ordered_extent->disk_len,
						logical_len, logical_len,
						compress_type, 0, 0,
						BTRFS_FILE_EXTENT_REG);
		if (!ret)
			btrfs_release_delalloc_bytes(root,
						     ordered_extent->start,
						     ordered_extent->disk_len);
	}
	unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
			   ordered_extent->file_offset, ordered_extent->len,
			   trans->transid);
	if (ret < 0) {
		btrfs_abort_transaction(trans, root, ret);
		goto out_unlock;
	}

	add_pending_csums(trans, inode, ordered_extent->file_offset,
			  &ordered_extent->list);

	btrfs_ordered_update_i_size(inode, 0, ordered_extent);
	ret = btrfs_update_inode_fallback(trans, root, inode);
	if (ret) { /* -ENOMEM or corruption */
		btrfs_abort_transaction(trans, root, ret);
		goto out_unlock;
	}
	ret = 0;
out_unlock:
	unlock_extent_cached(io_tree, ordered_extent->file_offset,
			     ordered_extent->file_offset +
			     ordered_extent->len - 1, &cached_state, GFP_NOFS);
out:
	if (root != root->fs_info->tree_root)
		btrfs_delalloc_release_metadata(inode, ordered_extent->len);
	if (trans)
		btrfs_end_transaction(trans, root);

	if (ret || truncated) {
		u64 start, end;

		if (truncated)
			start = ordered_extent->file_offset + logical_len;
		else
			start = ordered_extent->file_offset;
		end = ordered_extent->file_offset + ordered_extent->len - 1;
		clear_extent_uptodate(io_tree, start, end, NULL, GFP_NOFS);

		/* drop the cached extent map for the failed/truncated range */
		btrfs_drop_extent_cache(inode, start, end, 0);

		/*
		 * If the ordered extent had an IOERR or something else went
		 * wrong we need to return the space for this ordered extent
		 * back to the allocator.  We only free the extent in the
		 * truncated case if we didn't write out the extent at all
		 * (!logical_len).  NOCOW and PREALLOC extents did not
		 * allocate new space, so nothing to free for those.
		 */
		if ((ret || !logical_len) &&
		    !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
		    !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags))
			btrfs_free_reserved_extent(root, ordered_extent->start,
						   ordered_extent->disk_len, 1);
	}

	/*
	 * This needs to be done to make sure anybody waiting knows we are
	 * done with the ordered extent; it also drops the tree's reference
	 * on it, balanced by the second put below.
	 */
	btrfs_remove_ordered_extent(inode, ordered_extent);

	/* for snapshot-aware defrag */
	if (new) {
		if (ret) {
			free_sa_defrag_extent(new);
			atomic_dec(&root->fs_info->defrag_running);
		} else {
			relink_file_extents(new);
		}
	}

	/* once for us */
	btrfs_put_ordered_extent(ordered_extent);
	/* once for the tree */
	btrfs_put_ordered_extent(ordered_extent);

	return ret;
}
3045
3046static void finish_ordered_fn(struct btrfs_work *work)
3047{
3048 struct btrfs_ordered_extent *ordered_extent;
3049 ordered_extent = container_of(work, struct btrfs_ordered_extent, work);
3050 btrfs_finish_ordered_io(ordered_extent);
3051}
3052
3053static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
3054 struct extent_state *state, int uptodate)
3055{
3056 struct inode *inode = page->mapping->host;
3057 struct btrfs_root *root = BTRFS_I(inode)->root;
3058 struct btrfs_ordered_extent *ordered_extent = NULL;
3059 struct btrfs_workqueue *wq;
3060 btrfs_work_func_t func;
3061
3062 trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
3063
3064 ClearPagePrivate2(page);
3065 if (!btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
3066 end - start + 1, uptodate))
3067 return 0;
3068
3069 if (btrfs_is_free_space_inode(inode)) {
3070 wq = root->fs_info->endio_freespace_worker;
3071 func = btrfs_freespace_write_helper;
3072 } else {
3073 wq = root->fs_info->endio_write_workers;
3074 func = btrfs_endio_write_helper;
3075 }
3076
3077 btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL,
3078 NULL);
3079 btrfs_queue_work(wq, &ordered_extent->work);
3080
3081 return 0;
3082}
3083
/*
 * Verify the checksum of one block of a read.  @icsum indexes the
 * expected csum within io_bio->csum, @pgoff/@len give the block's
 * position inside @page.  Returns 0 when the csum matches (or when the
 * expected csum is 0), -EIO on a real mismatch; on mismatch the range
 * is overwritten so stale data is not exposed to the reader.
 */
static int __readpage_endio_check(struct inode *inode,
				  struct btrfs_io_bio *io_bio,
				  int icsum, struct page *page,
				  int pgoff, u64 start, size_t len)
{
	char *kaddr;
	u32 csum_expected;
	u32 csum = ~(u32)0;

	csum_expected = *(((u32 *)io_bio->csum) + icsum);

	kaddr = kmap_atomic(page);
	csum = btrfs_csum_data(kaddr + pgoff, csum, len);
	btrfs_csum_final(csum, (char *)&csum);
	if (csum != csum_expected)
		goto zeroit;

	kunmap_atomic(kaddr);
	return 0;
zeroit:
	btrfs_warn_rl(BTRFS_I(inode)->root->fs_info,
		"csum failed ino %llu off %llu csum %u expected csum %u",
			   btrfs_ino(inode), start, csum, csum_expected);
	/* poison the range; a retry with a good copy will rewrite it */
	memset(kaddr + pgoff, 1, len);
	flush_dcache_page(page);
	kunmap_atomic(kaddr);
	/* expected csum of 0 is treated as "no csum", don't fail the read */
	if (csum_expected == 0)
		return 0;
	return -EIO;
}
3114
3115
3116
3117
3118
3119
/*
 * When reads are done, we need to check csums to verify the data is
 * correct.  If there's a match, we allow the bio to finish.  If not,
 * the caller's retry logic will try to find good copies for us.
 */
static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
				      u64 phy_offset, struct page *page,
				      u64 start, u64 end, int mirror)
{
	size_t offset = start - page_offset(page);
	struct inode *inode = page->mapping->host;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_root *root = BTRFS_I(inode)->root;

	/* PageChecked means the csum was already validated elsewhere */
	if (PageChecked(page)) {
		ClearPageChecked(page);
		return 0;
	}

	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
		return 0;

	/* relocation may read ranges that legitimately have no csums */
	if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
	    test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
		clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM);
		return 0;
	}

	/* convert byte offset to a csum index (one csum per block) */
	phy_offset >>= inode->i_sb->s_blocksize_bits;
	return __readpage_endio_check(inode, io_bio, phy_offset, page, offset,
				      start, (size_t)(end - start + 1));
}
3147
/*
 * A deferred final iput: btrfs_add_delayed_iput() queues these on
 * fs_info->delayed_iputs and btrfs_run_delayed_iputs() later drops the
 * inode reference from a safe context.
 */
struct delayed_iput {
	struct list_head list;	/* link into fs_info->delayed_iputs */
	struct inode *inode;	/* inode whose last reference we hold */
};
3152
3153
3154
3155void btrfs_add_delayed_iput(struct inode *inode)
3156{
3157 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
3158 struct delayed_iput *delayed;
3159
3160 if (atomic_add_unless(&inode->i_count, -1, 1))
3161 return;
3162
3163 delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL);
3164 delayed->inode = inode;
3165
3166 spin_lock(&fs_info->delayed_iput_lock);
3167 list_add_tail(&delayed->list, &fs_info->delayed_iputs);
3168 spin_unlock(&fs_info->delayed_iput_lock);
3169}
3170
3171void btrfs_run_delayed_iputs(struct btrfs_root *root)
3172{
3173 LIST_HEAD(list);
3174 struct btrfs_fs_info *fs_info = root->fs_info;
3175 struct delayed_iput *delayed;
3176 int empty;
3177
3178 spin_lock(&fs_info->delayed_iput_lock);
3179 empty = list_empty(&fs_info->delayed_iputs);
3180 spin_unlock(&fs_info->delayed_iput_lock);
3181 if (empty)
3182 return;
3183
3184
3185 spin_lock(&fs_info->delayed_iput_lock);
3186 list_splice_init(&fs_info->delayed_iputs, &list);
3187 spin_unlock(&fs_info->delayed_iput_lock);
3188
3189 while (!list_empty(&list)) {
3190 delayed = list_entry(list.next, struct delayed_iput, list);
3191 list_del(&delayed->list);
3192 iput(delayed->inode);
3193 kfree(delayed);
3194 }
3195
3196}
3197
3198
3199
3200
3201
3202
/*
 * Called at transaction commit time.  If there are no orphan inodes
 * left in the subvolume, remove the root's orphan item from the tree
 * root and free the orphan block reservation.
 */
void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root)
{
	struct btrfs_block_rsv *block_rsv;
	int ret;

	/* unlocked fast path; both conditions are re-checked under the lock */
	if (atomic_read(&root->orphan_inodes) ||
	    root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE)
		return;

	spin_lock(&root->orphan_lock);
	if (atomic_read(&root->orphan_inodes)) {
		spin_unlock(&root->orphan_lock);
		return;
	}

	if (root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) {
		spin_unlock(&root->orphan_lock);
		return;
	}

	/* take ownership of the rsv so it can be freed outside the lock */
	block_rsv = root->orphan_block_rsv;
	root->orphan_block_rsv = NULL;
	spin_unlock(&root->orphan_lock);

	if (test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state) &&
	    btrfs_root_refs(&root->root_item) > 0) {
		ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root,
					    root->root_key.objectid);
		if (ret)
			btrfs_abort_transaction(trans, root, ret);
		else
			clear_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
				  &root->state);
	}

	if (block_rsv) {
		WARN_ON(block_rsv->size > 0);
		btrfs_free_block_rsv(root, block_rsv);
	}
}
3244
3245
3246
3247
3248
3249
3250
3251
/*
 * This creates an orphan entry for the given inode in case something
 * goes wrong in the middle of an unlink/truncate, so the inode can be
 * cleaned up on the next mount.  Also reserves metadata space for the
 * later orphan item deletion if not already reserved.
 */
int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_block_rsv *block_rsv = NULL;
	int reserve = 0;
	int insert = 0;
	int ret;

	/* allocate outside the lock; may race, resolved below */
	if (!root->orphan_block_rsv) {
		block_rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
		if (!block_rsv)
			return -ENOMEM;
	}

	spin_lock(&root->orphan_lock);
	if (!root->orphan_block_rsv) {
		root->orphan_block_rsv = block_rsv;
	} else if (block_rsv) {
		/* someone else installed an rsv first, drop ours */
		btrfs_free_block_rsv(root, block_rsv);
		block_rsv = NULL;
	}

	if (!test_and_set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
			      &BTRFS_I(inode)->runtime_flags)) {
#if 0
		/*
		 * For proper ENOSPC handling, we should do orphan
		 * cleanup when mounting. But this introduces backward
		 * compatibility issues.
		 */
		if (!xchg(&root->orphan_item_inserted, 1))
			insert = 2;
		else
			insert = 1;
#endif
		insert = 1;
		atomic_inc(&root->orphan_inodes);
	}

	if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
			      &BTRFS_I(inode)->runtime_flags))
		reserve = 1;
	spin_unlock(&root->orphan_lock);

	/* grab metadata reservation from transaction handle */
	if (reserve) {
		ret = btrfs_orphan_reserve_metadata(trans, inode);
		ASSERT(!ret);
		if (ret) {
			/* roll back the state we set above */
			atomic_dec(&root->orphan_inodes);
			clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
				  &BTRFS_I(inode)->runtime_flags);
			if (insert)
				clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
					  &BTRFS_I(inode)->runtime_flags);
			return ret;
		}
	}

	/* insert an orphan item to track this unlinked/truncated file */
	if (insert >= 1) {
		ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
		if (ret) {
			atomic_dec(&root->orphan_inodes);
			if (reserve) {
				clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
					  &BTRFS_I(inode)->runtime_flags);
				btrfs_orphan_release_metadata(inode);
			}
			/* -EEXIST means the item is already there, ok */
			if (ret != -EEXIST) {
				clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
					  &BTRFS_I(inode)->runtime_flags);
				btrfs_abort_transaction(trans, root, ret);
				return ret;
			}
		}
		ret = 0;
	}

	/* insert an orphan item to track the subvolume containing orphans */
	if (insert >= 2) {
		ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root,
					       root->root_key.objectid);
		if (ret && ret != -EEXIST) {
			btrfs_abort_transaction(trans, root, ret);
			return ret;
		}
	}
	return 0;
}
3342
3343
3344
3345
3346
3347static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
3348 struct inode *inode)
3349{
3350 struct btrfs_root *root = BTRFS_I(inode)->root;
3351 int delete_item = 0;
3352 int release_rsv = 0;
3353 int ret = 0;
3354
3355 spin_lock(&root->orphan_lock);
3356 if (test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
3357 &BTRFS_I(inode)->runtime_flags))
3358 delete_item = 1;
3359
3360 if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
3361 &BTRFS_I(inode)->runtime_flags))
3362 release_rsv = 1;
3363 spin_unlock(&root->orphan_lock);
3364
3365 if (delete_item) {
3366 atomic_dec(&root->orphan_inodes);
3367 if (trans)
3368 ret = btrfs_del_orphan_item(trans, root,
3369 btrfs_ino(inode));
3370 }
3371
3372 if (release_rsv)
3373 btrfs_orphan_release_metadata(inode);
3374
3375 return ret;
3376}
3377
3378
3379
3380
3381
/*
 * Scan the orphan items of @root and finish whatever a previous crash
 * left pending: inodes whose orphan item survived without the inode
 * are dropped, unlinked inodes are deleted via the final iput, and
 * interrupted truncates are restarted.
 */
int btrfs_orphan_cleanup(struct btrfs_root *root)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key, found_key;
	struct btrfs_trans_handle *trans;
	struct inode *inode;
	u64 last_objectid = 0;
	int ret = 0, nr_unlink = 0, nr_truncate = 0;

	/* make sure only one task runs the cleanup per root */
	if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
		return 0;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}
	path->reada = READA_BACK;

	/* search backwards from the highest possible orphan item */
	key.objectid = BTRFS_ORPHAN_OBJECTID;
	key.type = BTRFS_ORPHAN_ITEM_KEY;
	key.offset = (u64)-1;

	while (1) {
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			goto out;

		/*
		 * if ret == 0 means we found what we were searching for, which
		 * is weird, but possible, so only screw with path if we didn't
		 * find the key and see if we have stuff that matches
		 */
		if (ret > 0) {
			ret = 0;
			if (path->slots[0] == 0)
				break;
			path->slots[0]--;
		}

		/* pull out the item */
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		/* make sure the item matches what we want */
		if (found_key.objectid != BTRFS_ORPHAN_OBJECTID)
			break;
		if (found_key.type != BTRFS_ORPHAN_ITEM_KEY)
			break;

		/* release the path since we're done with it */
		btrfs_release_path(path);

		/*
		 * The inode number is stored in the offset of the orphan
		 * item.  Seeing the same offset twice means the deletion
		 * below failed to remove the item - bail out rather than
		 * loop forever.
		 */
		if (found_key.offset == last_objectid) {
			btrfs_err(root->fs_info,
				"Error removing orphan entry, stopping orphan cleanup");
			ret = -EINVAL;
			goto out;
		}

		last_objectid = found_key.offset;

		/* look up the inode the orphan item refers to */
		found_key.objectid = found_key.offset;
		found_key.type = BTRFS_INODE_ITEM_KEY;
		found_key.offset = 0;
		inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
		ret = PTR_RET(inode);
		if (ret && ret != -ENOENT)
			goto out;

		if (ret == -ENOENT && root == root->fs_info->tree_root) {
			struct btrfs_root *dead_root;
			struct btrfs_fs_info *fs_info = root->fs_info;
			int is_dead_root = 0;

			/*
			 * An orphan in the tree root can come from two
			 * sources: a snapshot deletion in progress, or a
			 * free space cache inode.  The snapshot orphan must
			 * not be deleted here; a root on the dead_roots
			 * list identifies a snapshot deletion in progress.
			 */
			spin_lock(&fs_info->trans_lock);
			list_for_each_entry(dead_root, &fs_info->dead_roots,
					    root_list) {
				if (dead_root->root_key.objectid ==
				    found_key.objectid) {
					is_dead_root = 1;
					break;
				}
			}
			spin_unlock(&fs_info->trans_lock);
			if (is_dead_root) {
				/* prevent this orphan from being found again */
				key.offset = found_key.objectid - 1;
				continue;
			}
		}

		/*
		 * Inode is already gone but the orphan item is still there,
		 * kill the orphan item.
		 */
		if (ret == -ENOENT) {
			trans = btrfs_start_transaction(root, 1);
			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
				goto out;
			}
			btrfs_debug(root->fs_info, "auto deleting %Lu",
				found_key.objectid);
			ret = btrfs_del_orphan_item(trans, root,
						    found_key.objectid);
			btrfs_end_transaction(trans, root);
			if (ret)
				goto out;
			continue;
		}

		/*
		 * add this inode to the orphan list so btrfs_orphan_del does
		 * the proper thing when we hit it
		 */
		set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
			&BTRFS_I(inode)->runtime_flags);
		atomic_inc(&root->orphan_inodes);

		/* if we have links, this was a truncate, lets do that */
		if (inode->i_nlink) {
			if (WARN_ON(!S_ISREG(inode->i_mode))) {
				iput(inode);
				continue;
			}
			nr_truncate++;

			/* 1 for the orphan item deletion. */
			trans = btrfs_start_transaction(root, 1);
			if (IS_ERR(trans)) {
				iput(inode);
				ret = PTR_ERR(trans);
				goto out;
			}
			ret = btrfs_orphan_add(trans, inode);
			btrfs_end_transaction(trans, root);
			if (ret) {
				iput(inode);
				goto out;
			}

			ret = btrfs_truncate(inode);
			if (ret)
				btrfs_orphan_del(NULL, inode);
		} else {
			nr_unlink++;
		}

		/* this will do delete_inode and everything for us */
		iput(inode);
		if (ret)
			goto out;
	}
	/* release the path since we're done with it */
	btrfs_release_path(path);

	root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE;

	if (root->orphan_block_rsv)
		btrfs_block_rsv_release(root, root->orphan_block_rsv,
					(u64)-1);

	if (root->orphan_block_rsv ||
	    test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state)) {
		trans = btrfs_join_transaction(root);
		if (!IS_ERR(trans))
			btrfs_end_transaction(trans, root);
	}

	if (nr_unlink)
		btrfs_debug(root->fs_info, "unlinked %d orphans", nr_unlink);
	if (nr_truncate)
		btrfs_debug(root->fs_info, "truncated %d orphans", nr_truncate);

out:
	if (ret)
		btrfs_err(root->fs_info,
			"could not do orphan cleanup %d", ret);
	btrfs_free_path(path);
	return ret;
}
3581
3582
3583
3584
3585
3586
3587
/*
 * Very simple check that peeks ahead in the leaf looking for xattrs.
 * If we don't find any xattrs, we know there can't be any acls.
 *
 * @slot is the slot the inode is in, @objectid is the objectid of the
 * inode; *first_xattr_slot is set to the first xattr slot found, or -1.
 * Returns 0 when there are definitely no acls, 1 when there might be.
 */
static noinline int acls_after_inode_item(struct extent_buffer *leaf,
					  int slot, u64 objectid,
					  int *first_xattr_slot)
{
	u32 nritems = btrfs_header_nritems(leaf);
	struct btrfs_key found_key;
	/* cached name hashes of the two POSIX ACL xattrs (computed once) */
	static u64 xattr_access = 0;
	static u64 xattr_default = 0;
	int scanned = 0;

	if (!xattr_access) {
		xattr_access = btrfs_name_hash(POSIX_ACL_XATTR_ACCESS,
					strlen(POSIX_ACL_XATTR_ACCESS));
		xattr_default = btrfs_name_hash(POSIX_ACL_XATTR_DEFAULT,
					strlen(POSIX_ACL_XATTR_DEFAULT));
	}

	slot++;
	*first_xattr_slot = -1;
	while (slot < nritems) {
		btrfs_item_key_to_cpu(leaf, &found_key, slot);

		/* we found a different objectid, there must not be acls */
		if (found_key.objectid != objectid)
			return 0;

		/* we found an xattr, assume we've got an acl */
		if (found_key.type == BTRFS_XATTR_ITEM_KEY) {
			if (*first_xattr_slot == -1)
				*first_xattr_slot = slot;
			if (found_key.offset == xattr_access ||
			    found_key.offset == xattr_default)
				return 1;
		}

		/*
		 * we found a key greater than an xattr key, there can't
		 * be any acls later on
		 */
		if (found_key.type > BTRFS_XATTR_ITEM_KEY)
			return 0;

		slot++;
		scanned++;

		/*
		 * it goes inode, inode backrefs, xattrs, extents,
		 * so if there are lots of hard links to an inode there can
		 * be a lot of backrefs.  Don't waste time searching too hard,
		 * this is just an optimization
		 */
		if (scanned >= 8)
			break;
	}
	/*
	 * we hit the end of the leaf before we found an xattr or
	 * something larger than an xattr.  We have to assume the inode
	 * has acls
	 */
	if (*first_xattr_slot == -1)
		*first_xattr_slot = slot;
	return 1;
}
3650
3651
3652
3653
/*
 * read an inode item from the btree into the in-memory VFS inode
 */
static int btrfs_read_locked_inode(struct inode *inode)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_inode_item *inode_item;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_key location;
	unsigned long ptr;
	int maybe_acls;
	u32 rdev;
	int ret;
	bool filled = false;
	int first_xattr_slot;

	/*
	 * btrfs_fill_inode() succeeding means the inode fields were
	 * filled from a cached copy, so the item decode below can be
	 * skipped (we still need the tree lookup for refs/xattrs).
	 */
	ret = btrfs_fill_inode(inode, &rdev);
	if (!ret)
		filled = true;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto make_bad;
	}

	memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));

	ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
	if (ret) {
		if (ret > 0)
			ret = -ENOENT;
		goto make_bad;
	}

	leaf = path->nodes[0];

	if (filled)
		goto cache_index;

	/* decode the on-disk inode item into the VFS inode */
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	inode->i_mode = btrfs_inode_mode(leaf, inode_item);
	set_nlink(inode, btrfs_inode_nlink(leaf, inode_item));
	i_uid_write(inode, btrfs_inode_uid(leaf, inode_item));
	i_gid_write(inode, btrfs_inode_gid(leaf, inode_item));
	btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));

	inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime);
	inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->atime);

	inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->mtime);
	inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->mtime);

	inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->ctime);
	inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->ctime);

	BTRFS_I(inode)->i_otime.tv_sec =
		btrfs_timespec_sec(leaf, &inode_item->otime);
	BTRFS_I(inode)->i_otime.tv_nsec =
		btrfs_timespec_nsec(leaf, &inode_item->otime);

	inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item));
	BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
	BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item);

	inode->i_version = btrfs_inode_sequence(leaf, inode_item);
	inode->i_generation = BTRFS_I(inode)->generation;
	inode->i_rdev = 0;
	rdev = btrfs_inode_rdev(leaf, inode_item);

	BTRFS_I(inode)->index_cnt = (u64)-1;
	BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);

cache_index:
	/*
	 * If we were modified in the current generation and evicted from
	 * memory and then re-read we need to do a full sync since we don't
	 * have any idea about which extents were modified before we were
	 * evicted from cache.
	 */
	if (BTRFS_I(inode)->last_trans == root->fs_info->generation)
		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
			&BTRFS_I(inode)->runtime_flags);

	/*
	 * Conservatively mirror last_trans into last_unlink_trans: after a
	 * reload from disk we can't know whether an unlink happened in the
	 * inode's last transaction, and assuming it did makes a later fsync
	 * take the safer (full commit) path rather than lose a removal.
	 */
	BTRFS_I(inode)->last_unlink_trans = BTRFS_I(inode)->last_trans;

	/*
	 * Try to pick up the directory index from the INODE_REF/EXTREF
	 * item that immediately follows the inode item; only valid for
	 * inodes with a single link.
	 */
	path->slots[0]++;
	if (inode->i_nlink != 1 ||
	    path->slots[0] >= btrfs_header_nritems(leaf))
		goto cache_acl;

	btrfs_item_key_to_cpu(leaf, &location, path->slots[0]);
	if (location.objectid != btrfs_ino(inode))
		goto cache_acl;

	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
	if (location.type == BTRFS_INODE_REF_KEY) {
		struct btrfs_inode_ref *ref;

		ref = (struct btrfs_inode_ref *)ptr;
		BTRFS_I(inode)->dir_index = btrfs_inode_ref_index(leaf, ref);
	} else if (location.type == BTRFS_INODE_EXTREF_KEY) {
		struct btrfs_inode_extref *extref;

		extref = (struct btrfs_inode_extref *)ptr;
		BTRFS_I(inode)->dir_index = btrfs_inode_extref_index(leaf,
								     extref);
	}
cache_acl:
	/*
	 * try to precache a NULL acl entry for files that don't have
	 * any xattrs or acls
	 */
	maybe_acls = acls_after_inode_item(leaf, path->slots[0],
					   btrfs_ino(inode), &first_xattr_slot);
	if (first_xattr_slot != -1) {
		path->slots[0] = first_xattr_slot;
		ret = btrfs_load_inode_props(inode, path);
		if (ret)
			btrfs_err(root->fs_info,
				"error loading props for ino %llu (root %llu): %d",
				btrfs_ino(inode),
				root->root_key.objectid, ret);
	}
	btrfs_free_path(path);

	if (!maybe_acls)
		cache_no_acl(inode);

	/* wire up the per-type inode/file/address-space operations */
	switch (inode->i_mode & S_IFMT) {
	case S_IFREG:
		inode->i_mapping->a_ops = &btrfs_aops;
		inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
		BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
		inode->i_fop = &btrfs_file_operations.kabi_fops;
		inode->i_op = &btrfs_file_inode_operations;
		break;
	case S_IFDIR:
		inode->i_fop = &btrfs_dir_file_operations;
		if (root == root->fs_info->tree_root) {
			/* directories in the tree root are read-only */
			inode->i_op = &btrfs_dir_ro_inode_operations;
		} else {
			inode->i_op = &btrfs_dir_inode_operations.ops;
			inode->i_flags |= S_IOPS_WRAPPER;
		}
		break;
	case S_IFLNK:
		inode->i_op = &btrfs_symlink_inode_operations;
		inode->i_mapping->a_ops = &btrfs_symlink_aops;
		inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
		break;
	default:
		inode->i_op = &btrfs_special_inode_operations;
		init_special_inode(inode, inode->i_mode, rdev);
		break;
	}

	btrfs_update_iflags(inode);
	return 0;

make_bad:
	btrfs_free_path(path);
	make_bad_inode(inode);
	return ret;
}
3848
3849
3850
3851
3852static void fill_inode_item(struct btrfs_trans_handle *trans,
3853 struct extent_buffer *leaf,
3854 struct btrfs_inode_item *item,
3855 struct inode *inode)
3856{
3857 struct btrfs_map_token token;
3858
3859 btrfs_init_map_token(&token);
3860
3861 btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
3862 btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
3863 btrfs_set_token_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size,
3864 &token);
3865 btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
3866 btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
3867
3868 btrfs_set_token_timespec_sec(leaf, &item->atime,
3869 inode->i_atime.tv_sec, &token);
3870 btrfs_set_token_timespec_nsec(leaf, &item->atime,
3871 inode->i_atime.tv_nsec, &token);
3872
3873 btrfs_set_token_timespec_sec(leaf, &item->mtime,
3874 inode->i_mtime.tv_sec, &token);
3875 btrfs_set_token_timespec_nsec(leaf, &item->mtime,
3876 inode->i_mtime.tv_nsec, &token);
3877
3878 btrfs_set_token_timespec_sec(leaf, &item->ctime,
3879 inode->i_ctime.tv_sec, &token);
3880 btrfs_set_token_timespec_nsec(leaf, &item->ctime,
3881 inode->i_ctime.tv_nsec, &token);
3882
3883 btrfs_set_token_timespec_sec(leaf, &item->otime,
3884 BTRFS_I(inode)->i_otime.tv_sec, &token);
3885 btrfs_set_token_timespec_nsec(leaf, &item->otime,
3886 BTRFS_I(inode)->i_otime.tv_nsec, &token);
3887
3888 btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
3889 &token);
3890 btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation,
3891 &token);
3892 btrfs_set_token_inode_sequence(leaf, item, inode->i_version, &token);
3893 btrfs_set_token_inode_transid(leaf, item, trans->transid, &token);
3894 btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token);
3895 btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token);
3896 btrfs_set_token_inode_block_group(leaf, item, 0, &token);
3897}
3898
3899
3900
3901
3902static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
3903 struct btrfs_root *root, struct inode *inode)
3904{
3905 struct btrfs_inode_item *inode_item;
3906 struct btrfs_path *path;
3907 struct extent_buffer *leaf;
3908 int ret;
3909
3910 path = btrfs_alloc_path();
3911 if (!path)
3912 return -ENOMEM;
3913
3914 path->leave_spinning = 1;
3915 ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location,
3916 1);
3917 if (ret) {
3918 if (ret > 0)
3919 ret = -ENOENT;
3920 goto failed;
3921 }
3922
3923 leaf = path->nodes[0];
3924 inode_item = btrfs_item_ptr(leaf, path->slots[0],
3925 struct btrfs_inode_item);
3926
3927 fill_inode_item(trans, leaf, inode_item, inode);
3928 btrfs_mark_buffer_dirty(leaf);
3929 btrfs_set_inode_last_trans(trans, inode);
3930 ret = 0;
3931failed:
3932 btrfs_free_path(path);
3933 return ret;
3934}
3935
3936
3937
3938
3939noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
3940 struct btrfs_root *root, struct inode *inode)
3941{
3942 int ret;
3943
3944
3945
3946
3947
3948
3949
3950
3951 if (!btrfs_is_free_space_inode(inode)
3952 && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
3953 && !test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags)) {
3954 btrfs_update_root_times(trans, root);
3955
3956 ret = btrfs_delayed_update_inode(trans, root, inode);
3957 if (!ret)
3958 btrfs_set_inode_last_trans(trans, inode);
3959 return ret;
3960 }
3961
3962 return btrfs_update_inode_item(trans, root, inode);
3963}
3964
3965noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
3966 struct btrfs_root *root,
3967 struct inode *inode)
3968{
3969 int ret;
3970
3971 ret = btrfs_update_inode(trans, root, inode);
3972 if (ret == -ENOSPC)
3973 return btrfs_update_inode_item(trans, root, inode);
3974 return ret;
3975}
3976
3977
3978
3979
3980
3981
/*
 * Remove the directory entry @name in @dir pointing at @inode: delete
 * the dir item and dir index, the inode back reference, and any
 * matching tree-log entries, then update @dir's size and timestamps.
 * The caller is responsible for dropping @inode's link count (see
 * btrfs_unlink_inode()).
 */
static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct inode *dir, struct inode *inode,
				const char *name, int name_len)
{
	struct btrfs_path *path;
	int ret = 0;
	struct extent_buffer *leaf;
	struct btrfs_dir_item *di;
	struct btrfs_key key;
	u64 index;
	u64 ino = btrfs_ino(inode);
	u64 dir_ino = btrfs_ino(dir);

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	path->leave_spinning = 1;
	di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
				    name, name_len, -1);
	if (IS_ERR(di)) {
		ret = PTR_ERR(di);
		goto err;
	}
	if (!di) {
		/* no such entry - nothing to unlink */
		ret = -ENOENT;
		goto err;
	}
	leaf = path->nodes[0];
	btrfs_dir_item_key_to_cpu(leaf, di, &key);
	ret = btrfs_delete_one_dir_name(trans, root, path, di);
	if (ret)
		goto err;
	btrfs_release_path(path);

	/*
	 * If the inode still caches the index of its directory entry, try
	 * to queue the inode ref removal as a delayed item; on success we
	 * already know the index and can skip the tree search that
	 * btrfs_del_inode_ref() would otherwise have to do.
	 */
	if (BTRFS_I(inode)->dir_index) {
		ret = btrfs_delayed_delete_inode_ref(inode);
		if (!ret) {
			index = BTRFS_I(inode)->dir_index;
			goto skip_backref;
		}
	}

	ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
				  dir_ino, &index);
	if (ret) {
		btrfs_info(root->fs_info,
			"failed to delete reference to %.*s, inode %llu parent %llu",
			name_len, name, ino, dir_ino);
		btrfs_abort_transaction(trans, root, ret);
		goto err;
	}
skip_backref:
	ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
	if (ret) {
		btrfs_abort_transaction(trans, root, ret);
		goto err;
	}

	/* scrub the now stale inode ref and dir entries from the log tree */
	ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len,
					 inode, dir_ino);
	if (ret != 0 && ret != -ENOENT) {
		btrfs_abort_transaction(trans, root, ret);
		goto err;
	}

	ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
					   dir, index);
	if (ret == -ENOENT)
		ret = 0;	/* nothing logged for this entry - fine */
	else if (ret)
		btrfs_abort_transaction(trans, root, ret);
err:
	btrfs_free_path(path);
	if (ret)
		goto out;

	/* each name accounts for i_size twice: dir item + dir index entry */
	btrfs_i_size_write(dir, dir->i_size - name_len * 2);
	inode_inc_iversion(inode);
	inode_inc_iversion(dir);
	inode->i_ctime = dir->i_mtime =
		dir->i_ctime = current_fs_time(inode->i_sb);
	ret = btrfs_update_inode(trans, root, dir);
out:
	return ret;
}
4081
/*
 * Unlink @name from @dir and drop one link on @inode, then update the
 * inode item so the new link count reaches disk.
 */
int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root,
		       struct inode *dir, struct inode *inode,
		       const char *name, int name_len)
{
	int ret = __btrfs_unlink_inode(trans, root, dir, inode,
				       name, name_len);

	if (ret)
		return ret;
	drop_nlink(inode);
	return btrfs_update_inode(trans, root, inode);
}
4095
4096
4097
4098
4099
4100
4101
4102
4103
4104static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir)
4105{
4106 struct btrfs_root *root = BTRFS_I(dir)->root;
4107
4108
4109
4110
4111
4112
4113
4114
4115 return btrfs_start_transaction_fallback_global_rsv(root, 5, 5);
4116}
4117
4118static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
4119{
4120 struct btrfs_root *root = BTRFS_I(dir)->root;
4121 struct btrfs_trans_handle *trans;
4122 struct inode *inode = dentry->d_inode;
4123 int ret;
4124
4125 trans = __unlink_start_trans(dir);
4126 if (IS_ERR(trans))
4127 return PTR_ERR(trans);
4128
4129 btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0);
4130
4131 ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
4132 dentry->d_name.name, dentry->d_name.len);
4133 if (ret)
4134 goto out;
4135
4136 if (inode->i_nlink == 0) {
4137 ret = btrfs_orphan_add(trans, inode);
4138 if (ret)
4139 goto out;
4140 }
4141
4142out:
4143 btrfs_end_transaction(trans, root);
4144 btrfs_btree_balance_dirty(root);
4145 return ret;
4146}
4147
/*
 * Remove the directory entry in @dir that points at the subvolume root
 * @objectid (unlink a subvolume/snapshot from its parent directory).
 * Unlike regular inodes, the back reference is a root ref stored in
 * the tree of tree roots, not an inode ref.
 */
int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
			struct btrfs_root *root,
			struct inode *dir, u64 objectid,
			const char *name, int name_len)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_dir_item *di;
	struct btrfs_key key;
	u64 index;
	int ret;
	u64 dir_ino = btrfs_ino(dir);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
				   name, name_len, -1);
	if (IS_ERR_OR_NULL(di)) {
		if (!di)
			ret = -ENOENT;
		else
			ret = PTR_ERR(di);
		goto out;
	}

	leaf = path->nodes[0];
	btrfs_dir_item_key_to_cpu(leaf, di, &key);
	/* subvolume entries must point at a root item with our objectid */
	WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
	ret = btrfs_delete_one_dir_name(trans, root, path, di);
	if (ret) {
		btrfs_abort_transaction(trans, root, ret);
		goto out;
	}
	btrfs_release_path(path);

	ret = btrfs_del_root_ref(trans, root->fs_info->tree_root,
				 objectid, root->root_key.objectid,
				 dir_ino, &index, name, name_len);
	if (ret < 0) {
		if (ret != -ENOENT) {
			btrfs_abort_transaction(trans, root, ret);
			goto out;
		}
		/*
		 * Root ref missing (-ENOENT is tolerated here); fall back
		 * to finding the dir index entry to learn which index to
		 * delete.  NOTE(review): presumably covers partially
		 * created/deleted subvolumes - confirm against callers.
		 */
		di = btrfs_search_dir_index_item(root, path, dir_ino,
						 name, name_len);
		if (IS_ERR_OR_NULL(di)) {
			if (!di)
				ret = -ENOENT;
			else
				ret = PTR_ERR(di);
			btrfs_abort_transaction(trans, root, ret);
			goto out;
		}

		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		btrfs_release_path(path);
		index = key.offset;
	}
	btrfs_release_path(path);

	ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
	if (ret) {
		btrfs_abort_transaction(trans, root, ret);
		goto out;
	}

	/* each name accounts for i_size twice: dir item + dir index entry */
	btrfs_i_size_write(dir, dir->i_size - name_len * 2);
	inode_inc_iversion(dir);
	dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
	ret = btrfs_update_inode_fallback(trans, root, dir);
	if (ret)
		btrfs_abort_transaction(trans, root, ret);
out:
	btrfs_free_path(path);
	return ret;
}
4227
/*
 * VFS ->rmdir: remove an empty directory.  Subvolume roots cannot be
 * removed this way (use the subvolume ioctls); empty-subvolume
 * placeholder dirs are unlinked via btrfs_unlink_subvol().
 */
static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
{
	struct inode *inode = dentry->d_inode;
	int err = 0;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct btrfs_trans_handle *trans;
	u64 last_unlink_trans;

	/* a directory larger than the empty size still has entries */
	if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
		return -ENOTEMPTY;
	/* the root dir of a subvolume cannot be rmdir'ed */
	if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID)
		return -EPERM;

	trans = __unlink_start_trans(dir);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
		/* placeholder dir for a dead subvolume: drop the root ref */
		err = btrfs_unlink_subvol(trans, root, dir,
					  BTRFS_I(inode)->location.objectid,
					  dentry->d_name.name,
					  dentry->d_name.len);
		goto out;
	}

	err = btrfs_orphan_add(trans, inode);
	if (err)
		goto out;

	last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;

	/* now the directory is empty */
	err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
				 dentry->d_name.name, dentry->d_name.len);
	if (!err) {
		btrfs_i_size_write(inode, 0);

		/*
		 * Propagate the last_unlink_trans of the removed directory
		 * to its parent, so that if something was unlinked inside
		 * it during the current transaction, a later fsync of the
		 * parent knows it cannot rely solely on the log tree.
		 */
		if (last_unlink_trans >= trans->transid)
			BTRFS_I(dir)->last_unlink_trans = last_unlink_trans;
	}
out:
	btrfs_end_transaction(trans, root);
	btrfs_btree_balance_dirty(root);

	return err;
}
4284
4285static int truncate_space_check(struct btrfs_trans_handle *trans,
4286 struct btrfs_root *root,
4287 u64 bytes_deleted)
4288{
4289 int ret;
4290
4291
4292
4293
4294
4295 bytes_deleted = btrfs_csum_bytes_to_leaves(root, bytes_deleted);
4296 bytes_deleted *= root->nodesize;
4297 ret = btrfs_block_rsv_add(root, &root->fs_info->trans_block_rsv,
4298 bytes_deleted, BTRFS_RESERVE_NO_FLUSH);
4299 if (!ret) {
4300 trace_btrfs_space_reservation(root->fs_info, "transaction",
4301 trans->transid,
4302 bytes_deleted, 1);
4303 trans->bytes_reserved += bytes_deleted;
4304 }
4305 return ret;
4306
4307}
4308
4309static int truncate_inline_extent(struct inode *inode,
4310 struct btrfs_path *path,
4311 struct btrfs_key *found_key,
4312 const u64 item_end,
4313 const u64 new_size)
4314{
4315 struct extent_buffer *leaf = path->nodes[0];
4316 int slot = path->slots[0];
4317 struct btrfs_file_extent_item *fi;
4318 u32 size = (u32)(new_size - found_key->offset);
4319 struct btrfs_root *root = BTRFS_I(inode)->root;
4320
4321 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
4322
4323 if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE) {
4324 loff_t offset = new_size;
4325 loff_t page_end = ALIGN(offset, PAGE_CACHE_SIZE);
4326
4327
4328
4329
4330
4331
4332
4333
4334
4335
4336 btrfs_release_path(path);
4337 return btrfs_truncate_block(inode, offset, page_end - offset,
4338 0);
4339 }
4340
4341 btrfs_set_file_extent_ram_bytes(leaf, fi, size);
4342 size = btrfs_file_extent_calc_inline_size(size);
4343 btrfs_truncate_item(root, path, size, 1);
4344
4345 if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
4346 inode_sub_bytes(inode, item_end + 1 - new_size);
4347
4348 return 0;
4349}
4350
4351
4352
4353
4354
4355
4356
4357
4358
4359
4360
4361
/*
 * Remove everything past @new_size from @inode's items in @root,
 * walking the inode's keys from highest to lowest and deleting any
 * item of type >= @min_type lying beyond the new size.  File extent
 * items straddling @new_size are shrunk in place.  Returns 0 on
 * success, -EAGAIN when the caller must restart in a fresh
 * transaction, or another negative errno.
 */
int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct inode *inode,
			       u64 new_size, u32 min_type)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u64 extent_start = 0;
	u64 extent_num_bytes = 0;
	u64 extent_offset = 0;
	u64 item_end = 0;
	u64 last_size = new_size;	/* highest offset still in the file */
	u32 found_type = (u8)-1;
	int found_extent;
	int del_item;
	int pending_del_nr = 0;		/* batched contiguous deletions */
	int pending_del_slot = 0;
	int extent_type = -1;
	int ret;
	int err = 0;
	u64 ino = btrfs_ino(inode);
	u64 bytes_deleted = 0;
	bool be_nice = 0;		/* throttle / allow early -EAGAIN */
	bool should_throttle = 0;
	bool should_end = 0;

	BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);

	/*
	 * Be nice (throttle delayed refs, split the work across
	 * transactions) for reference-counted roots, but never for the
	 * free space cache inode.
	 */
	if (!btrfs_is_free_space_inode(inode) &&
	    test_bit(BTRFS_ROOT_REF_COWS, &root->state))
		be_nice = 1;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->reada = READA_BACK;

	/*
	 * Drop cached extent maps past the new size so stale mappings
	 * are not used; needed for ref-counted roots and the tree root
	 * (relocation inodes).
	 */
	if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
	    root == root->fs_info->tree_root)
		btrfs_drop_extent_cache(inode, ALIGN(new_size,
					root->sectorsize), (u64)-1, 0);

	/*
	 * A full delete (min_type == 0) kills the delayed items first so
	 * the delayed-inode code cannot insert items while we walk and
	 * delete them here.
	 */
	if (min_type == 0 && root == BTRFS_I(inode)->root)
		btrfs_kill_delayed_inode_items(inode);

	/* start the search just past the highest possible key for @ino */
	key.objectid = ino;
	key.offset = (u64)-1;
	key.type = (u8)-1;

search_again:
	/*
	 * In be_nice mode, after a chunk of work let the transaction end
	 * if it wants to; the caller restarts us with a new one.
	 */
	if (be_nice && bytes_deleted > SZ_32M) {
		if (btrfs_should_end_transaction(trans, root)) {
			err = -EAGAIN;
			goto error;
		}
	}


	path->leave_spinning = 1;
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}

	if (ret > 0) {
		/*
		 * Key (ino, (u8)-1, -1) never exists, so a positive
		 * return positions us just past the inode's last item;
		 * step back onto it.
		 */
		if (path->slots[0] == 0)
			goto out;
		path->slots[0]--;
	}

	while (1) {
		fi = NULL;
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		found_type = found_key.type;

		if (found_key.objectid != ino)
			break;

		if (found_type < min_type)
			break;

		item_end = found_key.offset;
		if (found_type == BTRFS_EXTENT_DATA_KEY) {
			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);
			extent_type = btrfs_file_extent_type(leaf, fi);
			if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
				item_end +=
				    btrfs_file_extent_num_bytes(leaf, fi);
			} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
				item_end += btrfs_file_extent_inline_len(leaf,
							 path->slots[0], fi);
			}
			/* item_end is the last byte covered, inclusive */
			item_end--;
		}
		if (found_type > min_type) {
			del_item = 1;
		} else {
			if (item_end < new_size) {
				/*
				 * Everything from here down already fits
				 * inside new_size.  With NO_HOLES there is
				 * no hole extent to shrink, so force
				 * last_size back to new_size for the
				 * i_size update at error:/out:.
				 */
				if (btrfs_fs_incompat(root->fs_info, NO_HOLES))
					last_size = new_size;
				break;
			}
			if (found_key.offset >= new_size)
				del_item = 1;
			else
				del_item = 0;
		}
		found_extent = 0;
		/* only file extents need the shrink/free handling below */
		if (found_type != BTRFS_EXTENT_DATA_KEY)
			goto delete;

		if (del_item)
			last_size = found_key.offset;
		else
			last_size = new_size;

		if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
			u64 num_dec;
			extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
			if (!del_item) {
				/* shrink this regular extent in place */
				u64 orig_num_bytes =
					btrfs_file_extent_num_bytes(leaf, fi);
				extent_num_bytes = ALIGN(new_size -
						found_key.offset,
						root->sectorsize);
				btrfs_set_file_extent_num_bytes(leaf, fi,
							 extent_num_bytes);
				num_dec = (orig_num_bytes -
					   extent_num_bytes);
				if (test_bit(BTRFS_ROOT_REF_COWS,
					     &root->state) &&
				    extent_start != 0)
					inode_sub_bytes(inode, num_dec);
				btrfs_mark_buffer_dirty(leaf);
			} else {
				extent_num_bytes =
					btrfs_file_extent_disk_num_bytes(leaf,
									 fi);
				extent_offset = found_key.offset -
					btrfs_file_extent_offset(leaf, fi);

				/* disk_bytenr == 0 marks a hole: no extent to free */
				num_dec = btrfs_file_extent_num_bytes(leaf, fi);
				if (extent_start != 0) {
					found_extent = 1;
					if (test_bit(BTRFS_ROOT_REF_COWS,
						     &root->state))
						inode_sub_bytes(inode, num_dec);
				}
			}
		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
			/*
			 * Only plain (unencrypted, unencoded) inline
			 * extents can be truncated in place.
			 */
			if (!del_item &&
			    btrfs_file_extent_encryption(leaf, fi) == 0 &&
			    btrfs_file_extent_other_encoding(leaf, fi) == 0) {

				/*
				 * truncate_inline_extent() releases the path
				 * in the compressed case, which would
				 * invalidate pending_del_slot - flush any
				 * pending deletions first.
				 */
				if (btrfs_file_extent_compression(leaf, fi) !=
				    BTRFS_COMPRESS_NONE && pending_del_nr) {
					err = btrfs_del_items(trans, root, path,
							      pending_del_slot,
							      pending_del_nr);
					if (err) {
						btrfs_abort_transaction(trans,
									root,
									err);
						goto error;
					}
					pending_del_nr = 0;
				}

				err = truncate_inline_extent(inode, path,
							     &found_key,
							     item_end,
							     new_size);
				if (err) {
					btrfs_abort_transaction(trans,
								root, err);
					goto error;
				}
			} else if (test_bit(BTRFS_ROOT_REF_COWS,
					    &root->state)) {
				inode_sub_bytes(inode, item_end + 1 - new_size);
			}
		}
delete:
		if (del_item) {
			/* batch adjacent deletions into one btrfs_del_items() */
			if (!pending_del_nr) {
				/* no pending yet, start a new batch here */
				pending_del_slot = path->slots[0];
				pending_del_nr = 1;
			} else if (pending_del_nr &&
				   path->slots[0] + 1 == pending_del_slot) {
				/* contiguous with the pending batch, extend it */
				pending_del_nr++;
				pending_del_slot = path->slots[0];
			} else {
				BUG();
			}
		} else {
			break;
		}
		should_throttle = 0;

		if (found_extent &&
		    (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
		     root == root->fs_info->tree_root)) {
			btrfs_set_path_blocking(path);
			bytes_deleted += extent_num_bytes;
			ret = btrfs_free_extent(trans, root, extent_start,
						extent_num_bytes, 0,
						btrfs_header_owner(leaf),
						ino, extent_offset);
			BUG_ON(ret);
			if (btrfs_should_throttle_delayed_refs(trans, root))
				btrfs_async_run_delayed_refs(root,
					trans->delayed_ref_updates * 2,
					trans->transid, 0);
			if (be_nice) {
				if (truncate_space_check(trans, root,
							 extent_num_bytes)) {
					should_end = 1;
				}
				if (btrfs_should_throttle_delayed_refs(trans,
								       root)) {
					should_throttle = 1;
				}
			}
		}

		if (found_type == BTRFS_INODE_ITEM_KEY)
			break;

		/*
		 * Flush the batch and restart the search whenever we would
		 * walk off the leaf, the batch is no longer contiguous, or
		 * throttling asked us to take a break.
		 */
		if (path->slots[0] == 0 ||
		    path->slots[0] != pending_del_slot ||
		    should_throttle || should_end) {
			if (pending_del_nr) {
				ret = btrfs_del_items(trans, root, path,
						pending_del_slot,
						pending_del_nr);
				if (ret) {
					btrfs_abort_transaction(trans,
								root, ret);
					goto error;
				}
				pending_del_nr = 0;
			}
			btrfs_release_path(path);
			if (should_throttle) {
				unsigned long updates = trans->delayed_ref_updates;
				if (updates) {
					trans->delayed_ref_updates = 0;
					ret = btrfs_run_delayed_refs(trans, root, updates * 2);
					if (ret && !err)
						err = ret;
				}
			}
			/*
			 * The space reservation failed (should_end): bail
			 * with -EAGAIN so the caller restarts the truncate
			 * in a fresh transaction.
			 */
			if (should_end) {
				err = -EAGAIN;
				goto error;
			}
			goto search_again;
		} else {
			path->slots[0]--;
		}
	}
out:
	if (pending_del_nr) {
		ret = btrfs_del_items(trans, root, path, pending_del_slot,
				      pending_del_nr);
		if (ret)
			btrfs_abort_transaction(trans, root, ret);
	}
error:
	if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
		btrfs_ordered_update_i_size(inode, last_size, NULL);

	btrfs_free_path(path);

	if (be_nice && bytes_deleted > SZ_32M) {
		/* flush a share of the delayed refs we generated */
		unsigned long updates = trans->delayed_ref_updates;
		if (updates) {
			trans->delayed_ref_updates = 0;
			ret = btrfs_run_delayed_refs(trans, root, updates * 2);
			if (ret && !err)
				err = ret;
		}
	}
	return err;
}
4701
4702
4703
4704
4705
4706
4707
4708
4709
4710
4711
4712
/*
 * Zero part of the block containing file offset @from.  With
 * @front == 0, bytes [from, from + len) are zeroed (len == 0 means "to
 * the end of the block"); with @front != 0, the block is zeroed from
 * its start up to @from.  Used by truncate/hole punching so the part
 * of a still-allocated block past the data does not hold stale bytes.
 */
int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
			 int front)
{
	struct address_space *mapping = inode->i_mapping;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	char *kaddr;
	u32 blocksize = root->sectorsize;
	pgoff_t index = from >> PAGE_CACHE_SHIFT;
	unsigned offset = from & (blocksize - 1);
	struct page *page;
	gfp_t mask = btrfs_alloc_write_mask(mapping);
	int ret = 0;
	u64 block_start;
	u64 block_end;

	/* nothing to do when the range is already block aligned */
	if ((offset & (blocksize - 1)) == 0 &&
	    (!len || ((len & (blocksize - 1)) == 0)))
		goto out;

	ret = btrfs_delalloc_reserve_space(inode,
			round_down(from, blocksize), blocksize);
	if (ret)
		goto out;

again:
	page = find_or_create_page(mapping, index, mask);
	if (!page) {
		btrfs_delalloc_release_space(inode,
				round_down(from, blocksize),
				blocksize);
		ret = -ENOMEM;
		goto out;
	}

	block_start = round_down(from, blocksize);
	block_end = block_start + blocksize - 1;

	if (!PageUptodate(page)) {
		/* read the block in so only the requested range is zeroed */
		ret = btrfs_readpage(NULL, page);
		lock_page(page);
		if (page->mapping != mapping) {
			/* the page was truncated from under us - retry */
			unlock_page(page);
			page_cache_release(page);
			goto again;
		}
		if (!PageUptodate(page)) {
			ret = -EIO;
			goto out_unlock;
		}
	}
	wait_on_page_writeback(page);

	lock_extent_bits(io_tree, block_start, block_end, &cached_state);
	set_page_extent_mapped(page);

	/* wait out any ordered extent covering this block, then retry */
	ordered = btrfs_lookup_ordered_extent(inode, block_start);
	if (ordered) {
		unlock_extent_cached(io_tree, block_start, block_end,
				     &cached_state, GFP_NOFS);
		unlock_page(page);
		page_cache_release(page);
		btrfs_start_ordered_extent(inode, ordered, 1);
		btrfs_put_ordered_extent(ordered);
		goto again;
	}

	/* clear stale state before re-marking the range delalloc */
	clear_extent_bit(&BTRFS_I(inode)->io_tree, block_start, block_end,
			 EXTENT_DIRTY | EXTENT_DELALLOC |
			 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
			 0, 0, &cached_state, GFP_NOFS);

	ret = btrfs_set_extent_delalloc(inode, block_start, block_end,
					&cached_state, 0);
	if (ret) {
		unlock_extent_cached(io_tree, block_start, block_end,
				     &cached_state, GFP_NOFS);
		goto out_unlock;
	}

	if (offset != blocksize) {
		if (!len)
			len = blocksize - offset;
		kaddr = kmap(page);
		if (front)
			/* zero [block start, from) */
			memset(kaddr + (block_start - page_offset(page)),
			       0, offset);
		else
			/* zero [from, from + len) */
			memset(kaddr + (block_start - page_offset(page)) + offset,
			       0, len);
		flush_dcache_page(page);
		kunmap(page);
	}
	ClearPageChecked(page);
	set_page_dirty(page);
	unlock_extent_cached(io_tree, block_start, block_end, &cached_state,
			     GFP_NOFS);

out_unlock:
	if (ret)
		btrfs_delalloc_release_space(inode, block_start,
					     blocksize);
	unlock_page(page);
	page_cache_release(page);
out:
	return ret;
}
4822
4823static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode,
4824 u64 offset, u64 len)
4825{
4826 struct btrfs_trans_handle *trans;
4827 int ret;
4828
4829
4830
4831
4832
4833 if (btrfs_fs_incompat(root->fs_info, NO_HOLES)) {
4834 BTRFS_I(inode)->last_trans = root->fs_info->generation;
4835 BTRFS_I(inode)->last_sub_trans = root->log_transid;
4836 BTRFS_I(inode)->last_log_commit = root->last_log_commit;
4837 return 0;
4838 }
4839
4840
4841
4842
4843
4844
4845 trans = btrfs_start_transaction(root, 3);
4846 if (IS_ERR(trans))
4847 return PTR_ERR(trans);
4848
4849 ret = btrfs_drop_extents(trans, root, inode, offset, offset + len, 1);
4850 if (ret) {
4851 btrfs_abort_transaction(trans, root, ret);
4852 btrfs_end_transaction(trans, root);
4853 return ret;
4854 }
4855
4856 ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset,
4857 0, 0, len, 0, len, 0, 0, 0);
4858 if (ret)
4859 btrfs_abort_transaction(trans, root, ret);
4860 else
4861 btrfs_update_inode(trans, root, inode);
4862 btrfs_end_transaction(trans, root);
4863 return ret;
4864}
4865
4866
4867
4868
4869
4870
4871
/*
 * Handle an expanding truncate from @oldsize to @size: zero the tail
 * of the last block of the old size, then cover the gap between the
 * block-aligned old size and the new size with explicit hole extents
 * and matching cached extent maps.  Preallocated extents already in
 * the range are left untouched.
 */
int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_map *em = NULL;
	struct extent_state *cached_state = NULL;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	u64 hole_start = ALIGN(oldsize, root->sectorsize);
	u64 block_end = ALIGN(size, root->sectorsize);
	u64 last_byte;
	u64 cur_offset;
	u64 hole_size;
	int err = 0;

	/*
	 * Zero the remainder of the block at the old EOF so stale bytes
	 * past the old i_size cannot become visible when the file grows.
	 */
	err = btrfs_truncate_block(inode, oldsize, 0, 0);
	if (err)
		return err;

	if (size <= hole_start)
		return 0;

	/* lock the range, waiting out any ordered extents inside it */
	while (1) {
		struct btrfs_ordered_extent *ordered;

		lock_extent_bits(io_tree, hole_start, block_end - 1,
				 &cached_state);
		ordered = btrfs_lookup_ordered_range(inode, hole_start,
						     block_end - hole_start);
		if (!ordered)
			break;
		unlock_extent_cached(io_tree, hole_start, block_end - 1,
				     &cached_state, GFP_NOFS);
		btrfs_start_ordered_extent(inode, ordered, 1);
		btrfs_put_ordered_extent(ordered);
	}

	cur_offset = hole_start;
	while (1) {
		em = btrfs_get_extent(inode, NULL, 0, cur_offset,
				block_end - cur_offset, 0);
		if (IS_ERR(em)) {
			err = PTR_ERR(em);
			em = NULL;
			break;
		}
		last_byte = min(extent_map_end(em), block_end);
		last_byte = ALIGN(last_byte , root->sectorsize);
		if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
			struct extent_map *hole_em;
			hole_size = last_byte - cur_offset;

			/* insert the on-disk hole (no-op with NO_HOLES) */
			err = maybe_insert_hole(root, inode, cur_offset,
						hole_size);
			if (err)
				break;
			btrfs_drop_extent_cache(inode, cur_offset,
						cur_offset + hole_size - 1, 0);
			hole_em = alloc_extent_map();
			if (!hole_em) {
				/*
				 * Cannot cache the hole mapping; make the
				 * next fsync fall back to a full sync.
				 */
				set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
					&BTRFS_I(inode)->runtime_flags);
				goto next;
			}
			hole_em->start = cur_offset;
			hole_em->len = hole_size;
			hole_em->orig_start = cur_offset;

			hole_em->block_start = EXTENT_MAP_HOLE;
			hole_em->block_len = 0;
			hole_em->orig_block_len = 0;
			hole_em->ram_bytes = hole_size;
			hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
			hole_em->compress_type = BTRFS_COMPRESS_NONE;
			hole_em->generation = root->fs_info->generation;

			/* retry until no conflicting cached mapping remains */
			while (1) {
				write_lock(&em_tree->lock);
				err = add_extent_mapping(em_tree, hole_em, 1);
				write_unlock(&em_tree->lock);
				if (err != -EEXIST)
					break;
				btrfs_drop_extent_cache(inode, cur_offset,
							cur_offset +
							hole_size - 1, 0);
			}
			free_extent_map(hole_em);
		}
next:
		free_extent_map(em);
		em = NULL;
		cur_offset = last_byte;
		if (cur_offset >= block_end)
			break;
	}
	free_extent_map(em);
	unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state,
			     GFP_NOFS);
	return err;
}
4976
/*
 * Apply an ATTR_SIZE change.  Growing goes through btrfs_cont_expand()
 * plus a small transaction that pushes the new i_size; shrinking adds
 * the inode to the orphan list first (so a crash mid-truncate gets
 * cleaned up at mount), truncates the page cache and items, and drops
 * the orphan entry again if the truncate failed on a still-linked
 * inode.
 */
static int btrfs_setsize(struct inode *inode, struct iattr *attr)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_trans_handle *trans;
	loff_t oldsize = i_size_read(inode);
	loff_t newsize = attr->ia_size;
	int mask = attr->ia_valid;
	int ret;

	/*
	 * Only bump c/mtime ourselves when the caller did not also ask
	 * for an explicit time change along with the size change.
	 */
	if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME))))
		inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb);

	if (newsize > oldsize) {
		truncate_pagecache(inode, newsize);
		/*
		 * Hold off snapshot creation while expanding.
		 * NOTE(review): inferred from the wait/end_write pairing
		 * around btrfs_cont_expand() - confirm the exact race
		 * this guards against.
		 */
		btrfs_wait_for_snapshot_creation(root);
		ret = btrfs_cont_expand(inode, oldsize, newsize);
		if (ret) {
			btrfs_end_write_no_snapshoting(root);
			return ret;
		}

		trans = btrfs_start_transaction(root, 1);
		if (IS_ERR(trans)) {
			btrfs_end_write_no_snapshoting(root);
			return PTR_ERR(trans);
		}

		i_size_write(inode, newsize);
		btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
		ret = btrfs_update_inode(trans, root, inode);
		btrfs_end_write_no_snapshoting(root);
		btrfs_end_transaction(trans, root);
	} else {
		/*
		 * Truncating to zero: remember it so later close/fsync
		 * handling can treat the ordered data specially.
		 */
		if (newsize == 0)
			set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
				&BTRFS_I(inode)->runtime_flags);

		/* reserves 2 items: orphan item + inode update */
		trans = btrfs_start_transaction(root, 2);
		if (IS_ERR(trans))
			return PTR_ERR(trans);

		/*
		 * Add an orphan item before shrinking i_size so that if
		 * we crash during the truncate below, mount-time orphan
		 * cleanup can finish the job.
		 */
		ret = btrfs_orphan_add(trans, inode);
		btrfs_end_transaction(trans, root);
		if (ret)
			return ret;

		/* shrink the page cache and in-memory i_size first */
		truncate_setsize(inode, newsize);

		/* drain outstanding direct I/O against the old size */
		btrfs_inode_block_unlocked_dio(inode);
		inode_dio_wait(inode);
		btrfs_inode_resume_unlocked_dio(inode);

		ret = btrfs_truncate(inode);
		if (ret && inode->i_nlink) {
			int err;

			/*
			 * Truncate failed but the inode is still linked:
			 * restore i_size from the on-disk value and drop
			 * the orphan item so orphan cleanup does not
			 * delete a live inode later.
			 */
			trans = btrfs_join_transaction(root);
			if (IS_ERR(trans)) {
				btrfs_orphan_del(NULL, inode);
				return ret;
			}
			i_size_write(inode, BTRFS_I(inode)->disk_i_size);
			err = btrfs_orphan_del(trans, inode);
			if (err)
				btrfs_abort_transaction(trans, root, err);
			btrfs_end_transaction(trans, root);
		}
	}

	return ret;
}
5089
5090static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
5091{
5092 struct inode *inode = dentry->d_inode;
5093 struct btrfs_root *root = BTRFS_I(inode)->root;
5094 int err;
5095
5096 if (btrfs_root_readonly(root))
5097 return -EROFS;
5098
5099 err = inode_change_ok(inode, attr);
5100 if (err)
5101 return err;
5102
5103 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
5104 err = btrfs_setsize(inode, attr);
5105 if (err)
5106 return err;
5107 }
5108
5109 if (attr->ia_valid) {
5110 setattr_copy(inode, attr);
5111 inode_inc_iversion(inode);
5112 err = btrfs_dirty_inode(inode);
5113
5114 if (!err && attr->ia_valid & ATTR_MODE)
5115 err = btrfs_acl_chmod(inode);
5116 }
5117
5118 return err;
5119}
5120
5121
5122
5123
5124
5125
5126
5127
5128
5129
5130
5131
5132
/*
 * Drop all page cache, cached extent maps and io-tree state for an inode
 * that is being evicted.  Called only from btrfs_evict_inode() after the
 * VFS has marked the inode I_FREEING.
 */
static void evict_inode_truncate_pages(struct inode *inode)
{
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_map_tree *map_tree = &BTRFS_I(inode)->extent_tree;
	struct rb_node *node;

	ASSERT(inode->i_state & I_FREEING);
	truncate_inode_pages(&inode->i_data, 0);

	/* Free every cached extent mapping for this inode. */
	write_lock(&map_tree->lock);
	while (!RB_EMPTY_ROOT(&map_tree->map)) {
		struct extent_map *em;

		node = rb_first(&map_tree->map);
		em = rb_entry(node, struct extent_map, rb_node);
		/*
		 * Clear pinned/logging bits so remove_extent_mapping() will
		 * accept the map; the inode is dying, nothing needs them.
		 */
		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
		clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
		remove_extent_mapping(map_tree, em);
		free_extent_map(em);
		/* Drop the lock periodically so we don't hog the CPU. */
		if (need_resched()) {
			write_unlock(&map_tree->lock);
			cond_resched();
			write_lock(&map_tree->lock);
		}
	}
	write_unlock(&map_tree->lock);

	/*
	 * Keep looping until no ranges remain in the io tree.  Some ranges
	 * may still be locked by in-flight bios (e.g. readahead end_io has
	 * not yet unlocked them) and those bios do not hold an inode
	 * reference, so here we effectively wait for them by taking and
	 * releasing each range's extent lock ourselves before clearing its
	 * state.  If we didn't, they could unlock a range on an already
	 * freed io_tree (use-after-free).
	 */
	spin_lock(&io_tree->lock);
	while (!RB_EMPTY_ROOT(&io_tree->state)) {
		struct extent_state *state;
		struct extent_state *cached_state = NULL;
		u64 start;
		u64 end;

		node = rb_first(&io_tree->state);
		state = rb_entry(node, struct extent_state, rb_node);
		start = state->start;
		end = state->end;
		/* lock_extent_bits() sleeps; must drop the spinlock first. */
		spin_unlock(&io_tree->lock);

		lock_extent_bits(io_tree, start, end, &cached_state);

		/*
		 * Delalloc ranges still hold reserved qgroup space that
		 * would otherwise never be returned, since eviction skips
		 * the normal writeback path.  Release it explicitly.
		 */
		if (state->state & EXTENT_DELALLOC)
			btrfs_qgroup_free_data(inode, start, end - start + 1);

		clear_extent_bit(io_tree, start, end,
				 EXTENT_LOCKED | EXTENT_DIRTY |
				 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
				 EXTENT_DEFRAG, 1, 1,
				 &cached_state, GFP_NOFS);

		cond_resched();
		spin_lock(&io_tree->lock);
	}
	spin_unlock(&io_tree->lock);
}
5213
/*
 * Final eviction of an inode: for an unlinked inode, truncate away all of
 * its items and remove its orphan item; otherwise just tear down the
 * in-memory state.  Metadata space for the truncation is reserved in a
 * temporary block rsv, falling back to stealing from the global reserve.
 */
void btrfs_evict_inode(struct inode *inode)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_block_rsv *rsv, *global_rsv;
	int steal_from_global = 0;
	u64 min_size;
	int ret;

	trace_btrfs_inode_evict(inode);

	/* No root attached: nothing on disk to touch, just free the memory. */
	if (!root) {
		kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
		return;
	}

	min_size = btrfs_calc_trunc_metadata_size(root, 1);

	evict_inode_truncate_pages(inode);

	/*
	 * Still-linked inodes of live (non-tree-root) roots, and the free
	 * space cache inode, need no on-disk deletion work.
	 */
	if (inode->i_nlink &&
	    ((btrfs_root_refs(&root->root_item) != 0 &&
	      root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID) ||
	     btrfs_is_free_space_inode(inode)))
		goto no_delete;

	if (is_bad_inode(inode)) {
		btrfs_orphan_del(NULL, inode);
		goto no_delete;
	}
	/* Wait for in-flight ordered extents before deleting the items. */
	if (!special_file(inode->i_mode))
		btrfs_wait_ordered_range(inode, 0, (u64)-1);

	btrfs_free_io_failure_record(inode, 0, (u64)-1);

	/* During log replay the orphan code will take care of deletion. */
	if (test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags)) {
		BUG_ON(test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
				 &BTRFS_I(inode)->runtime_flags));
		goto no_delete;
	}

	/* A linked inode only gets here when its root is being deleted. */
	if (inode->i_nlink > 0) {
		BUG_ON(btrfs_root_refs(&root->root_item) != 0 &&
		       root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID);
		goto no_delete;
	}

	ret = btrfs_commit_inode_delayed_inode(inode);
	if (ret) {
		btrfs_orphan_del(NULL, inode);
		goto no_delete;
	}

	rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
	if (!rsv) {
		btrfs_orphan_del(NULL, inode);
		goto no_delete;
	}
	rsv->size = min_size;
	rsv->failfast = 1;
	global_rsv = &root->fs_info->global_block_rsv;

	btrfs_i_size_write(inode, 0);

	/*
	 * Space for the orphan item was reserved at unlink time; here we
	 * only need slack space for the inode item updates done while the
	 * truncate loop below makes progress.
	 */
	while (1) {
		ret = btrfs_block_rsv_refill(root, rsv, min_size,
					     BTRFS_RESERVE_FLUSH_LIMIT);

		/*
		 * If the normal reservation failed, try to steal from the
		 * global reserve - we likely won't use this space anyway,
		 * and we want deletion to make progress if at all possible.
		 */
		if (ret)
			steal_from_global++;
		else
			steal_from_global = 0;
		ret = 0;

		/*
		 * steal_from_global == 0: reservation succeeded normally.
		 * steal_from_global == 1: try migrating from the global rsv.
		 * steal_from_global == 2: we've committed once, maybe the
		 * global reserve has room this time around.
		 * steal_from_global  > 2: give up; the orphan item will make
		 * the next mount finish the truncate.
		 */
		if (steal_from_global > 2) {
			btrfs_warn(root->fs_info,
				"Could not get space for a delete, will truncate on mount %d",
				ret);
			btrfs_orphan_del(NULL, inode);
			btrfs_free_block_rsv(root, rsv);
			goto no_delete;
		}

		trans = btrfs_join_transaction(root);
		if (IS_ERR(trans)) {
			btrfs_orphan_del(NULL, inode);
			btrfs_free_block_rsv(root, rsv);
			goto no_delete;
		}

		/*
		 * Only steal from the global reserve if there is room for
		 * it after accounting for pending delayed refs; otherwise
		 * force a commit below and retry.
		 */
		if (steal_from_global) {
			if (!btrfs_check_space_for_delayed_refs(trans, root))
				ret = btrfs_block_rsv_migrate(global_rsv, rsv,
							      min_size, 0);
			else
				ret = -ENOSPC;
		}

		/*
		 * Couldn't reserve: commit the transaction to flush pending
		 * work and free up space, then go around again.
		 */
		if (ret) {
			ret = btrfs_commit_transaction(trans, root);
			if (ret) {
				btrfs_orphan_del(NULL, inode);
				btrfs_free_block_rsv(root, rsv);
				goto no_delete;
			}
			continue;
		} else {
			steal_from_global = 0;
		}

		trans->block_rsv = rsv;

		/* Delete as many items as the reservation allows this pass. */
		ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
		if (ret != -ENOSPC && ret != -EAGAIN)
			break;

		trans->block_rsv = &root->fs_info->trans_block_rsv;
		btrfs_end_transaction(trans, root);
		trans = NULL;
		btrfs_btree_balance_dirty(root);
	}

	btrfs_free_block_rsv(root, rsv);

	/*
	 * Errors here are not a big deal: a leftover orphan item is simply
	 * cleaned up on the next mount.
	 */
	if (ret == 0) {
		trans->block_rsv = root->orphan_block_rsv;
		btrfs_orphan_del(trans, inode);
	} else {
		btrfs_orphan_del(NULL, inode);
	}

	trans->block_rsv = &root->fs_info->trans_block_rsv;
	/* Let the inode number cache reuse this objectid (fs trees only). */
	if (!(root == root->fs_info->tree_root ||
	      root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID))
		btrfs_return_ino(root, btrfs_ino(inode));

	btrfs_end_transaction(trans, root);
	btrfs_btree_balance_dirty(root);
no_delete:
	btrfs_remove_delayed_node(inode);
	clear_inode(inode);
}
5390
5391
5392
5393
5394
5395static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
5396 struct btrfs_key *location)
5397{
5398 const char *name = dentry->d_name.name;
5399 int namelen = dentry->d_name.len;
5400 struct btrfs_dir_item *di;
5401 struct btrfs_path *path;
5402 struct btrfs_root *root = BTRFS_I(dir)->root;
5403 int ret = 0;
5404
5405 path = btrfs_alloc_path();
5406 if (!path)
5407 return -ENOMEM;
5408
5409 di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name,
5410 namelen, 0);
5411 if (IS_ERR(di))
5412 ret = PTR_ERR(di);
5413
5414 if (IS_ERR_OR_NULL(di))
5415 goto out_err;
5416
5417 btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
5418out:
5419 btrfs_free_path(path);
5420 return ret;
5421out_err:
5422 location->objectid = 0;
5423 goto out;
5424}
5425
5426
5427
5428
5429
5430
/*
 * When a directory entry points at a tree root (a subvolume), the lookup
 * key must be rewritten to the root directory of that subvolume - this is
 * conceptually like crossing a mount point.  On success *sub_root is set
 * to the subvolume's root and *location to its root directory inode key.
 * Returns 0 on success, -ENOENT if no matching root ref exists, or a
 * negative errno on failure.
 */
static int fixup_tree_root_location(struct btrfs_root *root,
				    struct inode *dir,
				    struct dentry *dentry,
				    struct btrfs_key *location,
				    struct btrfs_root **sub_root)
{
	struct btrfs_path *path;
	struct btrfs_root *new_root;
	struct btrfs_root_ref *ref;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	int ret;
	int err = 0;

	path = btrfs_alloc_path();
	if (!path) {
		err = -ENOMEM;
		goto out;
	}

	/* Look up the ROOT_REF item linking parent root -> subvolume. */
	err = -ENOENT;
	key.objectid = BTRFS_I(dir)->root->root_key.objectid;
	key.type = BTRFS_ROOT_REF_KEY;
	key.offset = location->objectid;

	ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, path,
				0, 0);
	if (ret) {
		if (ret < 0)
			err = ret;
		goto out;
	}

	/* The ref must name this directory and match the dentry's name. */
	leaf = path->nodes[0];
	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
	if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(dir) ||
	    btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len)
		goto out;

	ret = memcmp_extent_buffer(leaf, dentry->d_name.name,
				   (unsigned long)(ref + 1),
				   dentry->d_name.len);
	if (ret)
		goto out;

	btrfs_release_path(path);

	new_root = btrfs_read_fs_root_no_name(root->fs_info, location);
	if (IS_ERR(new_root)) {
		err = PTR_ERR(new_root);
		goto out;
	}

	/* Point the caller at the subvolume's root directory inode. */
	*sub_root = new_root;
	location->objectid = btrfs_root_dirid(&new_root->root_item);
	location->type = BTRFS_INODE_ITEM_KEY;
	location->offset = 0;
	err = 0;
out:
	btrfs_free_path(path);
	return err;
}
5493
/*
 * Insert an inode into its root's red-black tree of in-memory inodes,
 * keyed by inode number.  Used by btrfs_invalidate_inodes() to find all
 * cached inodes of a root.
 */
static void inode_tree_add(struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_inode *entry;
	struct rb_node **p;
	struct rb_node *parent;
	struct rb_node *new = &BTRFS_I(inode)->rb_node;
	u64 ino = btrfs_ino(inode);

	if (inode_unhashed(inode))
		return;
	parent = NULL;
	spin_lock(&root->inode_lock);
	p = &root->inode_tree.rb_node;
	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct btrfs_inode, rb_node);

		if (ino < btrfs_ino(&entry->vfs_inode))
			p = &parent->rb_left;
		else if (ino > btrfs_ino(&entry->vfs_inode))
			p = &parent->rb_right;
		else {
			/*
			 * Same ino already in the tree: it must be a dying
			 * inode that hasn't been removed yet; replace it.
			 */
			WARN_ON(!(entry->vfs_inode.i_state &
				  (I_WILL_FREE | I_FREEING)));
			rb_replace_node(parent, new, &root->inode_tree);
			RB_CLEAR_NODE(parent);
			spin_unlock(&root->inode_lock);
			return;
		}
	}
	rb_link_node(new, parent, p);
	rb_insert_color(new, &root->inode_tree);
	spin_unlock(&root->inode_lock);
}
5529
/*
 * Remove an inode from its root's inode rb-tree.  If this empties the
 * tree of a root with zero refs, the root itself can be queued for
 * deletion.
 */
static void inode_tree_del(struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int empty = 0;

	spin_lock(&root->inode_lock);
	if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) {
		rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree);
		RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
		empty = RB_EMPTY_ROOT(&root->inode_tree);
	}
	spin_unlock(&root->inode_lock);

	if (empty && btrfs_root_refs(&root->root_item) == 0) {
		/* Wait for concurrent subvolume lookups before re-checking. */
		synchronize_srcu(&root->fs_info->subvol_srcu);
		spin_lock(&root->inode_lock);
		empty = RB_EMPTY_ROOT(&root->inode_tree);
		spin_unlock(&root->inode_lock);
		if (empty)
			btrfs_add_dead_root(root);
	}
}
5552
/*
 * Drop all cached in-memory inodes of a root, pruning dentries so their
 * reference counts can reach zero.  Used when a root is being deleted (or
 * the filesystem is in an error state).  The rb-tree walk restarts from
 * the last processed inode number whenever the lock is dropped.
 */
void btrfs_invalidate_inodes(struct btrfs_root *root)
{
	struct rb_node *node;
	struct rb_node *prev;
	struct btrfs_inode *entry;
	struct inode *inode;
	u64 objectid = 0;

	if (!test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
		WARN_ON(btrfs_root_refs(&root->root_item) != 0);

	spin_lock(&root->inode_lock);
again:
	/* Find the first inode with ino >= objectid. */
	node = root->inode_tree.rb_node;
	prev = NULL;
	while (node) {
		prev = node;
		entry = rb_entry(node, struct btrfs_inode, rb_node);

		if (objectid < btrfs_ino(&entry->vfs_inode))
			node = node->rb_left;
		else if (objectid > btrfs_ino(&entry->vfs_inode))
			node = node->rb_right;
		else
			break;
	}
	if (!node) {
		while (prev) {
			entry = rb_entry(prev, struct btrfs_inode, rb_node);
			if (objectid <= btrfs_ino(&entry->vfs_inode)) {
				node = prev;
				break;
			}
			prev = rb_next(prev);
		}
	}
	while (node) {
		entry = rb_entry(node, struct btrfs_inode, rb_node);
		/* Remember where to restart if we drop the lock below. */
		objectid = btrfs_ino(&entry->vfs_inode) + 1;
		inode = igrab(&entry->vfs_inode);
		if (inode) {
			spin_unlock(&root->inode_lock);
			if (atomic_read(&inode->i_count) > 1)
				d_prune_aliases(inode);
			/*
			 * The final iput will remove the inode from the
			 * rb-tree (via the eviction path) once its usage
			 * count hits zero.
			 */
			iput(inode);
			cond_resched();
			spin_lock(&root->inode_lock);
			goto again;
		}

		if (cond_resched_lock(&root->inode_lock))
			goto again;

		node = rb_next(node);
	}
	spin_unlock(&root->inode_lock);
}
5615
5616static int btrfs_init_locked_inode(struct inode *inode, void *p)
5617{
5618 struct btrfs_iget_args *args = p;
5619 inode->i_ino = args->location->objectid;
5620 memcpy(&BTRFS_I(inode)->location, args->location,
5621 sizeof(*args->location));
5622 BTRFS_I(inode)->root = args->root;
5623 return 0;
5624}
5625
5626static int btrfs_find_actor(struct inode *inode, void *opaque)
5627{
5628 struct btrfs_iget_args *args = opaque;
5629 return args->location->objectid == BTRFS_I(inode)->location.objectid &&
5630 args->root == BTRFS_I(inode)->root;
5631}
5632
5633static struct inode *btrfs_iget_locked(struct super_block *s,
5634 struct btrfs_key *location,
5635 struct btrfs_root *root)
5636{
5637 struct inode *inode;
5638 struct btrfs_iget_args args;
5639 unsigned long hashval = btrfs_inode_hash(location->objectid, root);
5640
5641 args.location = location;
5642 args.root = root;
5643
5644 inode = iget5_locked(s, hashval, btrfs_find_actor,
5645 btrfs_init_locked_inode,
5646 (void *)&args);
5647 return inode;
5648}
5649
5650
5651
5652
/*
 * Get an inode object given its location and corresponding root.
 * Returns in *new whether the inode had to be read from disk.
 */
struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
			 struct btrfs_root *root, int *new)
{
	struct inode *inode;

	inode = btrfs_iget_locked(s, location, root);
	if (!inode)
		return ERR_PTR(-ENOMEM);

	if (inode->i_state & I_NEW) {
		int ret;

		ret = btrfs_read_locked_inode(inode);
		if (!is_bad_inode(inode)) {
			inode_tree_add(inode);
			unlock_new_inode(inode);
			if (new)
				*new = 1;
		} else {
			/*
			 * A failed read marks the inode bad; drop it and
			 * return the error (ret must be negative here).
			 */
			unlock_new_inode(inode);
			iput(inode);
			ASSERT(ret < 0);
			inode = ERR_PTR(ret < 0 ? ret : -ESTALE);
		}
	}

	return inode;
}
5681
5682static struct inode *new_simple_dir(struct super_block *s,
5683 struct btrfs_key *key,
5684 struct btrfs_root *root)
5685{
5686 struct inode *inode = new_inode(s);
5687
5688 if (!inode)
5689 return ERR_PTR(-ENOMEM);
5690
5691 BTRFS_I(inode)->root = root;
5692 memcpy(&BTRFS_I(inode)->location, key, sizeof(*key));
5693 set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags);
5694
5695 inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID;
5696 inode->i_op = &btrfs_dir_ro_inode_operations;
5697 inode->i_fop = &simple_dir_operations;
5698 inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
5699 inode->i_mtime = current_fs_time(inode->i_sb);
5700 inode->i_atime = inode->i_mtime;
5701 inode->i_ctime = inode->i_mtime;
5702 BTRFS_I(inode)->i_otime = inode->i_mtime;
5703
5704 return inode;
5705}
5706
/*
 * Core of directory lookup: resolve a dentry to an inode, transparently
 * crossing subvolume boundaries (ROOT_ITEM keys) and substituting a dummy
 * directory for orphaned subvolume references.
 */
struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
{
	struct inode *inode;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct btrfs_root *sub_root = root;
	struct btrfs_key location;
	int index;
	int ret = 0;

	if (dentry->d_name.len > BTRFS_NAME_LEN)
		return ERR_PTR(-ENAMETOOLONG);

	ret = btrfs_inode_by_name(dir, dentry, &location);
	if (ret < 0)
		return ERR_PTR(ret);

	/* objectid == 0 means btrfs_inode_by_name() found no entry. */
	if (location.objectid == 0)
		return ERR_PTR(-ENOENT);

	if (location.type == BTRFS_INODE_ITEM_KEY) {
		inode = btrfs_iget(dir->i_sb, &location, root, NULL);
		return inode;
	}

	BUG_ON(location.type != BTRFS_ROOT_ITEM_KEY);

	/* Subvolume crossing: protect the root lookup with subvol_srcu. */
	index = srcu_read_lock(&root->fs_info->subvol_srcu);
	ret = fixup_tree_root_location(root, dir, dentry,
				       &location, &sub_root);
	if (ret < 0) {
		if (ret != -ENOENT)
			inode = ERR_PTR(ret);
		else
			/* Deleted subvolume: present an empty dummy dir. */
			inode = new_simple_dir(dir->i_sb, &location, sub_root);
	} else {
		inode = btrfs_iget(dir->i_sb, &location, sub_root, NULL);
	}
	srcu_read_unlock(&root->fs_info->subvol_srcu, index);

	/* First access of a subvolume: finish its pending orphan cleanup. */
	if (!IS_ERR(inode) && root != sub_root) {
		down_read(&root->fs_info->cleanup_work_sem);
		if (!(inode->i_sb->s_flags & MS_RDONLY))
			ret = btrfs_orphan_cleanup(sub_root);
		up_read(&root->fs_info->cleanup_work_sem);
		if (ret) {
			iput(inode);
			inode = ERR_PTR(ret);
		}
	}

	return inode;
}
5759
5760static int btrfs_dentry_delete(const struct dentry *dentry)
5761{
5762 struct btrfs_root *root;
5763 struct inode *inode = dentry->d_inode;
5764
5765 if (!inode && !IS_ROOT(dentry))
5766 inode = dentry->d_parent->d_inode;
5767
5768 if (inode) {
5769 root = BTRFS_I(inode)->root;
5770 if (btrfs_root_refs(&root->root_item) == 0)
5771 return 1;
5772
5773 if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
5774 return 1;
5775 }
5776 return 0;
5777}
5778
/* Free the per-dentry private data when a dentry is destroyed. */
static void btrfs_dentry_release(struct dentry *dentry)
{
	kfree(dentry->d_fsdata);
}
5783
5784static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
5785 unsigned int flags)
5786{
5787 struct inode *inode;
5788
5789 inode = btrfs_lookup_dentry(dir, dentry);
5790 if (IS_ERR(inode)) {
5791 if (PTR_ERR(inode) == -ENOENT)
5792 inode = NULL;
5793 else
5794 return ERR_CAST(inode);
5795 }
5796
5797 return d_materialise_unique(dentry, inode);
5798}
5799
/* Map on-disk BTRFS_FT_* dir item type values to VFS DT_* values. */
unsigned char btrfs_filetype_table[] = {
	DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
};
5803
/*
 * ->readdir implementation: walk the directory's DIR_INDEX (or DIR_ITEM
 * for the tree root) keys starting at f_pos, merging in entries that only
 * exist as delayed items and skipping ones with a pending delayed delete.
 */
static int btrfs_real_readdir(struct file *filp, void *dirent,
			      filldir_t filldir)
{
	struct inode *inode = file_inode(filp);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_item *item;
	struct btrfs_dir_item *di;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_path *path;
	struct list_head ins_list;
	struct list_head del_list;
	int ret;
	struct extent_buffer *leaf;
	int slot;
	unsigned char d_type;
	int over = 0;
	u32 di_cur;
	u32 di_total;
	u32 di_len;
	int key_type = BTRFS_DIR_INDEX_KEY;
	char tmp_name[32];
	char *name_ptr;
	int name_len;
	int is_curr = 0;	/* filp->f_pos points to the current index */
	bool emitted;

	/* FIXME: use a real flag for deciding about the key type */
	if (root->fs_info->tree_root == root)
		key_type = BTRFS_DIR_ITEM_KEY;

	/* Special case for "." */
	if (filp->f_pos == 0) {
		over = filldir(dirent, ".", 1,
			       filp->f_pos, btrfs_ino(inode), DT_DIR);
		if (over)
			return 0;
		filp->f_pos = 1;
	}
	/* Special case for ".." */
	if (filp->f_pos == 1) {
		u64 pino = parent_ino(filp->f_path.dentry);
		over = filldir(dirent, "..", 2,
			       filp->f_pos, pino, DT_DIR);
		if (over)
			return 0;
		filp->f_pos = 2;
	}
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = READA_FORWARD;

	/* Collect pending delayed insertions/deletions for this dir. */
	if (key_type == BTRFS_DIR_INDEX_KEY) {
		INIT_LIST_HEAD(&ins_list);
		INIT_LIST_HEAD(&del_list);
		btrfs_get_delayed_items(inode, &ins_list, &del_list);
	}

	key.type = key_type;
	key.offset = filp->f_pos;
	key.objectid = btrfs_ino(inode);

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto err;

	emitted = false;
	while (1) {
		leaf = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto err;
			else if (ret > 0)
				break;
			continue;
		}

		item = btrfs_item_nr(slot);
		btrfs_item_key_to_cpu(leaf, &found_key, slot);

		/* Stop once we leave this directory's keys of our type. */
		if (found_key.objectid != key.objectid)
			break;
		if (found_key.type != key_type)
			break;
		if (found_key.offset < filp->f_pos)
			goto next;
		/* Skip entries with a pending delayed delete. */
		if (key_type == BTRFS_DIR_INDEX_KEY &&
		    btrfs_should_delete_dir_index(&del_list,
						  found_key.offset))
			goto next;

		filp->f_pos = found_key.offset;
		is_curr = 1;

		di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
		di_cur = 0;
		di_total = btrfs_item_size(leaf, item);

		/* A DIR_ITEM key can pack several entries into one item. */
		while (di_cur < di_total) {
			struct btrfs_key location;

			if (verify_dir_item(root, leaf, di))
				break;

			name_len = btrfs_dir_name_len(leaf, di);
			if (name_len <= sizeof(tmp_name)) {
				name_ptr = tmp_name;
			} else {
				name_ptr = kmalloc(name_len, GFP_KERNEL);
				if (!name_ptr) {
					ret = -ENOMEM;
					goto err;
				}
			}
			read_extent_buffer(leaf, name_ptr,
					   (unsigned long)(di + 1), name_len);

			d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
			btrfs_dir_item_key_to_cpu(leaf, di, &location);

			/*
			 * Is this a reference to our own snapshot?  If so,
			 * skip it.
			 *
			 * In contrast to old kernels, we insert the
			 * snapshot's dir item and dir index after it has
			 * been created, so we won't find a reference to our
			 * own snapshot.  We still keep this code for
			 * backward compatibility.
			 */
			if (location.type == BTRFS_ROOT_ITEM_KEY &&
			    location.objectid == root->root_key.objectid) {
				over = 0;
				goto skip;
			}
			over = filldir(dirent, name_ptr, name_len,
				       found_key.offset, location.objectid,
				       d_type);

skip:
			if (name_ptr != tmp_name)
				kfree(name_ptr);

			if (over)
				goto nopos;
			emitted = true;
			di_len = btrfs_dir_name_len(leaf, di) +
				 btrfs_dir_data_len(leaf, di) + sizeof(*di);
			di_cur += di_len;
			di = (struct btrfs_dir_item *)((char *)di + di_len);
		}
next:
		path->slots[0]++;
	}

	/* Emit any entries that only exist as delayed items so far. */
	if (key_type == BTRFS_DIR_INDEX_KEY) {
		if (is_curr)
			filp->f_pos++;
		ret = btrfs_readdir_delayed_dir_index(filp, dirent, filldir,
						      &ins_list, &emitted);
		if (ret)
			goto nopos;
	}

	/*
	 * If we did not emit any entry (past "." and ".."), don't advance
	 * f_pos to the end-of-directory markers below - the position must
	 * stay where it was so the caller sees a consistent offset.
	 */
	if (filp->f_pos > 2 && !emitted)
		goto nopos;

	/* Reached the end of the directory/root; bump pos past the last item. */
	filp->f_pos++;

	/*
	 * Stop new entries from being returned after we return the last
	 * entry.  New dir index entries get strictly increasing offsets,
	 * so entries created during readdir would otherwise keep showing
	 * up forever; park f_pos at a huge offset they'll never reach.
	 * Be careful not to overflow a 32-bit loff_t unless the last
	 * entry already requires it, since that has broken 32-bit apps
	 * in the past.
	 */
	if (key_type == BTRFS_DIR_INDEX_KEY) {
		if (filp->f_pos >= INT_MAX)
			filp->f_pos = LLONG_MAX;
		else
			filp->f_pos = INT_MAX;
	}
nopos:
	ret = 0;
err:
	if (key_type == BTRFS_DIR_INDEX_KEY)
		btrfs_put_delayed_items(&ins_list, &del_list);
	btrfs_free_path(path);
	return ret;
}
6014
6015int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
6016{
6017 struct btrfs_root *root = BTRFS_I(inode)->root;
6018 struct btrfs_trans_handle *trans;
6019 int ret = 0;
6020 bool nolock = false;
6021
6022 if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
6023 return 0;
6024
6025 if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(inode))
6026 nolock = true;
6027
6028 if (wbc->sync_mode == WB_SYNC_ALL) {
6029 if (nolock)
6030 trans = btrfs_join_transaction_nolock(root);
6031 else
6032 trans = btrfs_join_transaction(root);
6033 if (IS_ERR(trans))
6034 return PTR_ERR(trans);
6035 ret = btrfs_commit_transaction(trans, root);
6036 }
6037 return ret;
6038}
6039
6040
6041
6042
6043
6044
6045
6046static int btrfs_dirty_inode(struct inode *inode)
6047{
6048 struct btrfs_root *root = BTRFS_I(inode)->root;
6049 struct btrfs_trans_handle *trans;
6050 int ret;
6051
6052 if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
6053 return 0;
6054
6055 trans = btrfs_join_transaction(root);
6056 if (IS_ERR(trans))
6057 return PTR_ERR(trans);
6058
6059 ret = btrfs_update_inode(trans, root, inode);
6060 if (ret && ret == -ENOSPC) {
6061
6062 btrfs_end_transaction(trans, root);
6063 trans = btrfs_start_transaction(root, 1);
6064 if (IS_ERR(trans))
6065 return PTR_ERR(trans);
6066
6067 ret = btrfs_update_inode(trans, root, inode);
6068 }
6069 btrfs_end_transaction(trans, root);
6070 if (BTRFS_I(inode)->delayed_node)
6071 btrfs_balance_delayed_items(root);
6072
6073 return ret;
6074}
6075
6076
6077
6078
6079
6080static int btrfs_update_time(struct inode *inode, struct timespec *now,
6081 int flags)
6082{
6083 struct btrfs_root *root = BTRFS_I(inode)->root;
6084
6085 if (btrfs_root_readonly(root))
6086 return -EROFS;
6087
6088 if (flags & S_VERSION)
6089 inode_inc_iversion(inode);
6090 if (flags & S_CTIME)
6091 inode->i_ctime = *now;
6092 if (flags & S_MTIME)
6093 inode->i_mtime = *now;
6094 if (flags & S_ATIME)
6095 inode->i_atime = *now;
6096 return btrfs_dirty_inode(inode);
6097}
6098
6099
6100
6101
6102
6103
/*
 * Initialize a directory's index_cnt by finding the highest existing
 * DIR_INDEX key on disk and setting the counter to one past it.
 */
static int btrfs_set_inode_index_count(struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_key key, found_key;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	int ret;

	/* Search for the largest possible index key of this directory. */
	key.objectid = btrfs_ino(inode);
	key.type = BTRFS_DIR_INDEX_KEY;
	key.offset = (u64)-1;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	/* FIXME: we should be able to handle an exact match. */
	if (ret == 0)
		goto out;
	ret = 0;

	/*
	 * MAGIC NUMBER EXPLANATION:
	 * Directory positions start at 2 because "." and ".." occupy
	 * f_pos 0 and 1 respectively, so the first real index is 2.
	 */
	if (path->slots[0] == 0) {
		/* No items before the search position: directory is empty. */
		BTRFS_I(inode)->index_cnt = 2;
		goto out;
	}

	path->slots[0]--;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

	/* Previous item isn't one of our DIR_INDEX keys: no entries yet. */
	if (found_key.objectid != btrfs_ino(inode) ||
	    found_key.type != BTRFS_DIR_INDEX_KEY) {
		BTRFS_I(inode)->index_cnt = 2;
		goto out;
	}

	BTRFS_I(inode)->index_cnt = found_key.offset + 1;
out:
	btrfs_free_path(path);
	return ret;
}
6155
6156
6157
6158
6159
6160int btrfs_set_inode_index(struct inode *dir, u64 *index)
6161{
6162 int ret = 0;
6163
6164 if (BTRFS_I(dir)->index_cnt == (u64)-1) {
6165 ret = btrfs_inode_delayed_dir_index_count(dir);
6166 if (ret) {
6167 ret = btrfs_set_inode_index_count(dir);
6168 if (ret)
6169 return ret;
6170 }
6171 }
6172
6173 *index = BTRFS_I(dir)->index_cnt;
6174 BTRFS_I(dir)->index_cnt++;
6175
6176 return ret;
6177}
6178
6179static int btrfs_insert_inode_locked(struct inode *inode)
6180{
6181 struct btrfs_iget_args args;
6182 args.location = &BTRFS_I(inode)->location;
6183 args.root = BTRFS_I(inode)->root;
6184
6185 return insert_inode_locked4(inode,
6186 btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root),
6187 btrfs_find_actor, &args);
6188}
6189
/*
 * Allocate and fully initialize a new inode: reserve a dir index in the
 * parent, insert the INODE_ITEM and INODE_REF items in one tree
 * operation, hash the inode in locked state, and inherit flags/props
 * from the parent directory.  Returns the locked new inode or ERR_PTR.
 */
static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     struct inode *dir,
				     const char *name, int name_len,
				     u64 ref_objectid, u64 objectid,
				     umode_t mode, u64 *index)
{
	struct inode *inode;
	struct btrfs_inode_item *inode_item;
	struct btrfs_key *location;
	struct btrfs_path *path;
	struct btrfs_inode_ref *ref;
	struct btrfs_key key[2];
	u32 sizes[2];
	unsigned long ptr;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return ERR_PTR(-ENOMEM);

	inode = new_inode(root->fs_info->sb);
	if (!inode) {
		btrfs_free_path(path);
		return ERR_PTR(-ENOMEM);
	}

	/*
	 * Set i_ino early so the inode number can be reclaimed if we fail
	 * later in this function.
	 */
	inode->i_ino = objectid;

	if (dir) {
		trace_btrfs_inode_request(dir);

		/* Reserve the dir index this entry will occupy in 'dir'. */
		ret = btrfs_set_inode_index(dir, index);
		if (ret) {
			btrfs_free_path(path);
			iput(inode);
			return ERR_PTR(ret);
		}
	}
	/*
	 * index_cnt is ignored for everything but a dir; start it at 2
	 * since positions 0 and 1 belong to "." and "..".
	 */
	BTRFS_I(inode)->index_cnt = 2;
	BTRFS_I(inode)->dir_index = *index;
	BTRFS_I(inode)->root = root;
	BTRFS_I(inode)->generation = trans->transid;
	inode->i_generation = BTRFS_I(inode)->generation;

	/*
	 * We could have gotten an inode number from somebody who was
	 * fsynced and then removed in this same transaction, so set
	 * full sync - it will be a full sync anyway and this blows
	 * away any stale info in the log.
	 */
	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);

	key[0].objectid = objectid;
	key[0].type = BTRFS_INODE_ITEM_KEY;
	key[0].offset = 0;

	/*
	 * Start new inodes with an inode_ref; this is slightly more
	 * efficient for small numbers of hard links since they will be
	 * packed into one item.  Extended refs kick in if more hard
	 * links are added than fit in the ref item.
	 */
	key[1].objectid = objectid;
	key[1].type = BTRFS_INODE_REF_KEY;
	key[1].offset = ref_objectid;

	sizes[0] = sizeof(struct btrfs_inode_item);
	sizes[1] = name_len + sizeof(*ref);

	location = &BTRFS_I(inode)->location;
	location->objectid = objectid;
	location->offset = 0;
	location->type = BTRFS_INODE_ITEM_KEY;

	/* Hash the inode (I_NEW) before anything can look it up. */
	ret = btrfs_insert_inode_locked(inode);
	if (ret < 0)
		goto fail;

	/* Insert the inode item and the inode ref in one operation. */
	path->leave_spinning = 1;
	ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2);
	if (ret != 0)
		goto fail_unlock;

	inode_init_owner(inode, dir, mode);
	inode_set_bytes(inode, 0);

	inode->i_mtime = current_fs_time(inode->i_sb);
	inode->i_atime = inode->i_mtime;
	inode->i_ctime = inode->i_mtime;
	BTRFS_I(inode)->i_otime = inode->i_mtime;

	inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
				    struct btrfs_inode_item);
	memset_extent_buffer(path->nodes[0], 0, (unsigned long)inode_item,
			     sizeof(*inode_item));
	fill_inode_item(trans, path->nodes[0], inode_item, inode);

	ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
			     struct btrfs_inode_ref);
	btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
	btrfs_set_inode_ref_index(path->nodes[0], ref, *index);
	ptr = (unsigned long)(ref + 1);
	write_extent_buffer(path->nodes[0], name, ptr, name_len);

	btrfs_mark_buffer_dirty(path->nodes[0]);
	btrfs_free_path(path);

	btrfs_inherit_iflags(inode, dir);

	if (S_ISREG(mode)) {
		if (btrfs_test_opt(root->fs_info, NODATASUM))
			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
		if (btrfs_test_opt(root->fs_info, NODATACOW))
			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW |
				BTRFS_INODE_NODATASUM;
	}

	inode_tree_add(inode);

	trace_btrfs_inode_new(inode);
	btrfs_set_inode_last_trans(trans, inode);

	btrfs_update_root_times(trans, root);

	/* Property inheritance failure is logged but not fatal. */
	ret = btrfs_inode_inherit_props(trans, inode, dir);
	if (ret)
		btrfs_err(root->fs_info,
			  "error inheriting props for ino %llu (root %llu): %d",
			  btrfs_ino(inode), root->root_key.objectid, ret);

	return inode;

fail_unlock:
	unlock_new_inode(inode);
fail:
	/* Give the reserved dir index back on failure. */
	if (dir)
		BTRFS_I(dir)->index_cnt--;
	btrfs_free_path(path);
	iput(inode);
	return ERR_PTR(ret);
}
6341
/* Convert an inode's mode into the on-disk dir item type (BTRFS_FT_*). */
static inline u8 btrfs_inode_type(struct inode *inode)
{
	return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
}
6346
6347
6348
6349
6350
6351
6352
/*
 * Utility function to add 'inode' into 'parent_inode' with the given name
 * and index.  For subvolumes (FIRST_FREE_OBJECTID) a root ref is inserted
 * in the tree root instead of an inode ref.  The dir item is added last;
 * if that fails the earlier backref is rolled back in fail_dir_item.
 */
int btrfs_add_link(struct btrfs_trans_handle *trans,
		   struct inode *parent_inode, struct inode *inode,
		   const char *name, int name_len, int add_backref, u64 index)
{
	int ret = 0;
	struct btrfs_key key;
	struct btrfs_root *root = BTRFS_I(parent_inode)->root;
	u64 ino = btrfs_ino(inode);
	u64 parent_ino = btrfs_ino(parent_inode);

	/* Subvolume entries point at the subvolume's root key. */
	if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
		memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key));
	} else {
		key.objectid = ino;
		key.type = BTRFS_INODE_ITEM_KEY;
		key.offset = 0;
	}

	if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
		ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
					 key.objectid, root->root_key.objectid,
					 parent_ino, index, name, name_len);
	} else if (add_backref) {
		ret = btrfs_insert_inode_ref(trans, root, name, name_len, ino,
					     parent_ino, index);
	}

	/* Nothing to clean up yet if the backref insert failed. */
	if (ret)
		return ret;

	ret = btrfs_insert_dir_item(trans, root, name, name_len,
				    parent_inode, &key,
				    btrfs_inode_type(inode), index);
	if (ret == -EEXIST || ret == -EOVERFLOW)
		goto fail_dir_item;
	else if (ret) {
		btrfs_abort_transaction(trans, root, ret);
		return ret;
	}

	/* Dir size accounting counts each name twice (name + backref). */
	btrfs_i_size_write(parent_inode, parent_inode->i_size +
			   name_len * 2);
	inode_inc_iversion(parent_inode);
	parent_inode->i_mtime = parent_inode->i_ctime =
		current_fs_time(parent_inode->i_sb);
	ret = btrfs_update_inode(trans, root, parent_inode);
	if (ret)
		btrfs_abort_transaction(trans, root, ret);
	return ret;

fail_dir_item:
	/* Roll back the backref inserted above; best effort only. */
	if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
		u64 local_index;
		int err;
		err = btrfs_del_root_ref(trans, root->fs_info->tree_root,
				 key.objectid, root->root_key.objectid,
				 parent_ino, &local_index, name, name_len);

	} else if (add_backref) {
		u64 local_index;
		int err;

		err = btrfs_del_inode_ref(trans, root, name, name_len,
					  ino, parent_ino, &local_index);
	}
	return ret;
}
6421
6422static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
6423 struct inode *dir, struct dentry *dentry,
6424 struct inode *inode, int backref, u64 index)
6425{
6426 int err = btrfs_add_link(trans, dir, inode,
6427 dentry->d_name.name, dentry->d_name.len,
6428 backref, index);
6429 if (err > 0)
6430 err = -EEXIST;
6431 return err;
6432}
6433
/*
 * Create a special file (device node, fifo, or socket) named by @dentry
 * inside @dir.  Returns 0 or a negative errno; on failure after the new
 * inode was allocated, its link count is dropped and the inode released.
 */
static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
			umode_t mode, dev_t rdev)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct inode *inode = NULL;
	int err;
	int drop_inode = 0;
	u64 objectid;
	u64 index = 0;

	/*
	 * Reserve 5 units of metadata space up front -- presumably inode
	 * item + ref, the two dir items, and a security xattr; TODO confirm
	 * the exact breakdown against btrfs_new_inode()'s needs.
	 */
	trans = btrfs_start_transaction(root, 5);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	err = btrfs_find_free_ino(root, &objectid);
	if (err)
		goto out_unlock;

	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
				dentry->d_name.len, btrfs_ino(dir), objectid,
				mode, &index);
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
		goto out_unlock;
	}

	/*
	 * Install the ops vector before security init / d_instantiate so
	 * anything inspecting the inode (e.g. an LSM) sees it fully set up.
	 */
	inode->i_op = &btrfs_special_inode_operations;
	init_special_inode(inode, inode->i_mode, rdev);

	err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
	if (err)
		goto out_unlock_inode;

	err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
	if (err) {
		goto out_unlock_inode;
	} else {
		/* Persist i_mode/rdev set above, then expose the inode. */
		btrfs_update_inode(trans, root, inode);
		unlock_new_inode(inode);
		d_instantiate(dentry, inode);
	}

out_unlock:
	btrfs_end_transaction(trans, root);
	btrfs_balance_delayed_items(root);
	btrfs_btree_balance_dirty(root);
	if (drop_inode) {
		/* Undo the link count taken by btrfs_new_inode and free. */
		inode_dec_link_count(inode);
		iput(inode);
	}
	return err;

out_unlock_inode:
	drop_inode = 1;
	unlock_new_inode(inode);
	goto out_unlock;

}
6504
/*
 * VFS ->create: make a new regular file named by @dentry in @dir.
 * @excl is unused here; exclusivity is handled by the VFS.  Returns 0
 * or a negative errno; on failure after inode allocation, the inode's
 * link count is dropped and the inode released.
 */
static int btrfs_create(struct inode *dir, struct dentry *dentry,
			umode_t mode, bool excl)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct inode *inode = NULL;
	int drop_inode_on_err = 0;
	int err;
	u64 objectid;
	u64 index = 0;

	/*
	 * 5 metadata units, matching the reservation made by the other
	 * create-style operations in this file (mknod/mkdir).
	 */
	trans = btrfs_start_transaction(root, 5);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	err = btrfs_find_free_ino(root, &objectid);
	if (err)
		goto out_unlock;

	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
				dentry->d_name.len, btrfs_ino(dir), objectid,
				mode, &index);
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
		goto out_unlock;
	}
	drop_inode_on_err = 1;

	/*
	 * Wire up file ops before security init / d_instantiate so any
	 * LSM probing the inode sees the full ops vectors.
	 */
	inode->i_fop = &btrfs_file_operations.kabi_fops;
	inode->i_op = &btrfs_file_inode_operations;
	inode->i_mapping->a_ops = &btrfs_aops;
	inode->i_mapping->backing_dev_info = &root->fs_info->bdi;

	err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
	if (err)
		goto out_unlock_inode;

	err = btrfs_update_inode(trans, root, inode);
	if (err)
		goto out_unlock_inode;

	err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
	if (err)
		goto out_unlock_inode;

	BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
	unlock_new_inode(inode);
	d_instantiate(dentry, inode);

out_unlock:
	btrfs_end_transaction(trans, root);
	if (err && drop_inode_on_err) {
		inode_dec_link_count(inode);
		iput(inode);
	}
	btrfs_balance_delayed_items(root);
	btrfs_btree_balance_dirty(root);
	return err;

out_unlock_inode:
	unlock_new_inode(inode);
	goto out_unlock;

}
6579
/*
 * VFS ->link: create a hard link @dentry in @dir to the inode behind
 * @old_dentry.  Hard links across subvolumes are rejected with -EXDEV,
 * and the per-inode link limit is enforced with -EMLINK.
 */
static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
		      struct dentry *dentry)
{
	struct btrfs_trans_handle *trans = NULL;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct inode *inode = old_dentry->d_inode;
	u64 index;
	int err;
	int drop_inode = 0;

	/* Source and target must live in the same subvolume (root). */
	if (root->objectid != BTRFS_I(inode)->root->objectid)
		return -EXDEV;

	if (inode->i_nlink >= BTRFS_LINK_MAX)
		return -EMLINK;

	err = btrfs_set_inode_index(dir, &index);
	if (err)
		goto fail;

	/*
	 * 5 metadata units, matching the other namespace operations in
	 * this file (create/mknod/mkdir).
	 */
	trans = btrfs_start_transaction(root, 5);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		trans = NULL;
		goto fail;
	}

	/* Clear the cached dir index; it no longer has a single value. */
	BTRFS_I(inode)->dir_index = 0ULL;
	inc_nlink(inode);
	inode_inc_iversion(inode);
	inode->i_ctime = current_fs_time(inode->i_sb);
	/* Reference for d_instantiate() below. */
	ihold(inode);
	set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);

	err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index);

	if (err) {
		drop_inode = 1;
	} else {
		struct dentry *parent = dentry->d_parent;
		err = btrfs_update_inode(trans, root, inode);
		if (err)
			goto fail;
		d_instantiate(dentry, inode);
		/* Record the new name in the tree log for fsync. */
		btrfs_log_new_name(trans, inode, NULL, parent);
	}

	btrfs_balance_delayed_items(root);
fail:
	if (trans)
		btrfs_end_transaction(trans, root);
	if (drop_inode) {
		/* Undo inc_nlink() and release the ihold() reference. */
		inode_dec_link_count(inode);
		iput(inode);
	}
	btrfs_btree_balance_dirty(root);
	return err;
}
6645
/*
 * VFS ->mkdir: create a new directory named by @dentry inside @dir.
 * Returns 0 or a negative errno; on failure after inode allocation the
 * inode's link count is dropped and the inode released.
 */
static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	struct inode *inode = NULL;
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	int err = 0;
	int drop_on_err = 0;
	u64 objectid = 0;
	u64 index = 0;

	/*
	 * 5 metadata units, matching the other create-style operations
	 * in this file.
	 */
	trans = btrfs_start_transaction(root, 5);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	err = btrfs_find_free_ino(root, &objectid);
	if (err)
		goto out_fail;

	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
				dentry->d_name.len, btrfs_ino(dir), objectid,
				S_IFDIR | mode, &index);
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
		goto out_fail;
	}

	drop_on_err = 1;

	/* Directory ops are wrapped; flag the inode accordingly. */
	inode->i_op = &btrfs_dir_inode_operations.ops;
	inode->i_flags |= S_IOPS_WRAPPER;
	inode->i_fop = &btrfs_dir_file_operations;

	err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
	if (err)
		goto out_fail_inode;

	/* New directory starts empty. */
	btrfs_i_size_write(inode, 0);
	err = btrfs_update_inode(trans, root, inode);
	if (err)
		goto out_fail_inode;

	err = btrfs_add_link(trans, dir, inode, dentry->d_name.name,
			     dentry->d_name.len, 0, index);
	if (err)
		goto out_fail_inode;

	d_instantiate(dentry, inode);

	/*
	 * Note the inode is instantiated before being unlocked here,
	 * unlike the regular-file paths above -- the I_NEW bit keeps
	 * other lookups from using it until unlock_new_inode().
	 */
	unlock_new_inode(inode);
	drop_on_err = 0;

out_fail:
	btrfs_end_transaction(trans, root);
	if (drop_on_err) {
		inode_dec_link_count(inode);
		iput(inode);
	}
	btrfs_balance_delayed_items(root);
	btrfs_btree_balance_dirty(root);
	return err;

out_fail_inode:
	unlock_new_inode(inode);
	goto out_fail;
}
6719
6720
6721static struct extent_map *next_extent_map(struct extent_map *em)
6722{
6723 struct rb_node *next;
6724
6725 next = rb_next(&em->rb_node);
6726 if (!next)
6727 return NULL;
6728 return container_of(next, struct extent_map, rb_node);
6729}
6730
6731static struct extent_map *prev_extent_map(struct extent_map *em)
6732{
6733 struct rb_node *prev;
6734
6735 prev = rb_prev(&em->rb_node);
6736 if (!prev)
6737 return NULL;
6738 return container_of(prev, struct extent_map, rb_node);
6739}
6740
6741
6742
6743
6744
6745
/*
 * Trim @em so it fits into the gap around @existing (the map that made
 * add_extent_mapping() fail with -EEXIST) and insert it.  @map_start is
 * the offset the caller actually cares about and must lie inside @em.
 * Returns the result of the retried add_extent_mapping().
 */
static int merge_extent_mapping(struct extent_map_tree *em_tree,
				struct extent_map *existing,
				struct extent_map *em,
				u64 map_start)
{
	struct extent_map *prev;
	struct extent_map *next;
	u64 start;
	u64 end;
	u64 start_diff;

	BUG_ON(map_start < em->start || map_start >= extent_map_end(em));

	/* Find the maps bracketing the gap that contains map_start. */
	if (existing->start > map_start) {
		next = existing;
		prev = prev_extent_map(next);
	} else {
		prev = existing;
		next = next_extent_map(prev);
	}

	/* Clamp [start, end) to both the gap and em's own range. */
	start = prev ? extent_map_end(prev) : em->start;
	start = max_t(u64, start, em->start);
	end = next ? next->start : extent_map_end(em);
	end = min_t(u64, end, extent_map_end(em));
	/* start_diff must be computed before em->start is rewritten. */
	start_diff = start - em->start;
	em->start = start;
	em->len = end - start;
	/* Shift the on-disk range too, unless compressed (whole-extent). */
	if (em->block_start < EXTENT_MAP_LAST_BYTE &&
	    !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
		em->block_start += start_diff;
		em->block_len -= start_diff;
	}
	return add_extent_mapping(em_tree, em, 0);
}
6781
6782static noinline int uncompress_inline(struct btrfs_path *path,
6783 struct page *page,
6784 size_t pg_offset, u64 extent_offset,
6785 struct btrfs_file_extent_item *item)
6786{
6787 int ret;
6788 struct extent_buffer *leaf = path->nodes[0];
6789 char *tmp;
6790 size_t max_size;
6791 unsigned long inline_size;
6792 unsigned long ptr;
6793 int compress_type;
6794
6795 WARN_ON(pg_offset != 0);
6796 compress_type = btrfs_file_extent_compression(leaf, item);
6797 max_size = btrfs_file_extent_ram_bytes(leaf, item);
6798 inline_size = btrfs_file_extent_inline_item_len(leaf,
6799 btrfs_item_nr(path->slots[0]));
6800 tmp = kmalloc(inline_size, GFP_NOFS);
6801 if (!tmp)
6802 return -ENOMEM;
6803 ptr = btrfs_file_extent_inline_start(item);
6804
6805 read_extent_buffer(leaf, tmp, ptr, inline_size);
6806
6807 max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size);
6808 ret = btrfs_decompress(compress_type, tmp, page,
6809 extent_offset, inline_size, max_size);
6810 kfree(tmp);
6811 return ret;
6812}
6813
6814
6815
6816
6817
6818
6819
6820
6821
6822
/*
 * Core extent-mapping lookup: return an extent_map covering @start for up
 * to @len bytes of @inode, consulting the in-memory extent tree first and
 * falling back to the on-disk file extent items.  Inline extents are read
 * (and decompressed) into @page when one is supplied and @create == 0.
 * Missing ranges come back as EXTENT_MAP_HOLE maps.  Returns a referenced
 * extent_map or an ERR_PTR; never NULL.
 */
struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
				    size_t pg_offset, u64 start, u64 len,
				    int create)
{
	int ret;
	int err = 0;
	u64 extent_start = 0;
	u64 extent_end = 0;
	u64 objectid = btrfs_ino(inode);
	u32 found_type;
	struct btrfs_path *path = NULL;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_file_extent_item *item;
	struct extent_buffer *leaf;
	struct btrfs_key found_key;
	struct extent_map *em = NULL;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_trans_handle *trans = NULL;
	/* Inline data is not materialized when there is no page or on create. */
	const bool new_inline = !page || create;

again:
	/* Fast path: a cached mapping may already cover @start. */
	read_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, start, len);
	if (em)
		em->bdev = root->fs_info->fs_devices->latest_bdev;
	read_unlock(&em_tree->lock);

	if (em) {
		if (em->start > start || em->start + em->len <= start)
			free_extent_map(em);
		else if (em->block_start == EXTENT_MAP_INLINE && page)
			/* Inline with a page to fill: must re-read the leaf. */
			free_extent_map(em);
		else
			goto out;
	}
	em = alloc_extent_map();
	if (!em) {
		err = -ENOMEM;
		goto out;
	}
	em->bdev = root->fs_info->fs_devices->latest_bdev;
	em->start = EXTENT_MAP_HOLE;
	em->orig_start = EXTENT_MAP_HOLE;
	em->len = (u64)-1;
	em->block_len = (u64)-1;

	if (!path) {
		path = btrfs_alloc_path();
		if (!path) {
			err = -ENOMEM;
			goto out;
		}

		/* Likely followed by nearby lookups, so read ahead. */
		path->reada = READA_FORWARD;
	}

	ret = btrfs_lookup_file_extent(trans, root, path,
				       objectid, start, trans != NULL);
	if (ret < 0) {
		err = ret;
		goto out;
	}

	if (ret != 0) {
		/* No exact match: step back to the preceding item. */
		if (path->slots[0] == 0)
			goto not_found;
		path->slots[0]--;
	}

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0],
			      struct btrfs_file_extent_item);

	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
	found_type = found_key.type;
	if (found_key.objectid != objectid ||
	    found_type != BTRFS_EXTENT_DATA_KEY) {
		/*
		 * The preceding item belongs to some other inode or key
		 * type; treat everything before @start as unknown and let
		 * the "next" logic below examine the following item.
		 */
		extent_end = start;
		goto next;
	}

	found_type = btrfs_file_extent_type(leaf, item);
	extent_start = found_key.offset;
	if (found_type == BTRFS_FILE_EXTENT_REG ||
	    found_type == BTRFS_FILE_EXTENT_PREALLOC) {
		extent_end = extent_start +
		       btrfs_file_extent_num_bytes(leaf, item);
	} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
		size_t size;
		size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
		extent_end = ALIGN(extent_start + size, root->sectorsize);
	}
next:
	if (start >= extent_end) {
		/* Found extent ends before @start: look at the next item. */
		path->slots[0]++;
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0) {
				err = ret;
				goto out;
			}
			if (ret > 0)
				goto not_found;
			leaf = path->nodes[0];
		}
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != objectid ||
		    found_key.type != BTRFS_EXTENT_DATA_KEY)
			goto not_found;
		if (start + len <= found_key.offset)
			goto not_found;
		if (start > found_key.offset)
			goto next;
		/* Hole between @start and the next extent. */
		em->start = start;
		em->orig_start = start;
		em->len = found_key.offset - start;
		goto not_found_em;
	}

	btrfs_extent_item_to_extent_map(inode, path, item, new_inline, em);

	if (found_type == BTRFS_FILE_EXTENT_REG ||
	    found_type == BTRFS_FILE_EXTENT_PREALLOC) {
		goto insert;
	} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
		unsigned long ptr;
		char *map;
		size_t size;
		size_t extent_offset;
		size_t copy_size;

		if (new_inline)
			goto out;

		size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
		extent_offset = page_offset(page) + pg_offset - extent_start;
		copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset,
				size - extent_offset);
		em->start = extent_start + extent_offset;
		em->len = ALIGN(copy_size, root->sectorsize);
		em->orig_block_len = em->len;
		em->orig_start = em->start;
		ptr = btrfs_file_extent_inline_start(item) + extent_offset;
		if (create == 0 && !PageUptodate(page)) {
			/* Materialize the inline data into @page. */
			if (btrfs_file_extent_compression(leaf, item) !=
			    BTRFS_COMPRESS_NONE) {
				ret = uncompress_inline(path, page, pg_offset,
							extent_offset, item);
				if (ret) {
					err = ret;
					goto out;
				}
			} else {
				map = kmap(page);
				read_extent_buffer(leaf, map + pg_offset, ptr,
						   copy_size);
				/* Zero the tail beyond the inline data. */
				if (pg_offset + copy_size < PAGE_CACHE_SIZE) {
					memset(map + pg_offset + copy_size, 0,
					       PAGE_CACHE_SIZE - pg_offset -
					       copy_size);
				}
				kunmap(page);
			}
			flush_dcache_page(page);
		} else if (create && PageUptodate(page)) {
			/* Writing back inline data: believed unreachable. */
			BUG();
			if (!trans) {
				kunmap(page);
				free_extent_map(em);
				em = NULL;

				btrfs_release_path(path);
				trans = btrfs_join_transaction(root);

				if (IS_ERR(trans))
					return ERR_CAST(trans);
				goto again;
			}
			map = kmap(page);
			write_extent_buffer(leaf, map + pg_offset, ptr,
					    copy_size);
			kunmap(page);
			btrfs_mark_buffer_dirty(leaf);
		}
		set_extent_uptodate(io_tree, em->start,
				    extent_map_end(em) - 1, NULL, GFP_NOFS);
		goto insert;
	}
not_found:
	/* Nothing on disk covers the range: report a hole. */
	em->start = start;
	em->orig_start = start;
	em->len = len;
not_found_em:
	em->block_start = EXTENT_MAP_HOLE;
	set_bit(EXTENT_FLAG_VACANCY, &em->flags);
insert:
	btrfs_release_path(path);
	if (em->start > start || extent_map_end(em) <= start) {
		btrfs_err(root->fs_info,
			  "bad extent! em: [%llu %llu] passed [%llu %llu]",
			  em->start, em->len, start, len);
		err = -EIO;
		goto out;
	}

	err = 0;
	write_lock(&em_tree->lock);
	ret = add_extent_mapping(em_tree, em, 0);
	/*
	 * -EEXIST means someone inserted an overlapping mapping while we
	 * were not holding the tree lock; reconcile with it below.
	 */
	if (ret == -EEXIST) {
		struct extent_map *existing;

		ret = 0;

		existing = search_extent_mapping(em_tree, start, len);

		/*
		 * Identical mapping already present: just use it.
		 */
		if (existing->start == em->start &&
		    extent_map_end(existing) == extent_map_end(em) &&
		    em->block_start == existing->block_start) {
			/*
			 * The existing extent map already completely covers
			 * our range.
			 */
			free_extent_map(em);
			em = existing;
			err = 0;

		} else if (start >= extent_map_end(existing) ||
		    start <= existing->start) {
			/*
			 * The existing map does not cover @start itself, so
			 * trim our new map into the surrounding gap.
			 */
			err = merge_extent_mapping(em_tree, existing,
						   em, start);
			free_extent_map(existing);
			if (err) {
				free_extent_map(em);
				em = NULL;
			}
		} else {
			free_extent_map(em);
			em = existing;
			err = 0;
		}
	}
	write_unlock(&em_tree->lock);
out:

	trace_btrfs_get_extent(root, em);

	btrfs_free_path(path);
	if (trans) {
		ret = btrfs_end_transaction(trans, root);
		if (!err)
			err = ret;
	}
	if (err) {
		free_extent_map(em);
		return ERR_PTR(err);
	}
	BUG_ON(!em);
	return em;
}
7103
/*
 * Like btrfs_get_extent(), but for fiemap/seek users: when the on-disk
 * answer is a hole or prealloc, also consult the delalloc bits in the io
 * tree so not-yet-flushed writes show up as EXTENT_MAP_DELALLOC ranges.
 * Returns a referenced extent_map or an ERR_PTR.
 */
struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
					   size_t pg_offset, u64 start, u64 len,
					   int create)
{
	struct extent_map *em;
	struct extent_map *hole_em = NULL;
	u64 range_start = start;
	u64 end;
	u64 found;
	u64 found_end;
	int err = 0;

	em = btrfs_get_extent(inode, page, pg_offset, start, len, create);
	if (IS_ERR(em))
		return em;
	if (em) {
		/*
		 * Real, written extents are returned as-is; holes and
		 * prealloc extents are kept aside while we look for
		 * delalloc that may override them.
		 */
		if (em->block_start != EXTENT_MAP_HOLE &&
		    !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
			return em;
		else
			hole_em = em;
	}

	/* Inclusive end of the search range, guarding against overflow. */
	end = start + len;
	if (end < start)
		end = (u64)-1;
	else
		end -= 1;

	em = NULL;

	/* Find the first delalloc range intersecting [start, end]. */
	found = count_range_bits(&BTRFS_I(inode)->io_tree, &range_start,
				 end, len, EXTENT_DELALLOC, 1);
	found_end = range_start + found;
	if (found_end < range_start)
		found_end = (u64)-1;

	/*
	 * No delalloc inside our range: whatever hole/prealloc map we got
	 * from btrfs_get_extent() stands.
	 */
	if (range_start > end || found_end <= start) {
		em = hole_em;
		hole_em = NULL;
		goto out;
	}

	/* Clip the delalloc range to start at or after @start. */
	range_start = max(start, range_start);
	found = found_end - range_start;

	if (found > 0) {
		u64 hole_start = start;
		u64 hole_len = len;

		em = alloc_extent_map();
		if (!em) {
			err = -ENOMEM;
			goto out;
		}
		/*
		 * Work out how the hole/prealloc map and the delalloc
		 * range overlap; whichever starts first wins, and the
		 * returned map is trimmed to that leading portion.
		 */
		if (hole_em) {
			u64 calc_end = extent_map_end(hole_em);

			if (calc_end <= start || (hole_em->start > end)) {
				free_extent_map(hole_em);
				hole_em = NULL;
			} else {
				hole_start = max(hole_em->start, start);
				hole_len = calc_end - hole_start;
			}
		}
		em->bdev = NULL;
		if (hole_em && range_start > hole_start) {
			/*
			 * The hole starts before the delalloc: return the
			 * hole portion up to where delalloc begins.
			 */
			em->len = min(hole_len,
				      range_start - hole_start);
			em->start = hole_start;
			em->orig_start = hole_start;
			/* Preserve prealloc identity from the hole map. */
			em->block_start = hole_em->block_start;
			em->block_len = hole_len;
			if (test_bit(EXTENT_FLAG_PREALLOC, &hole_em->flags))
				set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
		} else {
			/* Delalloc comes first: report it directly. */
			em->start = range_start;
			em->len = found;
			em->orig_start = range_start;
			em->block_start = EXTENT_MAP_DELALLOC;
			em->block_len = found;
		}
	} else if (hole_em) {
		return hole_em;
	}
out:

	free_extent_map(hole_em);
	if (err) {
		free_extent_map(em);
		return ERR_PTR(err);
	}
	return em;
}
7230
7231static struct extent_map *btrfs_create_dio_extent(struct inode *inode,
7232 const u64 start,
7233 const u64 len,
7234 const u64 orig_start,
7235 const u64 block_start,
7236 const u64 block_len,
7237 const u64 orig_block_len,
7238 const u64 ram_bytes,
7239 const int type)
7240{
7241 struct extent_map *em = NULL;
7242 int ret;
7243
7244 if (type != BTRFS_ORDERED_NOCOW) {
7245 em = create_pinned_em(inode, start, len, orig_start,
7246 block_start, block_len, orig_block_len,
7247 ram_bytes, type);
7248 if (IS_ERR(em))
7249 goto out;
7250 }
7251 ret = btrfs_add_ordered_extent_dio(inode, start, block_start,
7252 len, block_len, type);
7253 if (ret) {
7254 if (em) {
7255 free_extent_map(em);
7256 btrfs_drop_extent_cache(inode, start,
7257 start + len - 1, 0);
7258 }
7259 em = ERR_PTR(ret);
7260 }
7261 out:
7262
7263 return em;
7264}
7265
/*
 * Allocate a brand-new data extent for a direct-IO write at @start and
 * hook up its extent map and ordered extent.  Returns the extent map or
 * an ERR_PTR; the reserved extent is released again on failure.
 */
static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
						  u64 start, u64 len)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_map *em;
	struct btrfs_key ins;
	u64 alloc_hint;
	int ret;

	alloc_hint = get_extent_allocation_hint(inode, start, len);
	/* ins.objectid/offset receive the allocated bytenr and length. */
	ret = btrfs_reserve_extent(root, len, len, root->sectorsize, 0,
				   alloc_hint, &ins, 1, 1);
	if (ret)
		return ERR_PTR(ret);

	em = btrfs_create_dio_extent(inode, start, ins.offset, start,
				     ins.objectid, ins.offset, ins.offset,
				     ins.offset, 0);
	btrfs_dec_block_group_reservations(root->fs_info, ins.objectid);
	if (IS_ERR(em))
		/* Give the reserved extent back; nothing references it. */
		btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);

	return em;
}
7290
7291
7292
7293
7294
/*
 * Decide whether a write at @offset for *@len bytes may be done in place
 * (NOCOW) instead of COW.  On success (*@len possibly shrunk to the part
 * of the extent that qualifies) returns 1 and fills @orig_start,
 * @orig_block_len and @ram_bytes when @orig_start is non-NULL.  Returns 0
 * when COW is required, -EAGAIN when prealloc overlaps delalloc, or a
 * negative errno on lookup failure.
 */
noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
			      u64 *orig_start, u64 *orig_block_len,
			      u64 *ram_bytes)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_path *path;
	int ret;
	struct extent_buffer *leaf;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 disk_bytenr;
	u64 backref_offset;
	u64 extent_end;
	u64 num_bytes;
	int slot;
	int found_type;
	bool nocow = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode),
				       offset, 0);
	if (ret < 0)
		goto out;

	slot = path->slots[0];
	if (ret == 1) {
		if (slot == 0) {
			/* No extent item precedes @offset: must COW. */
			ret = 0;
			goto out;
		}
		slot--;
	}
	ret = 0;
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, slot);
	if (key.objectid != btrfs_ino(inode) ||
	    key.type != BTRFS_EXTENT_DATA_KEY) {
		/* Preceding item is not a file extent of this inode. */
		goto out;
	}

	if (key.offset > offset) {
		/* Extent starts after @offset: no covering extent. */
		goto out;
	}

	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
	found_type = btrfs_file_extent_type(leaf, fi);
	if (found_type != BTRFS_FILE_EXTENT_REG &&
	    found_type != BTRFS_FILE_EXTENT_PREALLOC) {
		/* Inline extents can never be written in place. */
		goto out;
	}

	/* A regular extent only qualifies if the inode is NODATACOW. */
	if (!nocow && found_type == BTRFS_FILE_EXTENT_REG)
		goto out;

	extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
	if (extent_end <= offset)
		goto out;

	disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
	if (disk_bytenr == 0)
		goto out;

	/* Transformed extents cannot be overwritten in place. */
	if (btrfs_file_extent_compression(leaf, fi) ||
	    btrfs_file_extent_encryption(leaf, fi) ||
	    btrfs_file_extent_other_encoding(leaf, fi))
		goto out;

	backref_offset = btrfs_file_extent_offset(leaf, fi);

	if (orig_start) {
		*orig_start = key.offset - backref_offset;
		*orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
		*ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
	}

	if (btrfs_extent_readonly(root, disk_bytenr))
		goto out;

	num_bytes = min(offset + *len, extent_end) - offset;
	if (!nocow && found_type == BTRFS_FILE_EXTENT_PREALLOC) {
		u64 range_end;

		/* Prealloc overlapping dirty delalloc: caller must retry. */
		range_end = round_up(offset + num_bytes, root->sectorsize) - 1;
		ret = test_range_bit(io_tree, offset, range_end,
				     EXTENT_DELALLOC, 0, NULL);
		if (ret) {
			ret = -EAGAIN;
			goto out;
		}
	}

	btrfs_release_path(path);

	/*
	 * Join the running transaction so the backref walk below sees a
	 * consistent view while checking whether the extent is shared.
	 */
	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		ret = 0;
		goto out;
	}

	ret = btrfs_cross_ref_exist(trans, root, btrfs_ino(inode),
				    key.offset - backref_offset, disk_bytenr);
	btrfs_end_transaction(trans, root);
	if (ret) {
		/* Extent is shared (or check failed): fall back to COW. */
		ret = 0;
		goto out;
	}

	/*
	 * Overwriting data that has checksums would leave the csum tree
	 * stale, so any csum in the target range forces COW.
	 */
	disk_bytenr += backref_offset;
	disk_bytenr += offset - key.offset;
	if (csum_exist_in_range(root, disk_bytenr, num_bytes))
		goto out;

	/* All checks passed: report the in-place-writable length. */
	*len = num_bytes;
	ret = 1;
out:
	btrfs_free_path(path);
	return ret;
}
7435
/*
 * Locklessly check whether any page is present in @inode's page cache
 * within the byte range [@start, @end].  Uses an RCU radix-tree walk,
 * so the answer is only a point-in-time snapshot.
 */
bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end)
{
	struct radix_tree_root *root = &inode->i_mapping->page_tree;
	int found = false;
	void **pagep = NULL;
	struct page *page = NULL;
	int start_idx;
	int end_idx;

	start_idx = start >> PAGE_CACHE_SHIFT;

	/* end is inclusive, so this is the last page index to consider. */
	end_idx = end >> PAGE_CACHE_SHIFT;

	rcu_read_lock();

	/*
	 * Find the first slot at or after start_idx, then pin the page it
	 * holds.  Deref retries and slot races restart the lookup.
	 */
	while (page == NULL &&
	       radix_tree_gang_lookup_slot(root, &pagep, NULL, start_idx, 1)) {
		page = radix_tree_deref_slot(pagep);
		if (unlikely(!page))
			break;

		if (radix_tree_exception(page)) {
			if (radix_tree_deref_retry(page)) {
				page = NULL;
				continue;
			}
			/*
			 * Exceptional entry (e.g. shadow/swap): no real
			 * page here, and nothing after it matters.
			 */
			page = NULL;
			break;
		}

		if (!page_cache_get_speculative(page)) {
			page = NULL;
			continue;
		}

		/*
		 * The page may have been freed and the slot reused between
		 * the deref and the refcount bump; re-check and retry.
		 */
		if (unlikely(page != *pagep)) {
			page_cache_release(page);
			page = NULL;
		}
	}

	if (page) {
		if (page->index <= end_idx)
			found = true;
		page_cache_release(page);
	}

	rcu_read_unlock();
	return found;
}
7505
/*
 * Lock the io-tree range [@lockstart, @lockend] for a direct IO,
 * waiting out any ordered extents (and, for writes, any cached pages)
 * that overlap it.  Returns 0 with the range locked, or -ENOTBLK when
 * DIO should fall back to buffered IO.
 */
static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
			      struct extent_state **cached_state, int writing)
{
	struct btrfs_ordered_extent *ordered;
	int ret = 0;

	while (1) {
		lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
				 cached_state);
		/*
		 * Check for ordered extents only after taking the lock, so
		 * none can be created in the range behind our back.
		 */
		ordered = btrfs_lookup_ordered_range(inode, lockstart,
						     lockend - lockstart + 1);

		/*
		 * Done when the range is quiet: no ordered extent, and for
		 * writes no page-cache pages either (a cached page would
		 * otherwise go stale underneath a direct write).
		 */
		if (!ordered &&
		    (!writing ||
		     !btrfs_page_exists_in_range(inode, lockstart, lockend)))
			break;

		/* Drop the lock before waiting, then retry from the top. */
		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
				     cached_state, GFP_NOFS);

		if (ordered) {
			/*
			 * Only wait on the ordered extent when it is safe:
			 * for writes, or when it was itself produced by
			 * direct IO.  Otherwise waiting could deadlock
			 * against buffered writeback, so fall back to
			 * buffered IO with -ENOTBLK instead.
			 */
			if (writing ||
			    test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags))
				btrfs_start_ordered_extent(inode, ordered, 1);
			else
				ret = -ENOTBLK;
			btrfs_put_ordered_extent(ordered);
		} else {
			/*
			 * No ordered extent, so a cached page blocked us.
			 * We cannot reliably invalidate it from here, so
			 * bail out to buffered IO.
			 */
			ret = -ENOTBLK;
		}

		if (ret)
			break;

		cond_resched();
	}

	return ret;
}
7585
/*
 * Build a PINNED extent map for [@start, @start+@len) pointing at
 * @block_start and force it into the inode's extent tree, dropping any
 * cached mappings that collide.  @type == BTRFS_ORDERED_PREALLOC also
 * sets the FILLING flag.  Returns the map or an ERR_PTR.
 */
static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
					   u64 len, u64 orig_start,
					   u64 block_start, u64 block_len,
					   u64 orig_block_len, u64 ram_bytes,
					   int type)
{
	struct extent_map_tree *em_tree;
	struct extent_map *em;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret;

	em_tree = &BTRFS_I(inode)->extent_tree;
	em = alloc_extent_map();
	if (!em)
		return ERR_PTR(-ENOMEM);

	em->start = start;
	em->orig_start = orig_start;
	em->mod_start = start;
	em->mod_len = len;
	em->len = len;
	em->block_len = block_len;
	em->block_start = block_start;
	em->bdev = root->fs_info->fs_devices->latest_bdev;
	em->orig_block_len = orig_block_len;
	em->ram_bytes = ram_bytes;
	/* -1 marks the map as not yet tied to a committed generation. */
	em->generation = -1;
	set_bit(EXTENT_FLAG_PINNED, &em->flags);
	if (type == BTRFS_ORDERED_PREALLOC)
		set_bit(EXTENT_FLAG_FILLING, &em->flags);

	/* Keep evicting overlapping cached maps until insertion sticks. */
	do {
		btrfs_drop_extent_cache(inode, em->start,
				em->start + em->len - 1, 0);
		write_lock(&em_tree->lock);
		ret = add_extent_mapping(em_tree, em, 1);
		write_unlock(&em_tree->lock);
	} while (ret == -EEXIST);

	if (ret) {
		free_extent_map(em);
		return ERR_PTR(ret);
	}

	return em;
}
7632
/*
 * Per-write direct-IO bookkeeping, stashed in current->journal_info for
 * the duration of the DIO (see btrfs_get_blocks_direct).
 */
struct btrfs_dio_data {
	/* outstanding extents still accounted to this DIO write */
	u64 outstanding_extents;
	/* bytes of space reservation not yet consumed by mapped ranges */
	u64 reserve;
};
7637
7638static void adjust_dio_outstanding_extents(struct inode *inode,
7639 struct btrfs_dio_data *dio_data,
7640 const u64 len)
7641{
7642 unsigned num_extents;
7643
7644 num_extents = (unsigned) div64_u64(len + BTRFS_MAX_EXTENT_SIZE - 1,
7645 BTRFS_MAX_EXTENT_SIZE);
7646
7647
7648
7649
7650
7651 if (dio_data->outstanding_extents >= num_extents) {
7652 dio_data->outstanding_extents -= num_extents;
7653 } else {
7654
7655
7656
7657
7658
7659 u64 num_needed = num_extents - dio_data->outstanding_extents;
7660
7661 spin_lock(&BTRFS_I(inode)->lock);
7662 BTRFS_I(inode)->outstanding_extents += num_needed;
7663 spin_unlock(&BTRFS_I(inode)->lock);
7664 }
7665}
7666
/*
 * get_block callback for btrfs direct IO.  Maps the file range starting
 * at @iblock into @bh_result, locking the range, reusing NOCOW/prealloc
 * extents when possible and allocating new extents otherwise.  DIO write
 * state (reservation, outstanding extents) is passed in via
 * current->journal_info as a struct btrfs_dio_data.
 */
static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
				   struct buffer_head *bh_result, int create)
{
	struct extent_map *em;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_state *cached_state = NULL;
	struct btrfs_dio_data *dio_data = NULL;
	u64 start = iblock << inode->i_blkbits;
	u64 lockstart, lockend;
	u64 len = bh_result->b_size;
	int unlock_bits = EXTENT_LOCKED;
	int ret = 0;

	if (create)
		unlock_bits |= EXTENT_DIRTY;
	else
		/* Reads are mapped at most one sector at a time. */
		len = min_t(u64, len, root->sectorsize);

	lockstart = start;
	lockend = start + len - 1;

	if (current->journal_info) {
		/*
		 * A DIO write stashed its btrfs_dio_data here; take it out
		 * while we work so nested transaction starts don't mistake
		 * it for a transaction handle.
		 */
		dio_data = current->journal_info;
		current->journal_info = NULL;
	}

	/*
	 * Lock the range, waiting out ordered extents (and cached pages
	 * for writes); -ENOTBLK sends the caller to buffered IO.
	 */
	if (lock_extent_direct(inode, lockstart, lockend, &cached_state,
			       create)) {
		ret = -ENOTBLK;
		goto err;
	}

	em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
	if (IS_ERR(em)) {
		ret = PTR_ERR(em);
		goto unlock_err;
	}

	/*
	 * Compressed and inline extents cannot be read or written via
	 * direct IO; punt to buffered IO.
	 */
	if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
	    em->block_start == EXTENT_MAP_INLINE) {
		free_extent_map(em);
		ret = -ENOTBLK;
		goto unlock_err;
	}

	/* Reading a hole/prealloc: leave bh unmapped (reads as zeroes). */
	if (!create && (em->block_start == EXTENT_MAP_HOLE ||
			test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
		free_extent_map(em);
		goto unlock_err;
	}

	if (!create) {
		/* Clip to the mapped extent; unlock only the mapped part. */
		len = min(len, em->len - (start - em->start));
		lockstart = start + len;
		goto unlock;
	}

	/*
	 * Writes: try to reuse a prealloc extent or, for NODATACOW
	 * inodes, write in place over an existing extent.
	 */
	if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
	    ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
	     em->block_start != EXTENT_MAP_HOLE)) {
		int type;
		u64 block_start, orig_start, orig_block_len, ram_bytes;

		if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
			type = BTRFS_ORDERED_PREALLOC;
		else
			type = BTRFS_ORDERED_NOCOW;
		len = min(len, em->len - (start - em->start));
		block_start = em->block_start + (start - em->start);

		if (can_nocow_extent(inode, start, &len, &orig_start,
				     &orig_block_len, &ram_bytes) == 1 &&
		    btrfs_inc_nocow_writers(root->fs_info, block_start)) {
			struct extent_map *em2;

			em2 = btrfs_create_dio_extent(inode, start, len,
						      orig_start, block_start,
						      len, orig_block_len,
						      ram_bytes, type);
			btrfs_dec_nocow_writers(root->fs_info, block_start);
			if (type == BTRFS_ORDERED_PREALLOC) {
				free_extent_map(em);
				em = em2;
			}
			if (em2 && IS_ERR(em2)) {
				ret = PTR_ERR(em2);
				goto unlock_err;
			}
			/*
			 * In-place write: no new data space is consumed, so
			 * release the byte reservation (quota untouched).
			 */
			btrfs_free_reserved_data_space_noquota(inode,
					start, len);
			goto unlock;
		}
	}

	/*
	 * COW path: allocate a fresh extent for the whole requested
	 * buffer size, then clip to what was actually mapped.
	 */
	len = bh_result->b_size;
	free_extent_map(em);
	em = btrfs_new_extent_direct(inode, start, len);
	if (IS_ERR(em)) {
		ret = PTR_ERR(em);
		goto unlock_err;
	}
	len = min(len, em->len - (start - em->start));
unlock:
	bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
		inode->i_blkbits;
	bh_result->b_size = len;
	bh_result->b_bdev = em->bdev;
	set_buffer_mapped(bh_result);
	if (create) {
		if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
			set_buffer_new(bh_result);

		/*
		 * Grow i_size now so the DIO completion can update the
		 * on-disk size without racing a concurrent truncate.
		 */
		if (start + len > i_size_read(inode))
			i_size_write(inode, start + len);

		/* Consume reservation and extent accounting for @len. */
		adjust_dio_outstanding_extents(inode, dio_data, len);
		WARN_ON(dio_data->reserve < len);
		dio_data->reserve -= len;
		current->journal_info = dio_data;
	}

	/*
	 * For reads lockstart was advanced past the mapped range above,
	 * leaving that part locked until IO completion; only clear what
	 * remains.  Writes unlock the whole range.
	 */
	if (lockstart < lockend) {
		clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
				 lockend, unlock_bits, 1, 0,
				 &cached_state, GFP_NOFS);
	} else {
		free_extent_state(cached_state);
	}

	free_extent_map(em);

	return 0;

unlock_err:
	clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			 unlock_bits, 1, 0, &cached_state, GFP_NOFS);
err:
	if (dio_data)
		current->journal_info = dio_data;
	/*
	 * On error the remainder of the DIO will be cancelled, so give
	 * back the extents this range would have consumed.
	 */
	if (create && dio_data)
		adjust_dio_outstanding_extents(inode, dio_data, len);

	return ret;
}
7867
7868static inline int submit_dio_repair_bio(struct inode *inode, struct bio *bio,
7869 int rw, int mirror_num)
7870{
7871 struct btrfs_root *root = BTRFS_I(inode)->root;
7872 int ret;
7873
7874 BUG_ON(rw & REQ_WRITE);
7875
7876 bio_get(bio);
7877
7878 ret = btrfs_bio_wq_end_io(root->fs_info, bio,
7879 BTRFS_WQ_ENDIO_DIO_REPAIR);
7880 if (ret)
7881 goto err;
7882
7883 ret = btrfs_map_bio(root, rw, bio, mirror_num, 0);
7884err:
7885 bio_put(bio);
7886 return ret;
7887}
7888
7889static int btrfs_check_dio_repairable(struct inode *inode,
7890 struct bio *failed_bio,
7891 struct io_failure_record *failrec,
7892 int failed_mirror)
7893{
7894 int num_copies;
7895
7896 num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info,
7897 failrec->logical, failrec->len);
7898 if (num_copies == 1) {
7899
7900
7901
7902
7903
7904 pr_debug("Check DIO Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n",
7905 num_copies, failrec->this_mirror, failed_mirror);
7906 return 0;
7907 }
7908
7909 failrec->failed_mirror = failed_mirror;
7910 failrec->this_mirror++;
7911 if (failrec->this_mirror == failed_mirror)
7912 failrec->this_mirror++;
7913
7914 if (failrec->this_mirror > num_copies) {
7915 pr_debug("Check DIO Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n",
7916 num_copies, failrec->this_mirror, failed_mirror);
7917 return 0;
7918 }
7919
7920 return 1;
7921}
7922
/*
 * Submit a repair read for one failed sector of a direct-IO read.
 *
 * Builds (or reuses) an io_failure_record for [start, end], picks the
 * next mirror via btrfs_check_dio_repairable(), builds a single-page
 * repair bio and submits it with @repair_endio/@repair_arg as its
 * completion context.
 *
 * Returns 0 on successful submission, -EIO when no further mirror can
 * be tried or the repair bio cannot be built, or a negative errno from
 * the helpers.
 */
static int dio_read_error(struct inode *inode, struct bio *failed_bio,
			  struct page *page, unsigned int pgoff,
			  u64 start, u64 end, int failed_mirror,
			  bio_end_io_t *repair_endio, void *repair_arg)
{
	struct io_failure_record *failrec;
	struct bio *bio;
	int isector;
	int read_mode;
	int ret;

	/* the repair path is read-only by construction */
	BUG_ON(failed_bio->bi_rw & REQ_WRITE);

	ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
	if (ret)
		return ret;

	ret = btrfs_check_dio_repairable(inode, failed_bio, failrec,
					 failed_mirror);
	if (!ret) {
		free_io_failure(inode, failrec);
		return -EIO;
	}

	/*
	 * A multi-vec bio, or a single vec larger than one sector, means
	 * the failed bio covered several sectors; use the fail-fast read
	 * mode for the retry in that case.
	 */
	if ((failed_bio->bi_vcnt > 1)
		|| (failed_bio->bi_io_vec->bv_len
			> BTRFS_I(inode)->root->sectorsize))
		read_mode = READ_SYNC | REQ_FAILFAST_DEV;
	else
		read_mode = READ_SYNC;

	/* block index of @start within the failed bio */
	isector = start - btrfs_io_bio(failed_bio)->logical;
	isector >>= inode->i_sb->s_blocksize_bits;
	bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
				      pgoff, isector, repair_endio, repair_arg);
	if (!bio) {
		free_io_failure(inode, failrec);
		return -EIO;
	}

	btrfs_debug(BTRFS_I(inode)->root->fs_info,
		    "Repair DIO Read Error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d\n",
		    read_mode, failrec->this_mirror, failrec->in_validation);

	ret = submit_dio_repair_bio(inode, bio, read_mode,
				    failrec->this_mirror);
	if (ret) {
		/* submission failed: undo the record and drop the bio */
		free_io_failure(inode, failrec);
		bio_put(bio);
	}

	return ret;
}
7976
/* Completion context for a synchronous DIO repair retry read. */
struct btrfs_retry_complete {
	struct completion done;	/* signalled by the retry endio */
	struct inode *inode;	/* inode the retried sector belongs to */
	u64 start;		/* offset of the retried sector (advances from io_bio->logical) */
	int uptodate;		/* set to 1 by the endio when the retry read succeeded */
};
7983
/*
 * Endio for a no-checksum DIO repair read: on success mark the retry
 * uptodate and clear the recorded IO failure, then wake the waiter in
 * __btrfs_correct_data_nocsum().
 */
static void btrfs_retry_endio_nocsum(struct bio *bio, int err)
{
	struct btrfs_retry_complete *done = bio->bi_private;
	struct inode *inode;
	struct bio_vec *bvec;
	int i;

	if (err)
		goto end;

	/* repair bios carry exactly one sector-sized vec */
	ASSERT(bio->bi_vcnt == 1);
	inode = bio->bi_io_vec->bv_page->mapping->host;
	ASSERT(bio->bi_io_vec->bv_len == BTRFS_I(inode)->root->sectorsize);

	done->uptodate = 1;
	bio_for_each_segment_all(bvec, bio, i)
		clean_io_failure(done->inode, done->start, bvec->bv_page, 0);
end:
	complete(&done->done);
	bio_put(bio);
}
8005
8006static int __btrfs_correct_data_nocsum(struct inode *inode,
8007 struct btrfs_io_bio *io_bio)
8008{
8009 struct btrfs_fs_info *fs_info;
8010 struct bio_vec *bvec;
8011 struct btrfs_retry_complete done;
8012 u64 start;
8013 unsigned int pgoff;
8014 u32 sectorsize;
8015 int nr_sectors;
8016 int i;
8017 int ret;
8018
8019 fs_info = BTRFS_I(inode)->root->fs_info;
8020 sectorsize = BTRFS_I(inode)->root->sectorsize;
8021
8022 start = io_bio->logical;
8023 done.inode = inode;
8024
8025 bio_for_each_segment_all(bvec, &io_bio->bio, i) {
8026 nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len);
8027 pgoff = bvec->bv_offset;
8028
8029next_block_or_try_again:
8030 done.uptodate = 0;
8031 done.start = start;
8032 init_completion(&done.done);
8033
8034 ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page,
8035 pgoff, start, start + sectorsize - 1,
8036 io_bio->mirror_num,
8037 btrfs_retry_endio_nocsum, &done);
8038 if (ret)
8039 return ret;
8040
8041 wait_for_completion(&done.done);
8042
8043 if (!done.uptodate) {
8044
8045 goto next_block_or_try_again;
8046 }
8047
8048 start += sectorsize;
8049
8050 if (nr_sectors--) {
8051 pgoff += sectorsize;
8052 goto next_block_or_try_again;
8053 }
8054 }
8055
8056 return 0;
8057}
8058
/*
 * Endio for a checksummed DIO repair read: re-verify the segment
 * against its stored checksum, clear the recorded IO failure when it
 * now checks out, and report the aggregate result to the waiter in
 * __btrfs_subio_endio_read().
 */
static void btrfs_retry_endio(struct bio *bio, int err)
{
	struct btrfs_retry_complete *done = bio->bi_private;
	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
	struct inode *inode;
	struct bio_vec *bvec;
	u64 start;
	int uptodate;
	int ret;
	int i;

	if (err)
		goto end;

	uptodate = 1;

	start = done->start;

	/* repair bios carry exactly one sector-sized vec */
	ASSERT(bio->bi_vcnt == 1);
	inode = bio->bi_io_vec->bv_page->mapping->host;
	ASSERT(bio->bi_io_vec->bv_len == BTRFS_I(inode)->root->sectorsize);

	bio_for_each_segment_all(bvec, bio, i) {
		ret = __readpage_endio_check(done->inode, io_bio, i,
					     bvec->bv_page, bvec->bv_offset,
					     done->start, bvec->bv_len);
		if (!ret)
			clean_io_failure(done->inode, done->start,
					 bvec->bv_page, bvec->bv_offset);
		else
			uptodate = 0;
	}

	done->uptodate = uptodate;
end:
	complete(&done->done);
	bio_put(bio);
}
8097
/*
 * Verify a completed DIO read sector by sector against the stored
 * checksums, synchronously re-reading any bad sector from the other
 * mirrors via dio_read_error()/btrfs_retry_endio().
 *
 * The incoming @err is deliberately discarded: every sector gets csum
 * checked here regardless, and the return value is rebuilt from the
 * per-sector results.  Returns 0 when all sectors verify (possibly
 * after repair), otherwise the last retry-submission error.
 */
static int __btrfs_subio_endio_read(struct inode *inode,
				    struct btrfs_io_bio *io_bio, int err)
{
	struct btrfs_fs_info *fs_info;
	struct bio_vec *bvec;
	struct btrfs_retry_complete done;
	u64 start;
	u64 offset = 0;
	u32 sectorsize;
	int nr_sectors;
	unsigned int pgoff;
	int csum_pos;
	int i;
	int ret;

	fs_info = BTRFS_I(inode)->root->fs_info;
	sectorsize = BTRFS_I(inode)->root->sectorsize;

	err = 0;	/* rebuilt from the per-sector verification below */
	start = io_bio->logical;
	done.inode = inode;

	bio_for_each_segment_all(bvec, &io_bio->bio, i) {
		nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len);

		pgoff = bvec->bv_offset;
next_block:
		/* csum slot for this sector within the whole io_bio */
		csum_pos = BTRFS_BYTES_TO_BLKS(fs_info, offset);
		ret = __readpage_endio_check(inode, io_bio, csum_pos,
					     bvec->bv_page, pgoff, start,
					     sectorsize);
		if (likely(!ret))
			goto next;
try_again:
		done.uptodate = 0;
		done.start = start;
		init_completion(&done.done);

		ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page,
				     pgoff, start, start + sectorsize - 1,
				     io_bio->mirror_num,
				     btrfs_retry_endio, &done);
		if (ret) {
			err = ret;
			goto next;
		}

		wait_for_completion(&done.done);

		if (!done.uptodate) {
			/* this mirror failed too: try the next one */
			goto try_again;
		}
next:
		offset += sectorsize;
		start += sectorsize;

		ASSERT(nr_sectors);

		/* more sectors left in this bvec? */
		if (--nr_sectors) {
			pgoff += sectorsize;
			goto next_block;
		}
	}

	return err;
}
8165
8166static int btrfs_subio_endio_read(struct inode *inode,
8167 struct btrfs_io_bio *io_bio, int err)
8168{
8169 bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
8170
8171 if (skip_csum) {
8172 if (unlikely(err))
8173 return __btrfs_correct_data_nocsum(inode, io_bio);
8174 else
8175 return 0;
8176 } else {
8177 return __btrfs_subio_endio_read(inode, io_bio, err);
8178 }
8179}
8180
/*
 * Final endio for a direct read: run the verify/repair hook when the
 * original (unsplit) bio was submitted, unlock the file range and
 * complete the upper layer's dio.
 */
static void btrfs_endio_direct_read(struct bio *bio, int err)
{
	struct btrfs_dio_private *dip = bio->bi_private;
	struct inode *inode = dip->inode;
	struct bio *dio_bio;
	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);

	/*
	 * Split fragments already ran dip->subio_endio from
	 * btrfs_end_dio_bio(); only the unsplit original still needs the
	 * csum check/repair pass here.
	 */
	if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED)
		err = btrfs_subio_endio_read(inode, io_bio, err);

	unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
		      dip->logical_offset + dip->bytes - 1);
	dio_bio = dip->dio_bio;

	kfree(dip);

	/* if we had errors, make sure the upper dio bio is not uptodate */
	if (err)
		clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
	dio_end_io(dio_bio, err);

	if (io_bio->end_io)
		io_bio->end_io(io_bio, err);
	bio_put(bio);
}
8206
/*
 * Final endio for a direct write: mark the ordered extent(s) covering
 * the written range as done (queueing finish_ordered_fn for each one
 * that completed) and then complete the upper layer's dio.
 */
static void btrfs_endio_direct_write(struct bio *bio, int err)
{
	struct btrfs_dio_private *dip = bio->bi_private;
	struct inode *inode = dip->inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_ordered_extent *ordered = NULL;
	u64 ordered_offset = dip->logical_offset;
	u64 ordered_bytes = dip->bytes;
	struct bio *dio_bio;
	int ret;

again:
	ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
						   &ordered_offset,
						   ordered_bytes, !err);
	if (!ret)
		goto out_test;

	/* hand the completed ordered extent to the endio worker threads */
	btrfs_init_work(&ordered->work, btrfs_endio_write_helper,
			finish_ordered_fn, NULL, NULL);
	btrfs_queue_work(root->fs_info->endio_write_workers,
			 &ordered->work);
out_test:
	/*
	 * The write may span several ordered extents; keep looping until
	 * the whole [logical_offset, logical_offset + bytes) range has
	 * been accounted.
	 */
	if (ordered_offset < dip->logical_offset + dip->bytes) {
		ordered_bytes = dip->logical_offset + dip->bytes -
			ordered_offset;
		ordered = NULL;
		goto again;
	}
	dio_bio = dip->dio_bio;

	kfree(dip);

	/* if we had errors, make sure the upper dio bio is not uptodate */
	if (err)
		clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
	dio_end_io(dio_bio, err);
	bio_put(bio);
}
8250
8251static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
8252 struct bio *bio, int mirror_num,
8253 unsigned long bio_flags, u64 offset)
8254{
8255 int ret;
8256 struct btrfs_root *root = BTRFS_I(inode)->root;
8257 ret = btrfs_csum_one_bio(root, inode, bio, offset, 1);
8258 BUG_ON(ret);
8259 return 0;
8260}
8261
/*
 * Per-fragment endio for split direct-IO bios: record any error in the
 * shared dip, and when the last outstanding fragment finishes, end the
 * original bio.
 */
static void btrfs_end_dio_bio(struct bio *bio, int err)
{
	struct btrfs_dio_private *dip = bio->bi_private;

	if (err)
		btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
			   "direct IO failed ino %llu rw %lu sector %#Lx len %u err no %d",
			   btrfs_ino(dip->inode), bio->bi_rw,
			   (unsigned long long)bio->bi_sector,
			   bio->bi_size, err);

	/* reads get a chance to verify/repair before the error is counted */
	if (dip->subio_endio)
		err = dip->subio_endio(dip->inode, btrfs_io_bio(bio), err);

	if (err) {
		dip->errors = 1;

		/*
		 * Order the store to dip->errors before the atomic_dec
		 * below, so whoever drops pending_bios to zero also sees
		 * the error flag.
		 */
		smp_mb__before_atomic_dec();
	}

	/* only the last finishing fragment completes the original bio */
	if (!atomic_dec_and_test(&dip->pending_bios))
		goto out;

	if (dip->errors) {
		bio_io_error(dip->orig_bio);
	} else {
		set_bit(BIO_UPTODATE, &dip->dio_bio->bi_flags);
		bio_endio(dip->orig_bio, 0);
	}
out:
	bio_put(bio);
}
8299
8300static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
8301 u64 first_sector, gfp_t gfp_flags)
8302{
8303 int nr_vecs = bio_get_nr_vecs(bdev);
8304 return btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags);
8305}
8306
/*
 * For a direct read: look up all data checksums once (against the
 * whole original bio) and point this fragment's csum pointer at the
 * matching slice of the original's csum array.
 */
static inline int btrfs_lookup_and_bind_dio_csum(struct btrfs_root *root,
						 struct inode *inode,
						 struct btrfs_dio_private *dip,
						 struct bio *bio,
						 u64 file_offset)
{
	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
	struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio);
	int ret;

	/*
	 * Only the fragment that starts at logical_offset performs the
	 * csum lookup, and it does so for the entire original bio at
	 * once; later fragments only bind into the result.
	 */
	if (dip->logical_offset == file_offset) {
		ret = btrfs_lookup_bio_sums_dio(root, inode, dip->orig_bio,
						file_offset);
		if (ret)
			return ret;
	}

	if (bio == dip->orig_bio)
		return 0;

	/* block index of this fragment within the original bio */
	file_offset -= dip->logical_offset;
	file_offset >>= inode->i_sb->s_blocksize_bits;
	/* csum entries are u32-sized; offset into the original's array */
	io_bio->csum = (u8 *)(((u32 *)orig_io_bio->csum) + file_offset);

	return 0;
}
8338
/*
 * Submit one fragment of a direct-IO bio: hook reads up to the data
 * endio workqueue, generate csums for writes (inline or via the async
 * submit workers), bind looked-up csums for reads, then map the bio to
 * the device(s).
 */
static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
					 int rw, u64 file_offset, int skip_sum,
					 int async_submit)
{
	struct btrfs_dio_private *dip = bio->bi_private;
	int write = rw & REQ_WRITE;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret;

	/* if there are sync writers on this inode, don't go async */
	if (async_submit)
		async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);

	/* hold a ref across submission so the endio cannot race our put */
	bio_get(bio);

	if (!write) {
		ret = btrfs_bio_wq_end_io(root->fs_info, bio,
					  BTRFS_WQ_ENDIO_DATA);
		if (ret)
			goto err;
	}

	if (skip_sum)
		goto map;

	if (write && async_submit) {
		/* csum generation happens in the submit worker thread */
		ret = btrfs_wq_submit_bio(root->fs_info,
					  inode, rw, bio, 0, 0,
					  file_offset,
					  __btrfs_submit_bio_start_direct_io,
					  __btrfs_submit_bio_done);
		goto err;
	} else if (write) {
		/* synchronous submit: compute the csums right here */
		ret = btrfs_csum_one_bio(root, inode, bio, file_offset, 1);
		if (ret)
			goto err;
	} else {
		ret = btrfs_lookup_and_bind_dio_csum(root, inode, dip, bio,
						     file_offset);
		if (ret)
			goto err;
	}
map:
	ret = btrfs_map_bio(root, rw, bio, 0, async_submit);
err:
	bio_put(bio);
	return ret;
}
8390
/*
 * Split the original direct-IO bio into per-mapping fragments (when it
 * spans more than one chunk mapping) and submit them all;
 * dip->pending_bios tracks the outstanding fragments.
 *
 * Returns 0 in all cases except early mapping/allocation failure:
 * once any fragment has been submitted, errors are reported through
 * dip->errors and bio_io_error() on the original bio instead.
 */
static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
				    int skip_sum)
{
	struct inode *inode = dip->inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct bio *bio;
	struct bio *orig_bio = dip->orig_bio;
	struct bio_vec *bvec = orig_bio->bi_io_vec;
	u64 start_sector = orig_bio->bi_sector;
	u64 file_offset = dip->logical_offset;
	u64 submit_len = 0;
	u64 map_length;
	u32 blocksize = root->sectorsize;
	int async_submit = 0;
	int nr_sectors;
	int ret;
	int i;

	map_length = orig_bio->bi_size;
	ret = btrfs_map_block(root->fs_info, rw, start_sector << 9,
			      &map_length, NULL, 0);
	if (ret)
		return -EIO;

	/* the whole bio fits inside one mapping: submit it unchanged */
	if (map_length >= orig_bio->bi_size) {
		bio = orig_bio;
		dip->flags |= BTRFS_DIO_ORIG_BIO_SUBMITTED;
		goto submit;
	}

	/* raid56 profiles get synchronous submission, others async */
	if (btrfs_get_alloc_profile(root, 1) & BTRFS_BLOCK_GROUP_RAID56_MASK)
		async_submit = 0;
	else
		async_submit = 1;

	bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
	if (!bio)
		return -ENOMEM;

	bio->bi_private = dip;
	bio->bi_end_io = btrfs_end_dio_bio;
	btrfs_io_bio(bio)->logical = file_offset;
	atomic_inc(&dip->pending_bios);

	/* walk the original bio one block at a time */
	while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
		nr_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info, bvec->bv_len);
		i = 0;
next_block:
		if (unlikely(map_length < submit_len + blocksize ||
		    bio_add_page(bio, bvec->bv_page, blocksize,
			    bvec->bv_offset + (i * blocksize)) < blocksize)) {
			/*
			 * This block would cross the current mapping (or
			 * the bio is full): submit what we have and start
			 * a new fragment.  Bump pending_bios before the
			 * submit so the fragment's endio cannot drop the
			 * count to zero while we still use dip.
			 */
			atomic_inc(&dip->pending_bios);
			ret = __btrfs_submit_dio_bio(bio, inode, rw,
						     file_offset, skip_sum,
						     async_submit);
			if (ret) {
				bio_put(bio);
				atomic_dec(&dip->pending_bios);
				goto out_err;
			}

			start_sector += submit_len >> 9;
			file_offset += submit_len;

			submit_len = 0;

			bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev,
						  start_sector, GFP_NOFS);
			if (!bio)
				goto out_err;
			bio->bi_private = dip;
			bio->bi_end_io = btrfs_end_dio_bio;
			btrfs_io_bio(bio)->logical = file_offset;

			/* re-map from the new start sector */
			map_length = orig_bio->bi_size;
			ret = btrfs_map_block(root->fs_info, rw,
					      start_sector << 9,
					      &map_length, NULL, 0);
			if (ret) {
				bio_put(bio);
				goto out_err;
			}

			goto next_block;
		} else {
			submit_len += blocksize;
			if (--nr_sectors) {
				i++;
				goto next_block;
			}
			bvec++;
		}
	}

submit:
	ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
				     async_submit);
	if (!ret)
		return 0;

	bio_put(bio);
out_err:
	dip->errors = 1;
	/*
	 * Order the store to dip->errors before the atomic_dec below,
	 * so whoever drops pending_bios to zero also sees the error.
	 */
	smp_mb__before_atomic_dec();
	if (atomic_dec_and_test(&dip->pending_bios))
		bio_io_error(dip->orig_bio);

	/* the endio path owns error reporting from here on */
	return 0;
}
8512
/*
 * Submission entry point handed to __blockdev_direct_IO: wrap @dio_bio
 * in a btrfs_dio_private, clone it (so btrfs owns a bio it may split)
 * and pass it to btrfs_submit_direct_hook().
 *
 * On setup failure the dio is completed with the error here, doing by
 * hand the cleanup the normal endio paths would have done.
 */
static void btrfs_submit_direct(int rw, struct bio *dio_bio,
				struct inode *inode, loff_t file_offset)
{
	struct btrfs_dio_private *dip = NULL;
	struct bio *io_bio = NULL;
	struct btrfs_io_bio *btrfs_bio;
	int skip_sum;
	int write = rw & REQ_WRITE;
	int ret = 0;

	skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;

	io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS);
	if (!io_bio) {
		ret = -ENOMEM;
		goto free_ordered;
	}

	dip = kzalloc(sizeof(*dip), GFP_NOFS);
	if (!dip) {
		ret = -ENOMEM;
		goto free_ordered;
	}

	dip->private = dio_bio->bi_private;
	dip->inode = inode;
	dip->logical_offset = file_offset;
	dip->bytes = dio_bio->bi_size;
	dip->disk_bytenr = (u64)dio_bio->bi_sector << 9;
	io_bio->bi_private = dip;
	dip->orig_bio = io_bio;
	dip->dio_bio = dio_bio;
	atomic_set(&dip->pending_bios, 0);
	btrfs_bio = btrfs_io_bio(io_bio);
	btrfs_bio->logical = file_offset;

	if (write) {
		io_bio->bi_end_io = btrfs_endio_direct_write;
	} else {
		io_bio->bi_end_io = btrfs_endio_direct_read;
		dip->subio_endio = btrfs_subio_endio_read;
	}

	ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
	if (!ret)
		return;

	if (btrfs_bio->end_io)
		btrfs_bio->end_io(btrfs_bio, ret);

free_ordered:
	/*
	 * If both the clone and the dip exist, ending the cloned bio runs
	 * the regular endio path, which performs the ordered-extent /
	 * unlock cleanup and completes dio_bio for us.
	 */
	if (io_bio && dip) {
		bio_endio(io_bio, ret);
		/*
		 * The endio frees dip and puts io_bio; forget both so the
		 * tail of this function leaves them alone.
		 */
		dip = NULL;
		io_bio = NULL;
	} else {
		/* no dip: unwind by hand, then fail the original dio */
		if (write) {
			struct btrfs_ordered_extent *ordered;

			ordered = btrfs_lookup_ordered_extent(inode,
							      file_offset);
			set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
			/*
			 * Run the ordered extent's completion directly so
			 * it is taken out of the ordered tree.
			 */
			btrfs_finish_ordered_io(ordered);
		} else {
			unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
				      file_offset + dio_bio->bi_size - 1);
		}
		clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
		/* complete the upper layer's dio with the error */
		dio_end_io(dio_bio, ret);
	}
	if (io_bio)
		bio_put(io_bio);
	kfree(dip);
}
8612
8613static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb,
8614 const struct iovec *iov, loff_t offset,
8615 unsigned long nr_segs)
8616{
8617 int seg;
8618 int i;
8619 size_t size;
8620 unsigned long addr;
8621 unsigned blocksize_mask = root->sectorsize - 1;
8622 ssize_t retval = -EINVAL;
8623 loff_t end = offset;
8624
8625 if (offset & blocksize_mask)
8626 goto out;
8627
8628
8629 for (seg = 0; seg < nr_segs; seg++) {
8630 addr = (unsigned long)iov[seg].iov_base;
8631 size = iov[seg].iov_len;
8632 end += size;
8633 if ((addr & blocksize_mask) || (size & blocksize_mask))
8634 goto out;
8635
8636
8637 if (rw & WRITE)
8638 continue;
8639
8640
8641
8642
8643
8644
8645 for (i = seg + 1; i < nr_segs; i++) {
8646 if (iov[seg].iov_base == iov[i].iov_base)
8647 goto out;
8648 }
8649 }
8650 retval = 0;
8651out:
8652 return retval;
8653}
8654
/*
 * ->direct_IO for btrfs.  Reserves data space up front for writes,
 * passes a btrfs_dio_data to btrfs_get_blocks_direct through
 * current->journal_info, and afterwards releases whatever part of the
 * reservation the dio did not consume.
 */
static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
			       const struct iovec *iov, loff_t offset,
			       unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_dio_data dio_data = { 0 };
	size_t count = 0;
	int flags = 0;
	bool wakeup = true;
	bool relock = false;
	ssize_t ret;

	/* misaligned or page-aliased requests fall back to buffered IO */
	if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
			    offset, nr_segs))
		return 0;

	inode_dio_begin(inode);
	smp_mb__after_atomic_inc();

	/*
	 * Ranges with pending async extents are flushed explicitly first
	 * before handing the IO to the generic dio code.
	 */
	count = iov_length(iov, nr_segs);
	if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
		     &BTRFS_I(inode)->runtime_flags))
		filemap_fdatawrite_range(inode->i_mapping, offset,
					 offset + count - 1);

	if (rw & WRITE) {
		/*
		 * A write that stays within i_size can drop i_mutex for
		 * the duration of the IO; re-taken before returning.
		 */
		if (offset + count <= inode->i_size) {
			mutex_unlock(&inode->i_mutex);
			relock = true;
		}
		ret = btrfs_delalloc_reserve_space(inode, offset, count);
		if (ret)
			goto out;
		dio_data.outstanding_extents = div64_u64(count +
						BTRFS_MAX_EXTENT_SIZE - 1,
						BTRFS_MAX_EXTENT_SIZE);

		/*
		 * dio_data.reserve tracks the not-yet-consumed part of
		 * the reservation; btrfs_get_blocks_direct decrements it
		 * as it hands out extents.
		 */
		dio_data.reserve = round_up(count, root->sectorsize);
		current->journal_info = &dio_data;
		down_read(&BTRFS_I(inode)->dio_sem);
	} else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
			    &BTRFS_I(inode)->runtime_flags)) {
		inode_dio_end(inode);
		flags = DIO_LOCKING | DIO_SKIP_HOLES;
		wakeup = false;
	}

	ret = __blockdev_direct_IO(rw, iocb, inode,
			BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
			iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
			btrfs_submit_direct, flags);
	if (rw & WRITE) {
		up_read(&BTRFS_I(inode)->dio_sem);
		current->journal_info = NULL;
		if (ret < 0 && ret != -EIOCBQUEUED) {
			/* failed outright: give back what is left */
			if (dio_data.reserve)
				btrfs_delalloc_release_space(inode, offset,
							     dio_data.reserve);
		} else if (ret >= 0 && (size_t)ret < count)
			/* short write: release the unwritten tail */
			btrfs_delalloc_release_space(inode, offset,
						     count - (size_t)ret);
	}
out:
	if (wakeup)
		inode_dio_end(inode);
	if (relock)
		mutex_lock(&inode->i_mutex);

	return ret;
}
8743
8744#define BTRFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC)
8745
8746static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
8747 __u64 start, __u64 len)
8748{
8749 int ret;
8750
8751 ret = fiemap_check_flags(fieinfo, BTRFS_FIEMAP_FLAGS);
8752 if (ret)
8753 return ret;
8754
8755 return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap);
8756}
8757
8758int btrfs_readpage(struct file *file, struct page *page)
8759{
8760 struct extent_io_tree *tree;
8761 tree = &BTRFS_I(page->mapping->host)->io_tree;
8762 return extent_read_full_page(tree, page, btrfs_get_extent, 0);
8763}
8764
8765static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
8766{
8767 struct extent_io_tree *tree;
8768 struct inode *inode = page->mapping->host;
8769 int ret;
8770
8771 if (current->flags & PF_MEMALLOC) {
8772 redirty_page_for_writepage(wbc, page);
8773 unlock_page(page);
8774 return 0;
8775 }
8776
8777
8778
8779
8780
8781
8782 if (!igrab(inode)) {
8783 redirty_page_for_writepage(wbc, page);
8784 return AOP_WRITEPAGE_ACTIVATE;
8785 }
8786 tree = &BTRFS_I(page->mapping->host)->io_tree;
8787 ret = extent_write_full_page(tree, page, btrfs_get_extent, wbc);
8788 btrfs_add_delayed_iput(inode);
8789 return ret;
8790}
8791
8792static int btrfs_writepages(struct address_space *mapping,
8793 struct writeback_control *wbc)
8794{
8795 struct extent_io_tree *tree;
8796
8797 tree = &BTRFS_I(mapping->host)->io_tree;
8798 return extent_writepages(tree, mapping, btrfs_get_extent, wbc);
8799}
8800
8801static int
8802btrfs_readpages(struct file *file, struct address_space *mapping,
8803 struct list_head *pages, unsigned nr_pages)
8804{
8805 struct extent_io_tree *tree;
8806 tree = &BTRFS_I(mapping->host)->io_tree;
8807 return extent_readpages(tree, mapping, pages, nr_pages,
8808 btrfs_get_extent);
8809}
8810static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
8811{
8812 struct extent_io_tree *tree;
8813 struct extent_map_tree *map;
8814 int ret;
8815
8816 tree = &BTRFS_I(page->mapping->host)->io_tree;
8817 map = &BTRFS_I(page->mapping->host)->extent_tree;
8818 ret = try_release_extent_mapping(map, tree, page, gfp_flags);
8819 if (ret == 1) {
8820 ClearPagePrivate(page);
8821 set_page_private(page, 0);
8822 page_cache_release(page);
8823 }
8824 return ret;
8825}
8826
8827static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
8828{
8829 if (PageWriteback(page) || PageDirty(page))
8830 return 0;
8831 return __btrfs_releasepage(page, gfp_flags & GFP_NOFS);
8832}
8833
/*
 * ->invalidatepage: tear down per-page state when the page is dropped
 * from the page cache.  A partial-page invalidate just attempts a
 * releasepage; a full-page invalidate walks all ordered extents
 * covering the page, clears the extent-state bits and drops page
 * private.  When the inode is being evicted (I_FREEING) the io_tree
 * locking is skipped.
 */
static void btrfs_invalidatepage(struct page *page, unsigned long offset)
{
	struct inode *inode = page->mapping->host;
	struct extent_io_tree *tree;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	u64 page_start = page_offset(page);
	u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
	u64 start;
	u64 end;
	int inode_evicting = inode->i_state & I_FREEING;

	/* let any in-flight writeback on the page finish first */
	wait_on_page_writeback(page);

	tree = &BTRFS_I(inode)->io_tree;
	if (offset) {
		/* partial invalidate: just try to release the page */
		btrfs_releasepage(page, GFP_NOFS);
		return;
	}

	if (!inode_evicting)
		lock_extent_bits(tree, page_start, page_end, &cached_state);
again:
	start = page_start;
	ordered = btrfs_lookup_ordered_range(inode, start,
					     page_end - start + 1);
	if (ordered) {
		end = min(page_end, ordered->file_offset + ordered->len - 1);
		/*
		 * IO on this range will never be started, so clear the
		 * delalloc/accounting bits for it.
		 */
		if (!inode_evicting)
			clear_extent_bit(tree, start, end,
					 EXTENT_DIRTY | EXTENT_DELALLOC |
					 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
					 EXTENT_DEFRAG, 1, 0, &cached_state,
					 GFP_NOFS);
		/*
		 * Private2 still set means the ordered extent has not
		 * finished IO on this page: record the truncation and
		 * complete the ordered extent if this was its last
		 * outstanding portion.
		 */
		if (TestClearPagePrivate2(page)) {
			struct btrfs_ordered_inode_tree *tree;
			u64 new_len;

			tree = &BTRFS_I(inode)->ordered_tree;

			spin_lock_irq(&tree->lock);
			set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
			new_len = start - ordered->file_offset;
			if (new_len < ordered->truncated_len)
				ordered->truncated_len = new_len;
			spin_unlock_irq(&tree->lock);

			if (btrfs_dec_test_ordered_pending(inode, &ordered,
							   start,
							   end - start + 1, 1))
				btrfs_finish_ordered_io(ordered);
		}
		btrfs_put_ordered_extent(ordered);
		if (!inode_evicting) {
			/* re-lock: the dec_test path may have unlocked */
			cached_state = NULL;
			lock_extent_bits(tree, start, end,
					 &cached_state);
		}

		/* several ordered extents may cover one page: keep going */
		start = end + 1;
		if (start < page_end)
			goto again;
	}

	/*
	 * A page that is still dirty here will never reach disk; hand
	 * back its qgroup-reserved data space.
	 */
	if (PageDirty(page))
		btrfs_qgroup_free_data(inode, page_start, PAGE_SIZE);
	if (!inode_evicting) {
		clear_extent_bit(tree, page_start, page_end,
				 EXTENT_LOCKED | EXTENT_DIRTY |
				 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
				 EXTENT_DEFRAG, 1, 1,
				 &cached_state, GFP_NOFS);

		__btrfs_releasepage(page, GFP_NOFS);
	}

	ClearPageChecked(page);
	if (PagePrivate(page)) {
		ClearPagePrivate(page);
		set_page_private(page, 0);
		page_cache_release(page);
	}
}
8947
8948
8949
8950
8951
8952
8953
8954
8955
8956
8957
8958
8959
8960
8961
8962
/*
 * ->page_mkwrite: make an mmapped page writable.  Reserves delalloc
 * space for the page, waits out any ordered extent covering it, marks
 * the range delalloc and zeroes the part of the page beyond i_size so
 * stale data past EOF never reaches disk.
 *
 * Returns VM_FAULT_LOCKED with the page locked and dirtied on success,
 * otherwise a VM_FAULT_* error code.
 */
int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page = vmf->page;
	struct inode *inode = file_inode(vma->vm_file);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	char *kaddr;
	unsigned long zero_start;
	loff_t size;
	int ret;
	int reserved = 0;
	u64 reserved_space;
	u64 page_start;
	u64 page_end;
	u64 end;

	reserved_space = PAGE_CACHE_SIZE;

	sb_start_pagefault(inode->i_sb);
	page_start = page_offset(page);
	page_end = page_start + PAGE_CACHE_SIZE - 1;
	end = page_end;

	/*
	 * Reserve space for the whole page up front; if the page turns
	 * out to straddle EOF, the excess is released further down.
	 */
	ret = btrfs_delalloc_reserve_space(inode, page_start,
					   reserved_space);
	if (!ret) {
		ret = file_update_time(vma->vm_file);
		reserved = 1;
	}
	if (ret) {
		if (ret == -ENOMEM)
			ret = VM_FAULT_OOM;
		else
			ret = VM_FAULT_SIGBUS;
		if (reserved)
			goto out;
		goto out_noreserve;
	}

	ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
again:
	lock_page(page);
	size = i_size_read(inode);

	if ((page->mapping != inode->i_mapping) ||
	    (page_start >= size)) {
		/* the page got truncated out from under us */
		goto out_unlock;
	}
	wait_on_page_writeback(page);

	lock_extent_bits(io_tree, page_start, page_end, &cached_state);
	set_page_extent_mapped(page);

	/*
	 * An ordered extent over the page means IO is in flight: drop
	 * our locks, wait for it to finish, and start over.
	 */
	ordered = btrfs_lookup_ordered_range(inode, page_start, page_end);
	if (ordered) {
		unlock_extent_cached(io_tree, page_start, page_end,
				     &cached_state, GFP_NOFS);
		unlock_page(page);
		btrfs_start_ordered_extent(inode, ordered, 1);
		btrfs_put_ordered_extent(ordered);
		goto again;
	}

	/* last page of the file: only up to EOF needs a reservation */
	if (page->index == ((size - 1) >> PAGE_CACHE_SHIFT)) {
		reserved_space = round_up(size - page_start, root->sectorsize);
		if (reserved_space < PAGE_CACHE_SIZE) {
			end = page_start + reserved_space - 1;
			spin_lock(&BTRFS_I(inode)->lock);
			BTRFS_I(inode)->outstanding_extents++;
			spin_unlock(&BTRFS_I(inode)->lock);
			btrfs_delalloc_release_space(inode, page_start,
						PAGE_CACHE_SIZE - reserved_space);
		}
	}

	/*
	 * Clear any stale dirty/delalloc/accounting bits so the
	 * set_extent_delalloc below starts from a clean slate.
	 */
	clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end,
			 EXTENT_DIRTY | EXTENT_DELALLOC |
			 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
			 0, 0, &cached_state, GFP_NOFS);

	ret = btrfs_set_extent_delalloc(inode, page_start, end,
					&cached_state, 0);
	if (ret) {
		unlock_extent_cached(io_tree, page_start, page_end,
				     &cached_state, GFP_NOFS);
		ret = VM_FAULT_SIGBUS;
		goto out_unlock;
	}
	ret = 0;

	/* zero out the part of the page beyond i_size */
	if (page_start + PAGE_CACHE_SIZE > size)
		zero_start = size & ~PAGE_CACHE_MASK;
	else
		zero_start = PAGE_CACHE_SIZE;

	if (zero_start != PAGE_CACHE_SIZE) {
		kaddr = kmap(page);
		memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start);
		flush_dcache_page(page);
		kunmap(page);
	}
	ClearPageChecked(page);
	set_page_dirty(page);
	SetPageUptodate(page);

	/* record the modification generation for fsync/log tracking */
	BTRFS_I(inode)->last_trans = root->fs_info->generation;
	BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
	BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit;

	unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS);

out_unlock:
	if (!ret) {
		sb_end_pagefault(inode->i_sb);
		return VM_FAULT_LOCKED;
	}
	unlock_page(page);
out:
	btrfs_delalloc_release_space(inode, page_start, reserved_space);
out_noreserve:
	sb_end_pagefault(inode->i_sb);
	return ret;
}
9109
/*
 * Shrink the on-disk file items to match the (already updated) in-memory
 * i_size.  Items are deleted in bounded batches, each batch in its own
 * transaction backed by a temporary block reservation that is refilled
 * from the transaction rsv between rounds, so a huge truncate cannot pin
 * one enormous transaction.
 *
 * Returns 0 on success or a negative errno.
 */
static int btrfs_truncate(struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_block_rsv *rsv;
	int ret = 0;
	int err = 0;
	struct btrfs_trans_handle *trans;
	u64 mask = root->sectorsize - 1;
	u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);

	/* finish all ordered I/O from the last full sector onward first */
	ret = btrfs_wait_ordered_range(inode, inode->i_size & (~mask),
				       (u64)-1);
	if (ret)
		return ret;

	/*
	 * The loop below may get -ENOSPC/-EAGAIN back from
	 * btrfs_truncate_inode_items(); in that case we end the current
	 * transaction, refill the reservation and continue where we left
	 * off.  rsv is sized for a single round of metadata changes
	 * (min_size) and marked failfast so the allocator returns quickly
	 * instead of flushing, letting us do the refill dance here.
	 */
	rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
	if (!rsv)
		return -ENOMEM;
	rsv->size = min_size;
	rsv->failfast = 1;

	/*
	 * 1 unit for the truncate slack space,
	 * 1 unit for updating the inode.
	 */
	trans = btrfs_start_transaction(root, 2);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		goto out;
	}

	/* move the freshly reserved space from trans rsv into our rsv */
	ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv,
				      min_size, 0);
	BUG_ON(ret);	/* space was just reserved; migrate cannot fail */

	/*
	 * Partial extent-item deletion cannot be represented in the tree
	 * log, so force the next fsync of this inode to be a full commit.
	 */
	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
	trans->block_rsv = rsv;

	while (1) {
		ret = btrfs_truncate_inode_items(trans, root, inode,
						 inode->i_size,
						 BTRFS_EXTENT_DATA_KEY);
		if (ret != -ENOSPC && ret != -EAGAIN) {
			/* done, or a hard error: fall out of the loop */
			err = ret;
			break;
		}

		/* persist progress so far, then roll to a new transaction */
		trans->block_rsv = &root->fs_info->trans_block_rsv;
		ret = btrfs_update_inode(trans, root, inode);
		if (ret) {
			err = ret;
			break;
		}

		btrfs_end_transaction(trans, root);
		btrfs_btree_balance_dirty(root);

		trans = btrfs_start_transaction(root, 2);
		if (IS_ERR(trans)) {
			ret = err = PTR_ERR(trans);
			trans = NULL;
			break;
		}

		/* drop leftovers and refill the private rsv for next round */
		btrfs_block_rsv_release(root, rsv, -1);
		ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv,
					      rsv, min_size, 0);
		BUG_ON(ret);	/* same argument as above: cannot fail */
		trans->block_rsv = rsv;
	}

	/*
	 * Truncate succeeded and the inode still has links: it no longer
	 * needs orphan protection, so delete the orphan item.
	 */
	if (ret == 0 && inode->i_nlink > 0) {
		trans->block_rsv = root->orphan_block_rsv;
		ret = btrfs_orphan_del(trans, inode);
		if (ret)
			err = ret;
	}

	if (trans) {
		trans->block_rsv = &root->fs_info->trans_block_rsv;
		ret = btrfs_update_inode(trans, root, inode);
		if (ret && !err)
			err = ret;

		ret = btrfs_end_transaction(trans, root);
		btrfs_btree_balance_dirty(root);
	}
out:
	btrfs_free_block_rsv(root, rsv);

	/* report the first error we hit, preferring err over a late ret */
	if (ret && !err)
		err = ret;

	return err;
}
9249
9250
9251
9252
/*
 * Create the root directory inode of a freshly created subvolume and
 * inherit properties from the parent root.  Runs inside the caller's
 * transaction; the caller is responsible for linking the subvolume into
 * the namespace.  Returns 0 or a negative errno.
 */
int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
			     struct btrfs_root *new_root,
			     struct btrfs_root *parent_root,
			     u64 new_dirid)
{
	struct inode *inode;
	int err;
	u64 index = 0;

	/* the new root dir is its own parent ("..") */
	inode = btrfs_new_inode(trans, new_root, NULL, "..", 2,
				new_dirid, new_dirid,
				S_IFDIR | (~current_umask() & S_IRWXUGO),
				&index);
	if (IS_ERR(inode))
		return PTR_ERR(inode);
	inode->i_op = &btrfs_dir_inode_operations.ops;
	inode->i_flags |= S_IOPS_WRAPPER;	/* i_op is a wrapped ops table */
	inode->i_fop = &btrfs_dir_file_operations;

	set_nlink(inode, 1);
	btrfs_i_size_write(inode, 0);
	unlock_new_inode(inode);

	/* property inheritance failure is logged but does not fail creation */
	err = btrfs_subvol_inherit_props(trans, new_root, parent_root);
	if (err)
		btrfs_err(new_root->fs_info,
			  "error inheriting subvolume %llu properties: %d",
			  new_root->root_key.objectid, err);

	err = btrfs_update_inode(trans, new_root, inode);

	iput(inode);
	return err;
}
9287
/*
 * ->alloc_inode: allocate a btrfs inode from the slab and reset all the
 * per-inode state that the slab constructor (init_once) does not cover.
 * Returns the embedded VFS inode, or NULL on allocation failure.
 */
struct inode *btrfs_alloc_inode(struct super_block *sb)
{
	struct btrfs_inode *ei;
	struct inode *inode;

	ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
	if (!ei)
		return NULL;

	/* plain scalar state; a NULL root marks a not-yet-linked inode */
	ei->root = NULL;
	ei->generation = 0;
	ei->last_trans = 0;
	ei->last_sub_trans = 0;
	ei->logged_trans = 0;
	ei->delalloc_bytes = 0;
	ei->defrag_bytes = 0;
	ei->disk_i_size = 0;
	ei->flags = 0;
	ei->csum_bytes = 0;
	ei->index_cnt = (u64)-1;	/* -1 means "not yet looked up" */
	ei->dir_index = 0;
	ei->last_unlink_trans = 0;
	ei->last_log_commit = 0;

	/* accounting protected by ei->lock */
	spin_lock_init(&ei->lock);
	ei->outstanding_extents = 0;
	ei->reserved_extents = 0;

	ei->runtime_flags = 0;
	ei->force_compress = BTRFS_COMPRESS_NONE;

	ei->delayed_node = NULL;

	ei->i_otime.tv_sec = 0;
	ei->i_otime.tv_nsec = 0;

	/* per-inode extent trees and locks */
	inode = &ei->vfs_inode;
	extent_map_tree_init(&ei->extent_tree);
	extent_io_tree_init(&ei->io_tree, &inode->i_data);
	extent_io_tree_init(&ei->io_failure_tree, &inode->i_data);
	ei->io_tree.track_uptodate = 1;
	ei->io_failure_tree.track_uptodate = 1;
	atomic_set(&ei->sync_writers, 0);
	mutex_init(&ei->log_mutex);
	mutex_init(&ei->delalloc_mutex);
	btrfs_ordered_inode_tree_init(&ei->ordered_tree);
	INIT_LIST_HEAD(&ei->delalloc_inodes);
	RB_CLEAR_NODE(&ei->rb_node);
	init_rwsem(&ei->dio_sem);

	return inode;
}
9340
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/* Minimal inode teardown for the self-tests: drop cached extents and free */
void btrfs_test_destroy_inode(struct inode *inode)
{
	btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
	kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
}
#endif
9348
9349static void btrfs_i_callback(struct rcu_head *head)
9350{
9351 struct inode *inode = container_of(head, struct inode, i_rcu);
9352 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
9353}
9354
/*
 * ->destroy_inode: final teardown of a btrfs inode.  Warns about any
 * accounting left behind (outstanding extents, delalloc, csum bytes, ...),
 * drops a stray orphan-item count, reaps leaked ordered extents, and then
 * frees the in-memory inode via RCU.
 */
void btrfs_destroy_inode(struct inode *inode)
{
	struct btrfs_ordered_extent *ordered;
	struct btrfs_root *root = BTRFS_I(inode)->root;

	WARN_ON(!hlist_empty(&inode->i_dentry));
	WARN_ON(inode->i_data.nrpages);
	WARN_ON(BTRFS_I(inode)->outstanding_extents);
	WARN_ON(BTRFS_I(inode)->reserved_extents);
	WARN_ON(BTRFS_I(inode)->delalloc_bytes);
	WARN_ON(BTRFS_I(inode)->csum_bytes);
	WARN_ON(BTRFS_I(inode)->defrag_bytes);

	/*
	 * root is NULL for half-built inodes that were never attached to a
	 * root (e.g. an allocation raced with another inode creation);
	 * none of the per-root cleanup below applies to them.
	 */
	if (!root)
		goto free;

	if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
		     &BTRFS_I(inode)->runtime_flags)) {
		btrfs_info(root->fs_info, "inode %llu still on the orphan list",
			   btrfs_ino(inode));
		atomic_dec(&root->orphan_inodes);
	}

	/* reap any ordered extents that were leaked on this inode */
	while (1) {
		ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
		if (!ordered)
			break;
		else {
			btrfs_err(root->fs_info,
				  "found ordered extent %llu %llu on inode cleanup",
				  ordered->file_offset, ordered->len);
			btrfs_remove_ordered_extent(inode, ordered);
			/* once for the lookup ref, once for the tree's ref */
			btrfs_put_ordered_extent(ordered);
			btrfs_put_ordered_extent(ordered);
		}
	}
	btrfs_qgroup_check_reserved_leak(inode);
	inode_tree_del(inode);
	btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
free:
	call_rcu(&inode->i_rcu, btrfs_i_callback);
}
9402
9403int btrfs_drop_inode(struct inode *inode)
9404{
9405 struct btrfs_root *root = BTRFS_I(inode)->root;
9406
9407 if (root == NULL)
9408 return 1;
9409
9410
9411 if (btrfs_root_refs(&root->root_item) == 0)
9412 return 1;
9413 else
9414 return generic_drop_inode(inode);
9415}
9416
9417static void init_once(void *foo)
9418{
9419 struct btrfs_inode *ei = (struct btrfs_inode *) foo;
9420
9421 inode_init_once(&ei->vfs_inode);
9422}
9423
/* Tear down all btrfs slab caches (also used on init failure unwind). */
void btrfs_destroy_cachep(void)
{
	/*
	 * Inodes are freed via RCU (btrfs_i_callback); make sure every
	 * pending RCU free has run before the cache itself goes away.
	 */
	rcu_barrier();
	/* kmem_cache_destroy() tolerates NULL, so partial init is fine */
	kmem_cache_destroy(btrfs_inode_cachep);
	kmem_cache_destroy(btrfs_trans_handle_cachep);
	kmem_cache_destroy(btrfs_transaction_cachep);
	kmem_cache_destroy(btrfs_path_cachep);
	kmem_cache_destroy(btrfs_free_space_cachep);
}
9437
/*
 * Create all btrfs slab caches.  On any failure the already-created
 * caches are destroyed and -ENOMEM is returned.
 */
int btrfs_init_cachep(void)
{
	/* inode cache uses a constructor so RCU-freed objects stay sane */
	btrfs_inode_cachep = kmem_cache_create("btrfs_inode",
			sizeof(struct btrfs_inode), 0,
			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, init_once);
	if (!btrfs_inode_cachep)
		goto fail;

	btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle",
			sizeof(struct btrfs_trans_handle), 0,
			SLAB_TEMPORARY | SLAB_MEM_SPREAD, NULL);
	if (!btrfs_trans_handle_cachep)
		goto fail;

	btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction",
			sizeof(struct btrfs_transaction), 0,
			SLAB_TEMPORARY | SLAB_MEM_SPREAD, NULL);
	if (!btrfs_transaction_cachep)
		goto fail;

	btrfs_path_cachep = kmem_cache_create("btrfs_path",
			sizeof(struct btrfs_path), 0,
			SLAB_MEM_SPREAD, NULL);
	if (!btrfs_path_cachep)
		goto fail;

	btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space",
			sizeof(struct btrfs_free_space), 0,
			SLAB_MEM_SPREAD, NULL);
	if (!btrfs_free_space_cachep)
		goto fail;

	return 0;
fail:
	btrfs_destroy_cachep();
	return -ENOMEM;
}
9475
9476static int btrfs_getattr(struct vfsmount *mnt,
9477 struct dentry *dentry, struct kstat *stat)
9478{
9479 u64 delalloc_bytes;
9480 struct inode *inode = dentry->d_inode;
9481 u32 blocksize = inode->i_sb->s_blocksize;
9482
9483 generic_fillattr(inode, stat);
9484 stat->dev = BTRFS_I(inode)->root->anon_dev;
9485
9486 spin_lock(&BTRFS_I(inode)->lock);
9487 delalloc_bytes = BTRFS_I(inode)->delalloc_bytes;
9488 spin_unlock(&BTRFS_I(inode)->lock);
9489 stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) +
9490 ALIGN(delalloc_bytes, blocksize)) >> 9;
9491 return 0;
9492}
9493
/*
 * Classic rename: move old_dentry from old_dir to new_dentry in new_dir,
 * unlinking an existing target first.  Subvolume roots are moved via their
 * root refs; everything else must stay within one root (-EXDEV otherwise).
 * When possible the log is pinned so the new name can be logged without
 * forcing a full transaction commit on the next fsync.
 */
static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
			struct inode *new_dir, struct dentry *new_dentry)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(old_dir)->root;
	struct btrfs_root *dest = BTRFS_I(new_dir)->root;
	struct inode *new_inode = new_dentry->d_inode;
	struct inode *old_inode = old_dentry->d_inode;
	u64 index = 0;
	u64 root_objectid;
	int ret;
	u64 old_ino = btrfs_ino(old_inode);
	bool log_pinned = false;

	if (btrfs_ino(new_dir) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
		return -EPERM;

	/* only subvolume roots may cross roots; everything else is -EXDEV */
	if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
		return -EXDEV;

	if (old_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID ||
	    (new_inode && btrfs_ino(new_inode) == BTRFS_FIRST_FREE_OBJECTID))
		return -ENOTEMPTY;

	if (S_ISDIR(old_inode->i_mode) && new_inode &&
	    new_inode->i_size > BTRFS_EMPTY_DIR_SIZE)
		return -ENOTEMPTY;

	/* check for name collisions before touching anything on disk */
	ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino,
					     new_dentry->d_name.name,
					     new_dentry->d_name.len);

	if (ret) {
		if (ret == -EEXIST) {
			/* -EEXIST without a target inode is unexpected */
			if (WARN_ON(!new_inode)) {
				return ret;
			}
		} else {
			/* hard error, e.g. -EOVERFLOW */
			return ret;
		}
	}
	ret = 0;

	/*
	 * Target exists and old_inode is a regular file: flush its dirty
	 * pages first so replay can't expose stale data under the new name.
	 */
	if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size)
		filemap_flush(old_inode->i_mapping);

	/* renaming a subvolume root races with snapshot create/destroy */
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
		down_read(&root->fs_info->subvol_sem);

	/*
	 * 11 units reserved: worst case touches both dir items, both dir
	 * indexes, inode refs and the inode items themselves.
	 */
	trans = btrfs_start_transaction(root, 11);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_notrans;
	}

	if (dest != root)
		btrfs_record_root_in_trans(trans, dest);

	ret = btrfs_set_inode_index(new_dir, &index);
	if (ret)
		goto out_fail;

	BTRFS_I(old_inode)->dir_index = 0ULL;
	if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
		/* subvolume moves can't be logged; force a full commit */
		btrfs_set_log_full_commit(root->fs_info, trans);
	} else {
		/* pin the log so we can log the new name at the end */
		btrfs_pin_log_trans(root);
		log_pinned = true;
		ret = btrfs_insert_inode_ref(trans, dest,
					     new_dentry->d_name.name,
					     new_dentry->d_name.len,
					     old_ino,
					     btrfs_ino(new_dir), index);
		if (ret)
			goto out_fail;
	}

	inode_inc_iversion(old_dir);
	inode_inc_iversion(new_dir);
	inode_inc_iversion(old_inode);
	old_dir->i_ctime = old_dir->i_mtime =
	new_dir->i_ctime = new_dir->i_mtime =
	old_inode->i_ctime = current_fs_time(old_dir->i_sb);

	if (old_dentry->d_parent != new_dentry->d_parent)
		btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);

	/* remove the old name (subvolumes via their root ref) */
	if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
		root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
		ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid,
					  old_dentry->d_name.name,
					  old_dentry->d_name.len);
	} else {
		ret = __btrfs_unlink_inode(trans, root, old_dir,
					   old_dentry->d_inode,
					   old_dentry->d_name.name,
					   old_dentry->d_name.len);
		if (!ret)
			ret = btrfs_update_inode(trans, root, old_inode);
	}
	if (ret) {
		btrfs_abort_transaction(trans, root, ret);
		goto out_fail;
	}

	/* unlink (and possibly orphan) the target that is being replaced */
	if (new_inode) {
		inode_inc_iversion(new_inode);
		new_inode->i_ctime = current_fs_time(new_inode->i_sb);
		if (unlikely(btrfs_ino(new_inode) ==
			     BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
			root_objectid = BTRFS_I(new_inode)->location.objectid;
			ret = btrfs_unlink_subvol(trans, dest, new_dir,
						  root_objectid,
						  new_dentry->d_name.name,
						  new_dentry->d_name.len);
			BUG_ON(new_inode->i_nlink == 0);
		} else {
			ret = btrfs_unlink_inode(trans, dest, new_dir,
						 new_dentry->d_inode,
						 new_dentry->d_name.name,
						 new_dentry->d_name.len);
		}
		if (!ret && new_inode->i_nlink == 0)
			ret = btrfs_orphan_add(trans, new_dentry->d_inode);
		if (ret) {
			btrfs_abort_transaction(trans, root, ret);
			goto out_fail;
		}
	}

	/* finally add the new name */
	ret = btrfs_add_link(trans, new_dir, old_inode,
			     new_dentry->d_name.name,
			     new_dentry->d_name.len, 0, index);
	if (ret) {
		btrfs_abort_transaction(trans, root, ret);
		goto out_fail;
	}

	if (old_inode->i_nlink == 1)
		BTRFS_I(old_inode)->dir_index = index;

	if (log_pinned) {
		struct dentry *parent = new_dentry->d_parent;

		btrfs_log_new_name(trans, old_inode, old_dir, parent);
		btrfs_end_log_trans(root);
		log_pinned = false;
	}
out_fail:
	/*
	 * If we failed while the log was still pinned and any involved
	 * inode was already logged in this generation, the log may now be
	 * inconsistent with the subvolume tree: demand a full transaction
	 * commit before the log can be trusted again, then unpin.
	 */
	if (ret && log_pinned) {
		if (btrfs_inode_in_log(old_dir, root->fs_info->generation) ||
		    btrfs_inode_in_log(new_dir, root->fs_info->generation) ||
		    btrfs_inode_in_log(old_inode, root->fs_info->generation) ||
		    (new_inode &&
		     btrfs_inode_in_log(new_inode, root->fs_info->generation)))
			btrfs_set_log_full_commit(root->fs_info, trans);

		btrfs_end_log_trans(root);
		log_pinned = false;
	}
	btrfs_end_transaction(trans, root);
out_notrans:
	if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
		up_read(&root->fs_info->subvol_sem);

	return ret;
}
9691
9692static int btrfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
9693 struct inode *new_dir, struct dentry *new_dentry,
9694 unsigned int flags)
9695{
9696 if (flags & ~RENAME_NOREPLACE)
9697 return -EINVAL;
9698
9699 return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry);
9700}
9701
/*
 * Worker body that flushes one inode's delalloc pages.  The inode ref
 * was taken by whoever queued the work; it is released here, either
 * directly or via a delayed iput when the queuer asked for it.
 */
static void btrfs_run_delalloc_work(struct btrfs_work *work)
{
	struct btrfs_delalloc_work *delalloc_work;
	struct inode *inode;

	delalloc_work = container_of(work, struct btrfs_delalloc_work,
				     work);
	inode = delalloc_work->inode;
	filemap_flush(inode->i_mapping);
	/*
	 * Async (compressed) extents may still be outstanding after the
	 * first flush; flush once more in that case.
	 */
	if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
		     &BTRFS_I(inode)->runtime_flags))
		filemap_flush(inode->i_mapping);

	if (delalloc_work->delay_iput)
		btrfs_add_delayed_iput(inode);
	else
		iput(inode);
	complete(&delalloc_work->completion);
}
9721
9722struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
9723 int delay_iput)
9724{
9725 struct btrfs_delalloc_work *work;
9726
9727 work = kmalloc(sizeof(*work), GFP_NOFS);
9728 if (!work)
9729 return NULL;
9730
9731 init_completion(&work->completion);
9732 INIT_LIST_HEAD(&work->list);
9733 work->inode = inode;
9734 work->delay_iput = delay_iput;
9735 WARN_ON_ONCE(!inode);
9736 btrfs_init_work(&work->work, btrfs_flush_delalloc_helper,
9737 btrfs_run_delalloc_work, NULL, NULL);
9738
9739 return work;
9740}
9741
/* Wait for a queued delalloc work item to finish, then free it. */
void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work)
{
	wait_for_completion(&work->completion);
	kfree(work);
}
9747
9748
9749
9750
9751
/*
 * Kick off delalloc flushing for up to @nr inodes of @root (-1 = all).
 * The root's delalloc list is spliced onto a private list so new arrivals
 * don't make us loop forever; visited inodes are moved back to the tail.
 * Returns the number of inodes queued, or a negative errno.
 */
static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput,
				   int nr)
{
	struct btrfs_inode *binode;
	struct inode *inode;
	struct btrfs_delalloc_work *work, *next;
	struct list_head works;
	struct list_head splice;
	int ret = 0;

	INIT_LIST_HEAD(&works);
	INIT_LIST_HEAD(&splice);

	mutex_lock(&root->delalloc_mutex);
	spin_lock(&root->delalloc_lock);
	list_splice_init(&root->delalloc_inodes, &splice);
	while (!list_empty(&splice)) {
		binode = list_entry(splice.next, struct btrfs_inode,
				    delalloc_inodes);

		/* put it back on the root's list before we drop the lock */
		list_move_tail(&binode->delalloc_inodes,
			       &root->delalloc_inodes);
		inode = igrab(&binode->vfs_inode);
		if (!inode) {
			/* inode is being freed; skip it */
			cond_resched_lock(&root->delalloc_lock);
			continue;
		}
		spin_unlock(&root->delalloc_lock);

		work = btrfs_alloc_delalloc_work(inode, delay_iput);
		if (!work) {
			/* drop the igrab ref we just took */
			if (delay_iput)
				btrfs_add_delayed_iput(inode);
			else
				iput(inode);
			ret = -ENOMEM;
			goto out;
		}
		list_add_tail(&work->list, &works);
		btrfs_queue_work(root->fs_info->flush_workers,
				 &work->work);
		ret++;
		if (nr != -1 && ret >= nr)
			goto out;
		cond_resched();
		spin_lock(&root->delalloc_lock);
	}
	spin_unlock(&root->delalloc_lock);

out:
	/* wait for everything we queued before returning */
	list_for_each_entry_safe(work, next, &works, list) {
		list_del_init(&work->list);
		btrfs_wait_and_free_delalloc_work(work);
	}

	/* return any unvisited inodes to the root's list */
	if (!list_empty_careful(&splice)) {
		spin_lock(&root->delalloc_lock);
		list_splice_tail(&splice, &root->delalloc_inodes);
		spin_unlock(&root->delalloc_lock);
	}
	mutex_unlock(&root->delalloc_mutex);
	return ret;
}
9815
/*
 * Flush all delalloc inodes of one root, then wait until every async
 * submit and async delalloc page in flight has drained.  Returns 0 or a
 * negative errno.
 */
int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
{
	int ret;

	if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
		return -EROFS;

	ret = __start_delalloc_inodes(root, delay_iput, -1);
	if (ret > 0)
		ret = 0;	/* callers only care about errors, not counts */

	/*
	 * Bump async_submit_draining so new async work is throttled while
	 * we wait for the currently outstanding submits/pages to hit zero.
	 */
	atomic_inc(&root->fs_info->async_submit_draining);
	while (atomic_read(&root->fs_info->nr_async_submits) ||
	       atomic_read(&root->fs_info->async_delalloc_pages)) {
		wait_event(root->fs_info->async_submit_wait,
			   (atomic_read(&root->fs_info->nr_async_submits) == 0 &&
			    atomic_read(&root->fs_info->async_delalloc_pages) == 0));
	}
	atomic_dec(&root->fs_info->async_submit_draining);
	return ret;
}
9841
/*
 * Flush up to @nr delalloc inodes (-1 = all) across every root with
 * pending delalloc, then drain in-flight async submits.  Uses the same
 * splice-and-restore pattern as __start_delalloc_inodes, but over the
 * fs-wide delalloc_roots list.  Returns 0 or a negative errno.
 */
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput,
			       int nr)
{
	struct btrfs_root *root;
	struct list_head splice;
	int ret;

	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
		return -EROFS;

	INIT_LIST_HEAD(&splice);

	mutex_lock(&fs_info->delalloc_root_mutex);
	spin_lock(&fs_info->delalloc_root_lock);
	list_splice_init(&fs_info->delalloc_roots, &splice);
	while (!list_empty(&splice) && nr) {
		root = list_first_entry(&splice, struct btrfs_root,
					delalloc_root);
		/* hold the root across the unlocked flush below */
		root = btrfs_grab_fs_root(root);
		BUG_ON(!root);
		list_move_tail(&root->delalloc_root,
			       &fs_info->delalloc_roots);
		spin_unlock(&fs_info->delalloc_root_lock);

		ret = __start_delalloc_inodes(root, delay_iput, nr);
		btrfs_put_fs_root(root);
		if (ret < 0)
			goto out;

		/* ret is the number of inodes queued for this root */
		if (nr != -1) {
			nr -= ret;
			WARN_ON(nr < 0);
		}
		spin_lock(&fs_info->delalloc_root_lock);
	}
	spin_unlock(&fs_info->delalloc_root_lock);

	ret = 0;
	/* drain outstanding async submits, as in btrfs_start_delalloc_inodes */
	atomic_inc(&fs_info->async_submit_draining);
	while (atomic_read(&fs_info->nr_async_submits) ||
	       atomic_read(&fs_info->async_delalloc_pages)) {
		wait_event(fs_info->async_submit_wait,
			   (atomic_read(&fs_info->nr_async_submits) == 0 &&
			    atomic_read(&fs_info->async_delalloc_pages) == 0));
	}
	atomic_dec(&fs_info->async_submit_draining);
out:
	if (!list_empty_careful(&splice)) {
		spin_lock(&fs_info->delalloc_root_lock);
		list_splice_tail(&splice, &fs_info->delalloc_roots);
		spin_unlock(&fs_info->delalloc_root_lock);
	}
	mutex_unlock(&fs_info->delalloc_root_mutex);
	return ret;
}
9897
/*
 * Create a symlink: a new inode whose target path is stored in an inline
 * file extent item — hence the BTRFS_MAX_INLINE_DATA_SIZE limit on the
 * target length.  Returns 0 or a negative errno.
 */
static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
			 const char *symname)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct inode *inode = NULL;
	int err;
	int drop_inode = 0;
	u64 objectid;
	u64 index = 0;
	int name_len;
	int datasize;
	unsigned long ptr;
	struct btrfs_file_extent_item *ei;
	struct extent_buffer *leaf;

	name_len = strlen(symname);
	if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
		return -ENAMETOOLONG;

	/*
	 * 7 units: inode item + ref, two dir items, parent inode update,
	 * the inline extent item, and a possible security xattr.
	 */
	trans = btrfs_start_transaction(root, 7);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	err = btrfs_find_free_ino(root, &objectid);
	if (err)
		goto out_unlock;

	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
				dentry->d_name.len, btrfs_ino(dir), objectid,
				S_IFLNK|S_IRWXUGO, &index);
	if (IS_ERR(inode)) {
		err = PTR_ERR(inode);
		goto out_unlock;
	}

	/*
	 * Start out with regular-file ops so LSMs (e.g. Smack) that probe
	 * the ops vectors during security init see xattr support; switched
	 * to the symlink ops below once the inline extent is in place.
	 */
	inode->i_fop = &btrfs_file_operations.kabi_fops;
	inode->i_op = &btrfs_file_inode_operations;
	inode->i_mapping->a_ops = &btrfs_aops;
	inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
	BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;

	err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
	if (err)
		goto out_unlock_inode;

	path = btrfs_alloc_path();
	if (!path) {
		err = -ENOMEM;
		goto out_unlock_inode;
	}
	/* insert the inline extent item holding the target path */
	key.objectid = btrfs_ino(inode);
	key.offset = 0;
	key.type = BTRFS_EXTENT_DATA_KEY;
	datasize = btrfs_file_extent_calc_inline_size(name_len);
	err = btrfs_insert_empty_item(trans, root, path, &key,
				      datasize);
	if (err) {
		btrfs_free_path(path);
		goto out_unlock_inode;
	}
	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);
	btrfs_set_file_extent_generation(leaf, ei, trans->transid);
	btrfs_set_file_extent_type(leaf, ei,
				   BTRFS_FILE_EXTENT_INLINE);
	btrfs_set_file_extent_encryption(leaf, ei, 0);
	btrfs_set_file_extent_compression(leaf, ei, 0);
	btrfs_set_file_extent_other_encoding(leaf, ei, 0);
	btrfs_set_file_extent_ram_bytes(leaf, ei, name_len);

	ptr = btrfs_file_extent_inline_start(ei);
	write_extent_buffer(leaf, symname, ptr, name_len);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_free_path(path);

	/* now flip to the real symlink ops and sizes */
	inode->i_op = &btrfs_symlink_inode_operations;
	inode->i_mapping->a_ops = &btrfs_symlink_aops;
	inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
	inode_set_bytes(inode, name_len);
	btrfs_i_size_write(inode, name_len);
	err = btrfs_update_inode(trans, root, inode);
	/*
	 * Last step: add the directory entry.  Any failure from here on
	 * drops the half-built inode.
	 */
	if (!err)
		err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
	if (err) {
		drop_inode = 1;
		goto out_unlock_inode;
	}

	unlock_new_inode(inode);
	d_instantiate(dentry, inode);

out_unlock:
	btrfs_end_transaction(trans, root);
	if (drop_inode) {
		inode_dec_link_count(inode);
		iput(inode);
	}
	btrfs_btree_balance_dirty(root);
	return err;

out_unlock_inode:
	drop_inode = 1;
	unlock_new_inode(inode);
	goto out_unlock;
}
10025
/*
 * Preallocate [start, start + num_bytes) for @inode as PREALLOC extents.
 * With a NULL @trans each extent gets its own transaction; otherwise the
 * caller's transaction is used for everything.  Data space must already
 * be reserved by the caller; whatever portion ends up unallocated is
 * released at the end.  Returns 0 or a negative errno.
 */
static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
				       u64 start, u64 num_bytes, u64 min_size,
				       loff_t actual_len, u64 *alloc_hint,
				       struct btrfs_trans_handle *trans)
{
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	struct extent_map *em;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_key ins;
	u64 cur_offset = start;
	u64 i_size;
	u64 cur_bytes;
	u64 last_alloc = (u64)-1;
	int ret = 0;
	bool own_trans = true;
	u64 end = start + num_bytes - 1;

	if (trans)
		own_trans = false;
	while (num_bytes > 0) {
		if (own_trans) {
			trans = btrfs_start_transaction(root, 3);
			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
				break;
			}
		}

		cur_bytes = min_t(u64, num_bytes, SZ_256M);
		cur_bytes = max(cur_bytes, min_size);

		/*
		 * If the allocator has been returning small chunks (a sign
		 * of fragmentation), don't ask for more than the last
		 * successful allocation — makes its job easier.
		 */
		cur_bytes = min(cur_bytes, last_alloc);
		ret = btrfs_reserve_extent(root, cur_bytes, cur_bytes,
				min_size, 0, *alloc_hint, &ins, 1, 0);
		if (ret) {
			if (own_trans)
				btrfs_end_transaction(trans, root);
			break;
		}
		btrfs_dec_block_group_reservations(root->fs_info, ins.objectid);

		last_alloc = ins.offset;
		ret = insert_reserved_file_extent(trans, inode,
						  cur_offset, ins.objectid,
						  ins.offset, ins.offset,
						  ins.offset, 0, 0, 0,
						  BTRFS_FILE_EXTENT_PREALLOC);
		if (ret) {
			btrfs_free_reserved_extent(root, ins.objectid,
						   ins.offset, 0);
			btrfs_abort_transaction(trans, root, ret);
			if (own_trans)
				btrfs_end_transaction(trans, root);
			break;
		}

		btrfs_drop_extent_cache(inode, cur_offset,
					cur_offset + ins.offset -1, 0);

		/*
		 * Cache the new extent map; if allocation fails we can live
		 * without it, but the next fsync must then be a full one.
		 */
		em = alloc_extent_map();
		if (!em) {
			set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
				&BTRFS_I(inode)->runtime_flags);
			goto next;
		}

		em->start = cur_offset;
		em->orig_start = cur_offset;
		em->len = ins.offset;
		em->block_start = ins.objectid;
		em->block_len = ins.offset;
		em->orig_block_len = ins.offset;
		em->ram_bytes = ins.offset;
		em->bdev = root->fs_info->fs_devices->latest_bdev;
		set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
		em->generation = trans->transid;

		/* retry until any conflicting cached mapping is dropped */
		while (1) {
			write_lock(&em_tree->lock);
			ret = add_extent_mapping(em_tree, em, 1);
			write_unlock(&em_tree->lock);
			if (ret != -EEXIST)
				break;
			btrfs_drop_extent_cache(inode, cur_offset,
						cur_offset + ins.offset - 1,
						0);
		}
		free_extent_map(em);
next:
		num_bytes -= ins.offset;
		cur_offset += ins.offset;
		*alloc_hint = ins.objectid + ins.offset;

		inode_inc_iversion(inode);
		inode->i_ctime = current_fs_time(inode->i_sb);
		BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
		/* grow i_size unless KEEP_SIZE was requested, capped at actual_len */
		if (!(mode & FALLOC_FL_KEEP_SIZE) &&
		    (actual_len > inode->i_size) &&
		    (cur_offset > inode->i_size)) {
			if (cur_offset > actual_len)
				i_size = actual_len;
			else
				i_size = cur_offset;
			i_size_write(inode, i_size);
			btrfs_ordered_update_i_size(inode, i_size, NULL);
		}

		ret = btrfs_update_inode(trans, root, inode);

		if (ret) {
			btrfs_abort_transaction(trans, root, ret);
			if (own_trans)
				btrfs_end_transaction(trans, root);
			break;
		}

		if (own_trans)
			btrfs_end_transaction(trans, root);
	}
	/* release the caller's data-space reservation for what we never used */
	if (cur_offset < end)
		btrfs_free_reserved_data_space(inode, cur_offset,
			end - cur_offset + 1);
	return ret;
}
10155
/* Preallocate a file range, each extent in its own transaction. */
int btrfs_prealloc_file_range(struct inode *inode, int mode,
			      u64 start, u64 num_bytes, u64 min_size,
			      loff_t actual_len, u64 *alloc_hint)
{
	return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
					   min_size, actual_len, alloc_hint,
					   NULL);
}
10164
/* Preallocate a file range inside the caller's existing transaction. */
int btrfs_prealloc_file_range_trans(struct inode *inode,
				    struct btrfs_trans_handle *trans, int mode,
				    u64 start, u64 num_bytes, u64 min_size,
				    loff_t actual_len, u64 *alloc_hint)
{
	return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
					   min_size, actual_len, alloc_hint, trans);
}
10173
/* ->set_page_dirty: btrfs pages carry no buffer heads, use nobuffers path */
static int btrfs_set_page_dirty(struct page *page)
{
	return __set_page_dirty_nobuffers(page);
}
10178
10179static int btrfs_permission(struct inode *inode, int mask)
10180{
10181 struct btrfs_root *root = BTRFS_I(inode)->root;
10182 umode_t mode = inode->i_mode;
10183
10184 if (mask & MAY_WRITE &&
10185 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
10186 if (btrfs_root_readonly(root))
10187 return -EROFS;
10188 if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY)
10189 return -EACCES;
10190 }
10191 return generic_permission(inode, mask);
10192}
10193
/*
 * Directory inode ops, wrapped so ->rename2 can be exposed on kernels
 * whose inode_operations lack that member (S_IOPS_WRAPPER is set on
 * directory inodes to signal the wrapper is in use).
 */
static const struct inode_operations_wrapper btrfs_dir_inode_operations = {
	.ops = {
	.getattr	= btrfs_getattr,
	.lookup		= btrfs_lookup,
	.create		= btrfs_create,
	.unlink		= btrfs_unlink,
	.link		= btrfs_link,
	.mkdir		= btrfs_mkdir,
	.rmdir		= btrfs_rmdir,
	.rename		= btrfs_rename,
	.symlink	= btrfs_symlink,
	.setattr	= btrfs_setattr,
	.mknod		= btrfs_mknod,
	.setxattr	= btrfs_setxattr,
	.getxattr	= btrfs_getxattr,
	.listxattr	= btrfs_listxattr,
	.removexattr	= btrfs_removexattr,
	.permission	= btrfs_permission,
	.get_acl	= btrfs_get_acl,
	.update_time	= btrfs_update_time,
	},
	.rename2	= btrfs_rename2,
};
/* Read-only directory ops (no create/unlink/rename family). */
static const struct inode_operations btrfs_dir_ro_inode_operations = {
	.lookup		= btrfs_lookup,
	.permission	= btrfs_permission,
	.get_acl	= btrfs_get_acl,
	.update_time	= btrfs_update_time,
};
10223
/* File operations for open directories (readdir, ioctl, fsync). */
static const struct file_operations btrfs_dir_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= generic_read_dir,
	.readdir	= btrfs_real_readdir,
	.unlocked_ioctl	= btrfs_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= btrfs_compat_ioctl,
#endif
	.release        = btrfs_release_file,
	.fsync		= btrfs_sync_file,
};
10235
/* Hooks the generic extent_io code calls back into for data inodes. */
static const struct extent_io_ops btrfs_extent_io_ops = {
	.fill_delalloc = run_delalloc_range,
	.submit_bio_hook = btrfs_submit_bio_hook,
	.merge_bio_hook = btrfs_merge_bio_hook,
	.readpage_end_io_hook = btrfs_readpage_end_io_hook,
	.writepage_end_io_hook = btrfs_writepage_end_io_hook,
	.writepage_start_hook = btrfs_writepage_start_hook,
	.set_bit_hook = btrfs_set_bit_hook,
	.clear_bit_hook = btrfs_clear_bit_hook,
	.merge_extent_hook = btrfs_merge_extent_hook,
	.split_extent_hook = btrfs_split_extent_hook,
};
10248
10249
10250
10251
10252
10253
10254
10255
10256
10257
10258
10259
10260
/* Address-space operations for regular-file data pages. */
static const struct address_space_operations btrfs_aops = {
	.readpage	= btrfs_readpage,
	.writepage	= btrfs_writepage,
	.writepages	= btrfs_writepages,
	.readpages	= btrfs_readpages,
	.direct_IO	= btrfs_direct_IO,
	.invalidatepage = btrfs_invalidatepage,
	.releasepage	= btrfs_releasepage,
	.set_page_dirty	= btrfs_set_page_dirty,
	.error_remove_page = generic_error_remove_page,
};
10272
/* Address-space ops for symlinks (no writepages/readpages/direct_IO). */
static const struct address_space_operations btrfs_symlink_aops = {
	.readpage	= btrfs_readpage,
	.writepage	= btrfs_writepage,
	.invalidatepage = btrfs_invalidatepage,
	.releasepage	= btrfs_releasepage,
};
10279
/* Inode operations for regular files. */
static const struct inode_operations btrfs_file_inode_operations = {
	.getattr	= btrfs_getattr,
	.setattr	= btrfs_setattr,
	.setxattr	= btrfs_setxattr,
	.getxattr	= btrfs_getxattr,
	.listxattr      = btrfs_listxattr,
	.removexattr	= btrfs_removexattr,
	.permission	= btrfs_permission,
	.fiemap		= btrfs_fiemap,
	.get_acl	= btrfs_get_acl,
	.update_time	= btrfs_update_time,
};
/* Inode operations for device nodes, FIFOs and sockets. */
static const struct inode_operations btrfs_special_inode_operations = {
	.getattr	= btrfs_getattr,
	.setattr	= btrfs_setattr,
	.permission	= btrfs_permission,
	.setxattr	= btrfs_setxattr,
	.getxattr	= btrfs_getxattr,
	.listxattr	= btrfs_listxattr,
	.removexattr	= btrfs_removexattr,
	.get_acl	= btrfs_get_acl,
	.update_time	= btrfs_update_time,
};
/* Inode operations for symlinks; target resolution via the page cache. */
static const struct inode_operations btrfs_symlink_inode_operations = {
	.readlink	= generic_readlink,
	.follow_link	= page_follow_link_light,
	.put_link	= page_put_link,
	.getattr	= btrfs_getattr,
	.setattr	= btrfs_setattr,
	.permission	= btrfs_permission,
	.setxattr	= btrfs_setxattr,
	.getxattr	= btrfs_getxattr,
	.listxattr	= btrfs_listxattr,
	.removexattr	= btrfs_removexattr,
	.get_acl	= btrfs_get_acl,
	.update_time	= btrfs_update_time,
};
10317
/* Dentry operations shared by all btrfs dentries. */
const struct dentry_operations btrfs_dentry_operations = {
	.d_delete	= btrfs_dentry_delete,
	.d_release	= btrfs_dentry_release,
};
10322