1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19#include <linux/kernel.h>
20#include <linux/bio.h>
21#include <linux/buffer_head.h>
22#include <linux/file.h>
23#include <linux/fs.h>
24#include <linux/pagemap.h>
25#include <linux/highmem.h>
26#include <linux/time.h>
27#include <linux/init.h>
28#include <linux/string.h>
29#include <linux/backing-dev.h>
30#include <linux/mpage.h>
31#include <linux/swap.h>
32#include <linux/writeback.h>
33#include <linux/statfs.h>
34#include <linux/compat.h>
35#include <linux/bit_spinlock.h>
36#include <linux/xattr.h>
37#include <linux/posix_acl.h>
38#include <linux/falloc.h>
39#include <linux/slab.h>
40#include <linux/ratelimit.h>
41#include <linux/mount.h>
42#include <linux/btrfs.h>
43#include <linux/blkdev.h>
44#include <linux/posix_acl_xattr.h>
45#include <linux/uio.h>
46#include "ctree.h"
47#include "disk-io.h"
48#include "transaction.h"
49#include "btrfs_inode.h"
50#include "print-tree.h"
51#include "ordered-data.h"
52#include "xattr.h"
53#include "tree-log.h"
54#include "volumes.h"
55#include "compression.h"
56#include "locking.h"
57#include "free-space-cache.h"
58#include "inode-map.h"
59#include "backref.h"
60#include "hash.h"
61#include "props.h"
62#include "qgroup.h"
63
/*
 * Pairs a btrfs key with the root it is looked up in.
 * NOTE(review): no user of this struct is visible in this part of the
 * file; presumably it is the argument bundle for the inode lookup
 * callbacks -- confirm against the iget code.
 */
struct btrfs_iget_args {
	struct btrfs_key *location;
	struct btrfs_root *root;
};
68
/*
 * Per-request bookkeeping record (four u64 counters/offsets).
 * NOTE(review): nothing in this part of the file reads or writes these
 * fields; judging by the name this belongs to the direct-IO path --
 * confirm the exact meaning of each field there.
 */
struct btrfs_dio_data {
	u64 outstanding_extents;
	u64 reserve;
	u64 unsubmitted_oe_range_start;
	u64 unsubmitted_oe_range_end;
};
75
/*
 * Forward declarations of the file-local operation tables; they are
 * referenced before their definitions (e.g. btrfs_extent_io_ops in
 * btrfs_test_inode_set_ops() below).
 */
static const struct inode_operations btrfs_dir_inode_operations;
static const struct inode_operations btrfs_symlink_inode_operations;
static const struct inode_operations btrfs_dir_ro_inode_operations;
static const struct inode_operations btrfs_special_inode_operations;
static const struct inode_operations btrfs_file_inode_operations;
static const struct address_space_operations btrfs_aops;
static const struct address_space_operations btrfs_symlink_aops;
static const struct file_operations btrfs_dir_file_operations;
static const struct extent_io_ops btrfs_extent_io_ops;

/*
 * Slab caches.  Only the inode cache is private to this file; the other
 * four have external linkage.
 */
static struct kmem_cache *btrfs_inode_cachep;
struct kmem_cache *btrfs_trans_handle_cachep;
struct kmem_cache *btrfs_transaction_cachep;
struct kmem_cache *btrfs_path_cachep;
struct kmem_cache *btrfs_free_space_cachep;
91
/*
 * Lookup table from the file-type bits of an inode mode to the btrfs
 * directory-entry file type.  The table is indexed by the S_IF* value
 * shifted right by S_SHIFT; slots without an initializer stay 0.
 */
#define S_SHIFT 12
static const unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
	[S_IFREG >> S_SHIFT]	= BTRFS_FT_REG_FILE,
	[S_IFDIR >> S_SHIFT]	= BTRFS_FT_DIR,
	[S_IFCHR >> S_SHIFT]	= BTRFS_FT_CHRDEV,
	[S_IFBLK >> S_SHIFT]	= BTRFS_FT_BLKDEV,
	[S_IFIFO >> S_SHIFT]	= BTRFS_FT_FIFO,
	[S_IFSOCK >> S_SHIFT]	= BTRFS_FT_SOCK,
	[S_IFLNK >> S_SHIFT]	= BTRFS_FT_SYMLINK,
};
102
/*
 * Prototypes for file-local helpers that are used before they are defined
 * (cow_file_range(), for instance, is called from
 * submit_compressed_extents() ahead of its definition).
 */
static int btrfs_setsize(struct inode *inode, struct iattr *attr);
static int btrfs_truncate(struct inode *inode);
static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
static noinline int cow_file_range(struct inode *inode,
				   struct page *locked_page,
				   u64 start, u64 end, int *page_started,
				   unsigned long *nr_written, int unlock);
static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
					   u64 len, u64 orig_start,
					   u64 block_start, u64 block_len,
					   u64 orig_block_len, u64 ram_bytes,
					   int type);

static int btrfs_dirty_inode(struct inode *inode);
117
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/*
 * Sanity-test hook: install the btrfs extent io ops on @inode's io_tree.
 * Only built when CONFIG_BTRFS_FS_RUN_SANITY_TESTS is set.
 */
void btrfs_test_inode_set_ops(struct inode *inode)
{
	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;

	tree->ops = &btrfs_extent_io_ops;
}
#endif
124
/*
 * Set up the security attributes of a new inode: first the ACLs via
 * btrfs_init_acl(), then the security xattrs via
 * btrfs_xattr_security_init().
 *
 * Returns 0 on success or the first non-zero result encountered; the
 * xattr step is skipped when the ACL step fails.
 */
static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
				     struct inode *inode, struct inode *dir,
				     const struct qstr *qstr)
{
	int ret = btrfs_init_acl(trans, inode, dir);

	if (ret)
		return ret;

	return btrfs_xattr_security_init(trans, inode, dir, qstr);
}
136
137
138
139
140
141
142static int insert_inline_extent(struct btrfs_trans_handle *trans,
143 struct btrfs_path *path, int extent_inserted,
144 struct btrfs_root *root, struct inode *inode,
145 u64 start, size_t size, size_t compressed_size,
146 int compress_type,
147 struct page **compressed_pages)
148{
149 struct extent_buffer *leaf;
150 struct page *page = NULL;
151 char *kaddr;
152 unsigned long ptr;
153 struct btrfs_file_extent_item *ei;
154 int err = 0;
155 int ret;
156 size_t cur_size = size;
157 unsigned long offset;
158
159 if (compressed_size && compressed_pages)
160 cur_size = compressed_size;
161
162 inode_add_bytes(inode, size);
163
164 if (!extent_inserted) {
165 struct btrfs_key key;
166 size_t datasize;
167
168 key.objectid = btrfs_ino(inode);
169 key.offset = start;
170 key.type = BTRFS_EXTENT_DATA_KEY;
171
172 datasize = btrfs_file_extent_calc_inline_size(cur_size);
173 path->leave_spinning = 1;
174 ret = btrfs_insert_empty_item(trans, root, path, &key,
175 datasize);
176 if (ret) {
177 err = ret;
178 goto fail;
179 }
180 }
181 leaf = path->nodes[0];
182 ei = btrfs_item_ptr(leaf, path->slots[0],
183 struct btrfs_file_extent_item);
184 btrfs_set_file_extent_generation(leaf, ei, trans->transid);
185 btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE);
186 btrfs_set_file_extent_encryption(leaf, ei, 0);
187 btrfs_set_file_extent_other_encoding(leaf, ei, 0);
188 btrfs_set_file_extent_ram_bytes(leaf, ei, size);
189 ptr = btrfs_file_extent_inline_start(ei);
190
191 if (compress_type != BTRFS_COMPRESS_NONE) {
192 struct page *cpage;
193 int i = 0;
194 while (compressed_size > 0) {
195 cpage = compressed_pages[i];
196 cur_size = min_t(unsigned long, compressed_size,
197 PAGE_SIZE);
198
199 kaddr = kmap_atomic(cpage);
200 write_extent_buffer(leaf, kaddr, ptr, cur_size);
201 kunmap_atomic(kaddr);
202
203 i++;
204 ptr += cur_size;
205 compressed_size -= cur_size;
206 }
207 btrfs_set_file_extent_compression(leaf, ei,
208 compress_type);
209 } else {
210 page = find_get_page(inode->i_mapping,
211 start >> PAGE_SHIFT);
212 btrfs_set_file_extent_compression(leaf, ei, 0);
213 kaddr = kmap_atomic(page);
214 offset = start & (PAGE_SIZE - 1);
215 write_extent_buffer(leaf, kaddr + offset, ptr, size);
216 kunmap_atomic(kaddr);
217 put_page(page);
218 }
219 btrfs_mark_buffer_dirty(leaf);
220 btrfs_release_path(path);
221
222
223
224
225
226
227
228
229
230
231 BTRFS_I(inode)->disk_i_size = inode->i_size;
232 ret = btrfs_update_inode(trans, root, inode);
233
234 return ret;
235fail:
236 return err;
237}
238
239
240
241
242
243
244
245static noinline int cow_file_range_inline(struct btrfs_root *root,
246 struct inode *inode, u64 start,
247 u64 end, size_t compressed_size,
248 int compress_type,
249 struct page **compressed_pages)
250{
251 struct btrfs_trans_handle *trans;
252 u64 isize = i_size_read(inode);
253 u64 actual_end = min(end + 1, isize);
254 u64 inline_len = actual_end - start;
255 u64 aligned_end = ALIGN(end, root->sectorsize);
256 u64 data_len = inline_len;
257 int ret;
258 struct btrfs_path *path;
259 int extent_inserted = 0;
260 u32 extent_item_size;
261
262 if (compressed_size)
263 data_len = compressed_size;
264
265 if (start > 0 ||
266 actual_end > root->sectorsize ||
267 data_len > BTRFS_MAX_INLINE_DATA_SIZE(root) ||
268 (!compressed_size &&
269 (actual_end & (root->sectorsize - 1)) == 0) ||
270 end + 1 < isize ||
271 data_len > root->fs_info->max_inline) {
272 return 1;
273 }
274
275 path = btrfs_alloc_path();
276 if (!path)
277 return -ENOMEM;
278
279 trans = btrfs_join_transaction(root);
280 if (IS_ERR(trans)) {
281 btrfs_free_path(path);
282 return PTR_ERR(trans);
283 }
284 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
285
286 if (compressed_size && compressed_pages)
287 extent_item_size = btrfs_file_extent_calc_inline_size(
288 compressed_size);
289 else
290 extent_item_size = btrfs_file_extent_calc_inline_size(
291 inline_len);
292
293 ret = __btrfs_drop_extents(trans, root, inode, path,
294 start, aligned_end, NULL,
295 1, 1, extent_item_size, &extent_inserted);
296 if (ret) {
297 btrfs_abort_transaction(trans, root, ret);
298 goto out;
299 }
300
301 if (isize > actual_end)
302 inline_len = min_t(u64, isize, actual_end);
303 ret = insert_inline_extent(trans, path, extent_inserted,
304 root, inode, start,
305 inline_len, compressed_size,
306 compress_type, compressed_pages);
307 if (ret && ret != -ENOSPC) {
308 btrfs_abort_transaction(trans, root, ret);
309 goto out;
310 } else if (ret == -ENOSPC) {
311 ret = 1;
312 goto out;
313 }
314
315 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
316 btrfs_delalloc_release_metadata(inode, end + 1 - start);
317 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
318out:
319
320
321
322
323
324
325 btrfs_qgroup_free_data(inode, 0, PAGE_SIZE);
326 btrfs_free_path(path);
327 btrfs_end_transaction(trans, root);
328 return ret;
329}
330
/*
 * One piece of work produced by compress_file_range() and consumed by
 * submit_compressed_extents().
 */
struct async_extent {
	u64 start;		/* file offset where the range begins */
	u64 ram_size;		/* length of the range in the file */
	u64 compressed_size;	/* bytes to reserve on disk for the range */
	struct page **pages;	/* compressed pages; NULL means write uncompressed */
	unsigned long nr_pages;	/* number of entries in @pages */
	int compress_type;	/* BTRFS_COMPRESS_* value used for @pages */
	struct list_head list;	/* link in async_cow->extents */
};
340
/*
 * Work item describing one chunk of a delalloc range handed to the
 * delalloc workers by cow_file_range_async().
 */
struct async_cow {
	struct inode *inode;		/* igrab()ed inode; NULLed once dropped */
	struct btrfs_root *root;	/* root of @inode */
	struct page *locked_page;	/* page passed in by the caller of cow_file_range_async() */
	u64 start;			/* first byte of the chunk */
	u64 end;			/* last byte of the chunk (inclusive) */
	struct list_head extents;	/* list of struct async_extent */
	struct btrfs_work work;		/* queued on fs_info->delalloc_workers */
};
350
351static noinline int add_async_extent(struct async_cow *cow,
352 u64 start, u64 ram_size,
353 u64 compressed_size,
354 struct page **pages,
355 unsigned long nr_pages,
356 int compress_type)
357{
358 struct async_extent *async_extent;
359
360 async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS);
361 BUG_ON(!async_extent);
362 async_extent->start = start;
363 async_extent->ram_size = ram_size;
364 async_extent->compressed_size = compressed_size;
365 async_extent->pages = pages;
366 async_extent->nr_pages = nr_pages;
367 async_extent->compress_type = compress_type;
368 list_add_tail(&async_extent->list, &cow->extents);
369 return 0;
370}
371
372static inline int inode_need_compress(struct inode *inode)
373{
374 struct btrfs_root *root = BTRFS_I(inode)->root;
375
376
377 if (btrfs_test_opt(root, FORCE_COMPRESS))
378 return 1;
379
380 if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS)
381 return 0;
382 if (btrfs_test_opt(root, COMPRESS) ||
383 BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS ||
384 BTRFS_I(inode)->force_compress)
385 return 1;
386 return 0;
387}
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
/*
 * Compress the file range [@start, @end] of @inode in chunks and queue
 * the result on @async_cow->extents.
 *
 * For every chunk one async_extent is added: either with the compressed
 * pages, or (when compression is not wanted, fails, or does not save
 * space) one uncompressed async_extent covering the rest of the range.
 * A range starting at offset 0 is first offered to
 * cow_file_range_inline(); when that succeeds or fails hard nothing is
 * queued.  *@num_added is incremented once per queued async_extent.
 *
 * Nothing is written to disk here; submit_compressed_extents() consumes
 * the queued entries.
 */
static noinline void compress_file_range(struct inode *inode,
					struct page *locked_page,
					u64 start, u64 end,
					struct async_cow *async_cow,
					int *num_added)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 num_bytes;
	u64 blocksize = root->sectorsize;
	u64 actual_end;
	u64 isize = i_size_read(inode);
	int ret = 0;
	struct page **pages = NULL;
	unsigned long nr_pages;
	unsigned long nr_pages_ret = 0;
	unsigned long total_compressed = 0;
	unsigned long total_in = 0;
	unsigned long max_compressed = SZ_128K;
	unsigned long max_uncompressed = SZ_128K;
	int i;
	int will_compress;
	int compress_type = root->fs_info->compress_type;
	int redirty = 0;

	/* a small write that does not cover the whole file: queue a defrag */
	if ((end - start + 1) < SZ_16K &&
	    (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
		btrfs_add_inode_defrag(NULL, inode);

	actual_end = min_t(u64, isize, end + 1);
again:
	will_compress = 0;
	/* at most 128K worth of pages are compressed per pass */
	nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1;
	nr_pages = min_t(unsigned long, nr_pages, SZ_128K / PAGE_SIZE);

	/*
	 * The remaining range lies entirely at or beyond i_size: there is
	 * nothing to compress, hand it over uncompressed.
	 */
	if (actual_end <= start)
		goto cleanup_and_bail_uncompressed;

	total_compressed = actual_end - start;

	/*
	 * Skip compression for at most one block of data unless the range
	 * is the whole file (that case may still become an inline extent).
	 */
	if (total_compressed <= blocksize &&
	   (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
		goto cleanup_and_bail_uncompressed;

	/* input is capped at max_uncompressed bytes per pass */
	total_compressed = min(total_compressed, max_uncompressed);
	num_bytes = ALIGN(end - start + 1, blocksize);
	num_bytes = max(blocksize, num_bytes);
	total_in = 0;
	ret = 0;

	if (inode_need_compress(inode)) {
		WARN_ON(pages);
		pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
		if (!pages) {
			/* no memory for the page array: continue uncompressed */
			goto cont;
		}

		/* a per-inode forced compression type overrides the fs default */
		if (BTRFS_I(inode)->force_compress)
			compress_type = BTRFS_I(inode)->force_compress;

		/*
		 * The dirty bits are cleared before compressing and recorded
		 * in @redirty so the uncompressed fallback below can set them
		 * again with extent_range_redirty_for_io().
		 */
		extent_range_clear_dirty_for_io(inode, start, end);
		redirty = 1;
		ret = btrfs_compress_pages(compress_type,
					   inode->i_mapping, start,
					   total_compressed, pages,
					   nr_pages, &nr_pages_ret,
					   &total_in,
					   &total_compressed,
					   max_compressed);

		if (!ret) {
			unsigned long offset = total_compressed &
				(PAGE_SIZE - 1);
			struct page *page = pages[nr_pages_ret - 1];
			char *kaddr;

			/* zero the unused tail of the last compressed page */
			if (offset) {
				kaddr = kmap_atomic(page);
				memset(kaddr + offset, 0,
				       PAGE_SIZE - offset);
				kunmap_atomic(kaddr);
			}
			will_compress = 1;
		}
	}
cont:
	if (start == 0) {
		/* the range starts the file: try to make an inline extent */
		if (ret || total_in < (actual_end - start)) {
			/*
			 * Compression failed or did not consume all the
			 * data: offer the uncompressed bytes instead.
			 */
			ret = cow_file_range_inline(root, inode, start, end,
						    0, 0, NULL);
		} else {
			/* offer the compressed payload */
			ret = cow_file_range_inline(root, inode, start, end,
						    total_compressed,
						    compress_type, pages);
		}
		if (ret <= 0) {
			unsigned long clear_flags = EXTENT_DELALLOC |
				EXTENT_DEFRAG;
			unsigned long page_error_op;

			clear_flags |= (ret < 0) ? EXTENT_DO_ACCOUNTING : 0;
			page_error_op = ret < 0 ? PAGE_SET_ERROR : 0;

			/*
			 * Inline extent created (ret == 0) or hard error
			 * (ret < 0): either way the range is finished here,
			 * so clear delalloc, unlock the pages and cycle them
			 * through writeback; on error also mark them bad.
			 */
			extent_clear_unlock_delalloc(inode, start, end, NULL,
						     clear_flags, PAGE_UNLOCK |
						     PAGE_CLEAR_DIRTY |
						     PAGE_SET_WRITEBACK |
						     page_error_op |
						     PAGE_END_WRITEBACK);
			goto free_pages_out;
		}
	}

	if (will_compress) {
		/*
		 * Compare block-aligned compressed size with page-aligned
		 * input size; compression is only kept when it is smaller.
		 */
		total_compressed = ALIGN(total_compressed, blocksize);

		total_in = ALIGN(total_in, PAGE_SIZE);
		if (total_compressed >= total_in) {
			will_compress = 0;
		} else {
			num_bytes = total_in;
		}
	}
	if (!will_compress && pages) {
		/* compression is not used after all: release its pages */
		for (i = 0; i < nr_pages_ret; i++) {
			WARN_ON(pages[i]->mapping);
			put_page(pages[i]);
		}
		kfree(pages);
		pages = NULL;
		total_compressed = 0;
		nr_pages_ret = 0;

		/* remember the poor result unless compression is forced */
		if (!btrfs_test_opt(root, FORCE_COMPRESS) &&
		    !(BTRFS_I(inode)->force_compress)) {
			BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
		}
	}
	if (will_compress) {
		*num_added += 1;

		/* ownership of @pages passes to the queued async_extent */
		add_async_extent(async_cow, start, num_bytes,
				 total_compressed, pages, nr_pages_ret,
				 compress_type);

		if (start + num_bytes < end) {
			start += num_bytes;
			pages = NULL;
			cond_resched();
			goto again;
		}
	} else {
cleanup_and_bail_uncompressed:
		/*
		 * Queue the remainder of the range as one uncompressed
		 * async_extent.  The locked page is re-dirtied when it falls
		 * inside the range, and so is the whole range if its dirty
		 * bits were cleared for compression above.
		 */
		if (page_offset(locked_page) >= start &&
		    page_offset(locked_page) <= end) {
			__set_page_dirty_nobuffers(locked_page);
			/* the page itself is not unlocked here */
		}
		if (redirty)
			extent_range_redirty_for_io(inode, start, end);
		add_async_extent(async_cow, start, end - start + 1,
				 0, NULL, 0, BTRFS_COMPRESS_NONE);
		*num_added += 1;
	}

	return;

free_pages_out:
	/* release any compressed pages that were not queued */
	for (i = 0; i < nr_pages_ret; i++) {
		WARN_ON(pages[i]->mapping);
		put_page(pages[i]);
	}
	kfree(pages);
}
657
658static void free_async_extent_pages(struct async_extent *async_extent)
659{
660 int i;
661
662 if (!async_extent->pages)
663 return;
664
665 for (i = 0; i < async_extent->nr_pages; i++) {
666 WARN_ON(async_extent->pages[i]->mapping);
667 put_page(async_extent->pages[i]);
668 }
669 kfree(async_extent->pages);
670 async_extent->nr_pages = 0;
671 async_extent->pages = NULL;
672}
673
674
675
676
677
678
679
/*
 * Consume every async_extent queued on @async_cow->extents.
 *
 * Entries without pages are written uncompressed through
 * cow_file_range().  Entries with pages get a disk extent reserved, a
 * pinned compressed extent map and an ordered extent, and are then
 * submitted with btrfs_submit_compressed_write().  When the reservation
 * fails with -ENOSPC the compressed pages are dropped and the same entry
 * is retried uncompressed.  Other failures clean up the affected range,
 * free the entry and continue with the next one.
 */
static noinline void submit_compressed_extents(struct inode *inode,
					      struct async_cow *async_cow)
{
	struct async_extent *async_extent;
	u64 alloc_hint = 0;
	struct btrfs_key ins;
	struct extent_map *em;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	struct extent_io_tree *io_tree;
	int ret = 0;

again:
	while (!list_empty(&async_cow->extents)) {
		async_extent = list_entry(async_cow->extents.next,
					  struct async_extent, list);
		list_del(&async_extent->list);

		io_tree = &BTRFS_I(inode)->io_tree;

retry:
		/* no pages: this range is to be written uncompressed */
		if (!async_extent->pages) {
			int page_started = 0;
			unsigned long nr_written = 0;

			lock_extent(io_tree, async_extent->start,
					 async_extent->start +
					 async_extent->ram_size - 1);

			/* allocate blocks; unlock == 0 keeps the pages locked */
			ret = cow_file_range(inode, async_cow->locked_page,
					     async_extent->start,
					     async_extent->start +
					     async_extent->ram_size - 1,
					     &page_started, &nr_written, 0);

			/*
			 * If cow_file_range() did not start the IO itself
			 * (page_started == 0) and succeeded, write the still
			 * locked range out here.  On error only the locked
			 * page is unlocked.
			 */
			if (!page_started && !ret)
				extent_write_locked_range(io_tree,
						  inode, async_extent->start,
						  async_extent->start +
						  async_extent->ram_size - 1,
						  btrfs_get_extent,
						  WB_SYNC_ALL);
			else if (ret)
				unlock_page(async_cow->locked_page);
			kfree(async_extent);
			cond_resched();
			continue;
		}

		lock_extent(io_tree, async_extent->start,
			    async_extent->start + async_extent->ram_size - 1);

		/* reserve exactly compressed_size bytes on disk */
		ret = btrfs_reserve_extent(root,
					   async_extent->compressed_size,
					   async_extent->compressed_size,
					   0, alloc_hint, &ins, 1, 1);
		if (ret) {
			free_async_extent_pages(async_extent);

			if (ret == -ENOSPC) {
				unlock_extent(io_tree, async_extent->start,
					      async_extent->start +
					      async_extent->ram_size - 1);

				/*
				 * Re-dirty the range and retry: the pages
				 * pointer is NULL now, so the retry takes the
				 * uncompressed branch above.
				 */
				extent_range_redirty_for_io(inode,
						async_extent->start,
						async_extent->start +
						async_extent->ram_size - 1);

				goto retry;
			}
			goto out_free;
		}

		/* drop cached mappings for the range before adding the new one */
		btrfs_drop_extent_cache(inode, async_extent->start,
					async_extent->start +
					async_extent->ram_size - 1, 0);

		em = alloc_extent_map();
		if (!em) {
			ret = -ENOMEM;
			goto out_free_reserve;
		}
		em->start = async_extent->start;
		em->len = async_extent->ram_size;
		em->orig_start = em->start;
		em->mod_start = em->start;
		em->mod_len = em->len;

		em->block_start = ins.objectid;
		em->block_len = ins.offset;
		em->orig_block_len = ins.offset;
		em->ram_bytes = async_extent->ram_size;
		em->bdev = root->fs_info->fs_devices->latest_bdev;
		em->compress_type = async_extent->compress_type;
		set_bit(EXTENT_FLAG_PINNED, &em->flags);
		set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
		em->generation = -1;

		/* insert the mapping, dropping conflicting ones until it fits */
		while (1) {
			write_lock(&em_tree->lock);
			ret = add_extent_mapping(em_tree, em, 1);
			write_unlock(&em_tree->lock);
			if (ret != -EEXIST) {
				free_extent_map(em);
				break;
			}
			btrfs_drop_extent_cache(inode, async_extent->start,
						async_extent->start +
						async_extent->ram_size - 1, 0);
		}

		if (ret)
			goto out_free_reserve;

		ret = btrfs_add_ordered_extent_compress(inode,
						async_extent->start,
						ins.objectid,
						async_extent->ram_size,
						ins.offset,
						BTRFS_ORDERED_COMPRESSED,
						async_extent->compress_type);
		if (ret) {
			btrfs_drop_extent_cache(inode, async_extent->start,
						async_extent->start +
						async_extent->ram_size - 1, 0);
			goto out_free_reserve;
		}

		/* clear delalloc, unlock the pages and mark them writeback */
		extent_clear_unlock_delalloc(inode, async_extent->start,
				async_extent->start +
				async_extent->ram_size - 1,
				NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
				PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
				PAGE_SET_WRITEBACK);
		ret = btrfs_submit_compressed_write(inode,
				    async_extent->start,
				    async_extent->ram_size,
				    ins.objectid,
				    ins.offset, async_extent->pages,
				    async_extent->nr_pages);
		if (ret) {
			struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
			struct page *p = async_extent->pages[0];
			const u64 start = async_extent->start;
			const u64 end = start + async_extent->ram_size - 1;

			/*
			 * Submission failed: run the end-io hook by hand.
			 * The first compressed page temporarily borrows the
			 * inode's mapping for the duration of the hook call.
			 */
			p->mapping = inode->i_mapping;
			tree->ops->writepage_end_io_hook(p, start, end,
							 NULL, 0);
			p->mapping = NULL;
			extent_clear_unlock_delalloc(inode, start, end, NULL, 0,
						     PAGE_END_WRITEBACK |
						     PAGE_SET_ERROR);
			free_async_extent_pages(async_extent);
		}
		alloc_hint = ins.objectid + ins.offset;
		kfree(async_extent);
		cond_resched();
	}
	return;
out_free_reserve:
	/* give the reserved disk extent back */
	btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
out_free:
	/* fail the range: clear its state and end writeback with an error */
	extent_clear_unlock_delalloc(inode, async_extent->start,
				     async_extent->start +
				     async_extent->ram_size - 1,
				     NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
				     EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
				     PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
				     PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK |
				     PAGE_SET_ERROR);
	free_async_extent_pages(async_extent);
	kfree(async_extent);
	/* keep processing the remaining queued extents */
	goto again;
}
878
879static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
880 u64 num_bytes)
881{
882 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
883 struct extent_map *em;
884 u64 alloc_hint = 0;
885
886 read_lock(&em_tree->lock);
887 em = search_extent_mapping(em_tree, start, num_bytes);
888 if (em) {
889
890
891
892
893
894 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
895 free_extent_map(em);
896 em = search_extent_mapping(em_tree, 0, 0);
897 if (em && em->block_start < EXTENT_MAP_LAST_BYTE)
898 alloc_hint = em->block_start;
899 if (em)
900 free_extent_map(em);
901 } else {
902 alloc_hint = em->block_start;
903 free_extent_map(em);
904 }
905 }
906 read_unlock(&em_tree->lock);
907
908 return alloc_hint;
909}
910
911
912
913
914
915
916
917
918
919
920
921
922
923
/*
 * Allocate new disk extents for the delalloc range [@start, @end] of
 * @inode and set up the extent maps and ordered extents for them.
 *
 * @locked_page:  passed through to extent_clear_unlock_delalloc()
 * @page_started: set to 1 when the range was stored as an inline extent
 *                (the pages have then already been through writeback)
 * @nr_written:   incremented by the number of pages in the range in the
 *                inline case
 * @unlock:       when non-zero the pages of each allocated chunk are
 *                unlocked (PAGE_UNLOCK) as the chunk is set up
 *
 * A range starting at offset 0 is first offered to
 * cow_file_range_inline().  Otherwise, and when inlining is not possible,
 * the range is covered by as many reserved extents as needed.
 *
 * Returns 0 on success or a negative errno; on error the delalloc state
 * of [start, end] (with @start as advanced so far) is cleared.
 */
static noinline int cow_file_range(struct inode *inode,
				   struct page *locked_page,
				   u64 start, u64 end, int *page_started,
				   unsigned long *nr_written,
				   int unlock)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 alloc_hint = 0;
	u64 num_bytes;
	unsigned long ram_size;
	u64 disk_num_bytes;
	u64 cur_alloc_size;
	u64 blocksize = root->sectorsize;
	struct btrfs_key ins;
	struct extent_map *em;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	int ret = 0;

	/* free space inodes are not expected on this path */
	if (btrfs_is_free_space_inode(inode)) {
		WARN_ON_ONCE(1);
		ret = -EINVAL;
		goto out_unlock;
	}

	/* work in whole blocks, at least one */
	num_bytes = ALIGN(end - start + 1, blocksize);
	num_bytes = max(blocksize, num_bytes);
	disk_num_bytes = num_bytes;

	/* a small write that does not cover the whole file: queue a defrag */
	if (num_bytes < SZ_64K &&
	    (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
		btrfs_add_inode_defrag(NULL, inode);

	if (start == 0) {
		/* try to store the data as an inline extent first */
		ret = cow_file_range_inline(root, inode, start, end, 0, 0,
					    NULL);
		if (ret == 0) {
			/* inlined: finish the pages right here */
			extent_clear_unlock_delalloc(inode, start, end, NULL,
				     EXTENT_LOCKED | EXTENT_DELALLOC |
				     EXTENT_DEFRAG, PAGE_UNLOCK |
				     PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
				     PAGE_END_WRITEBACK);

			*nr_written = *nr_written +
			     (end - start + PAGE_SIZE) / PAGE_SIZE;
			*page_started = 1;
			goto out;
		} else if (ret < 0) {
			goto out_unlock;
		}
		/* ret > 0: not inlined, fall through to a regular extent */
	}

	BUG_ON(disk_num_bytes >
	       btrfs_super_total_bytes(root->fs_info->super_copy));

	alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
	btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);

	/* each pass reserves one extent; ins.offset is the size obtained */
	while (disk_num_bytes > 0) {
		unsigned long op;

		cur_alloc_size = disk_num_bytes;
		ret = btrfs_reserve_extent(root, cur_alloc_size,
					   root->sectorsize, 0, alloc_hint,
					   &ins, 1, 1);
		if (ret < 0)
			goto out_unlock;

		em = alloc_extent_map();
		if (!em) {
			ret = -ENOMEM;
			goto out_reserve;
		}
		em->start = start;
		em->orig_start = em->start;
		ram_size = ins.offset;
		em->len = ins.offset;
		em->mod_start = em->start;
		em->mod_len = em->len;

		em->block_start = ins.objectid;
		em->block_len = ins.offset;
		em->orig_block_len = ins.offset;
		em->ram_bytes = ram_size;
		em->bdev = root->fs_info->fs_devices->latest_bdev;
		set_bit(EXTENT_FLAG_PINNED, &em->flags);
		em->generation = -1;

		/* insert the mapping, dropping conflicting ones until it fits */
		while (1) {
			write_lock(&em_tree->lock);
			ret = add_extent_mapping(em_tree, em, 1);
			write_unlock(&em_tree->lock);
			if (ret != -EEXIST) {
				free_extent_map(em);
				break;
			}
			btrfs_drop_extent_cache(inode, start,
						start + ram_size - 1, 0);
		}
		if (ret)
			goto out_reserve;

		cur_alloc_size = ins.offset;
		ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
					       ram_size, cur_alloc_size, 0);
		if (ret)
			goto out_drop_extent_cache;

		/* the data relocation tree also gets the checksums cloned */
		if (root->root_key.objectid ==
		    BTRFS_DATA_RELOC_TREE_OBJECTID) {
			ret = btrfs_reloc_clone_csums(inode, start,
						      cur_alloc_size);
			if (ret)
				goto out_drop_extent_cache;
		}

		if (disk_num_bytes < cur_alloc_size)
			break;

		/*
		 * Clear the locked/delalloc bits of this chunk, unlock its
		 * pages if asked to, and set PAGE_SET_PRIVATE2 on them.
		 * NOTE(review): Private2 presumably marks pages that have an
		 * ordered extent pending -- confirm against the end-io code.
		 */
		op = unlock ? PAGE_UNLOCK : 0;
		op |= PAGE_SET_PRIVATE2;

		extent_clear_unlock_delalloc(inode, start,
					     start + ram_size - 1, locked_page,
					     EXTENT_LOCKED | EXTENT_DELALLOC,
					     op);
		disk_num_bytes -= cur_alloc_size;
		num_bytes -= cur_alloc_size;
		alloc_hint = ins.objectid + ins.offset;
		start += cur_alloc_size;
	}
out:
	return ret;

out_drop_extent_cache:
	btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0);
out_reserve:
	btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
out_unlock:
	/*
	 * NOTE(review): @start may have been advanced by earlier loop
	 * passes, so only the not yet processed part of the range is
	 * cleaned up here -- confirm the earlier chunks are handled
	 * through their ordered extents.
	 */
	extent_clear_unlock_delalloc(inode, start, end, locked_page,
				     EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
				     EXTENT_DELALLOC | EXTENT_DEFRAG,
				     PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
				     PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
	goto out;
}
1078
1079
1080
1081
1082static noinline void async_cow_start(struct btrfs_work *work)
1083{
1084 struct async_cow *async_cow;
1085 int num_added = 0;
1086 async_cow = container_of(work, struct async_cow, work);
1087
1088 compress_file_range(async_cow->inode, async_cow->locked_page,
1089 async_cow->start, async_cow->end, async_cow,
1090 &num_added);
1091 if (num_added == 0) {
1092 btrfs_add_delayed_iput(async_cow->inode);
1093 async_cow->inode = NULL;
1094 }
1095}
1096
1097
1098
1099
1100static noinline void async_cow_submit(struct btrfs_work *work)
1101{
1102 struct async_cow *async_cow;
1103 struct btrfs_root *root;
1104 unsigned long nr_pages;
1105
1106 async_cow = container_of(work, struct async_cow, work);
1107
1108 root = async_cow->root;
1109 nr_pages = (async_cow->end - async_cow->start + PAGE_SIZE) >>
1110 PAGE_SHIFT;
1111
1112
1113
1114
1115 if (atomic_sub_return(nr_pages, &root->fs_info->async_delalloc_pages) <
1116 5 * SZ_1M &&
1117 waitqueue_active(&root->fs_info->async_submit_wait))
1118 wake_up(&root->fs_info->async_submit_wait);
1119
1120 if (async_cow->inode)
1121 submit_compressed_extents(async_cow->inode, async_cow);
1122}
1123
1124static noinline void async_cow_free(struct btrfs_work *work)
1125{
1126 struct async_cow *async_cow;
1127 async_cow = container_of(work, struct async_cow, work);
1128 if (async_cow->inode)
1129 btrfs_add_delayed_iput(async_cow->inode);
1130 kfree(async_cow);
1131}
1132
1133static int cow_file_range_async(struct inode *inode, struct page *locked_page,
1134 u64 start, u64 end, int *page_started,
1135 unsigned long *nr_written)
1136{
1137 struct async_cow *async_cow;
1138 struct btrfs_root *root = BTRFS_I(inode)->root;
1139 unsigned long nr_pages;
1140 u64 cur_end;
1141 int limit = 10 * SZ_1M;
1142
1143 clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
1144 1, 0, NULL, GFP_NOFS);
1145 while (start < end) {
1146 async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
1147 BUG_ON(!async_cow);
1148 async_cow->inode = igrab(inode);
1149 async_cow->root = root;
1150 async_cow->locked_page = locked_page;
1151 async_cow->start = start;
1152
1153 if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
1154 !btrfs_test_opt(root, FORCE_COMPRESS))
1155 cur_end = end;
1156 else
1157 cur_end = min(end, start + SZ_512K - 1);
1158
1159 async_cow->end = cur_end;
1160 INIT_LIST_HEAD(&async_cow->extents);
1161
1162 btrfs_init_work(&async_cow->work,
1163 btrfs_delalloc_helper,
1164 async_cow_start, async_cow_submit,
1165 async_cow_free);
1166
1167 nr_pages = (cur_end - start + PAGE_SIZE) >>
1168 PAGE_SHIFT;
1169 atomic_add(nr_pages, &root->fs_info->async_delalloc_pages);
1170
1171 btrfs_queue_work(root->fs_info->delalloc_workers,
1172 &async_cow->work);
1173
1174 if (atomic_read(&root->fs_info->async_delalloc_pages) > limit) {
1175 wait_event(root->fs_info->async_submit_wait,
1176 (atomic_read(&root->fs_info->async_delalloc_pages) <
1177 limit));
1178 }
1179
1180 while (atomic_read(&root->fs_info->async_submit_draining) &&
1181 atomic_read(&root->fs_info->async_delalloc_pages)) {
1182 wait_event(root->fs_info->async_submit_wait,
1183 (atomic_read(&root->fs_info->async_delalloc_pages) ==
1184 0));
1185 }
1186
1187 *nr_written += nr_pages;
1188 start = cur_end + 1;
1189 }
1190 *page_started = 1;
1191 return 0;
1192}
1193
1194static noinline int csum_exist_in_range(struct btrfs_root *root,
1195 u64 bytenr, u64 num_bytes)
1196{
1197 int ret;
1198 struct btrfs_ordered_sum *sums;
1199 LIST_HEAD(list);
1200
1201 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, bytenr,
1202 bytenr + num_bytes - 1, &list, 0);
1203 if (ret == 0 && list_empty(&list))
1204 return 0;
1205
1206 while (!list_empty(&list)) {
1207 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
1208 list_del(&sums->list);
1209 kfree(sums);
1210 }
1211 return 1;
1212}
1213
1214
1215
1216
1217
1218
1219
1220
1221static noinline int run_delalloc_nocow(struct inode *inode,
1222 struct page *locked_page,
1223 u64 start, u64 end, int *page_started, int force,
1224 unsigned long *nr_written)
1225{
1226 struct btrfs_root *root = BTRFS_I(inode)->root;
1227 struct btrfs_trans_handle *trans;
1228 struct extent_buffer *leaf;
1229 struct btrfs_path *path;
1230 struct btrfs_file_extent_item *fi;
1231 struct btrfs_key found_key;
1232 u64 cow_start;
1233 u64 cur_offset;
1234 u64 extent_end;
1235 u64 extent_offset;
1236 u64 disk_bytenr;
1237 u64 num_bytes;
1238 u64 disk_num_bytes;
1239 u64 ram_bytes;
1240 int extent_type;
1241 int ret, err;
1242 int type;
1243 int nocow;
1244 int check_prev = 1;
1245 bool nolock;
1246 u64 ino = btrfs_ino(inode);
1247
1248 path = btrfs_alloc_path();
1249 if (!path) {
1250 extent_clear_unlock_delalloc(inode, start, end, locked_page,
1251 EXTENT_LOCKED | EXTENT_DELALLOC |
1252 EXTENT_DO_ACCOUNTING |
1253 EXTENT_DEFRAG, PAGE_UNLOCK |
1254 PAGE_CLEAR_DIRTY |
1255 PAGE_SET_WRITEBACK |
1256 PAGE_END_WRITEBACK);
1257 return -ENOMEM;
1258 }
1259
1260 nolock = btrfs_is_free_space_inode(inode);
1261
1262 if (nolock)
1263 trans = btrfs_join_transaction_nolock(root);
1264 else
1265 trans = btrfs_join_transaction(root);
1266
1267 if (IS_ERR(trans)) {
1268 extent_clear_unlock_delalloc(inode, start, end, locked_page,
1269 EXTENT_LOCKED | EXTENT_DELALLOC |
1270 EXTENT_DO_ACCOUNTING |
1271 EXTENT_DEFRAG, PAGE_UNLOCK |
1272 PAGE_CLEAR_DIRTY |
1273 PAGE_SET_WRITEBACK |
1274 PAGE_END_WRITEBACK);
1275 btrfs_free_path(path);
1276 return PTR_ERR(trans);
1277 }
1278
1279 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1280
1281 cow_start = (u64)-1;
1282 cur_offset = start;
1283 while (1) {
1284 ret = btrfs_lookup_file_extent(trans, root, path, ino,
1285 cur_offset, 0);
1286 if (ret < 0)
1287 goto error;
1288 if (ret > 0 && path->slots[0] > 0 && check_prev) {
1289 leaf = path->nodes[0];
1290 btrfs_item_key_to_cpu(leaf, &found_key,
1291 path->slots[0] - 1);
1292 if (found_key.objectid == ino &&
1293 found_key.type == BTRFS_EXTENT_DATA_KEY)
1294 path->slots[0]--;
1295 }
1296 check_prev = 0;
1297next_slot:
1298 leaf = path->nodes[0];
1299 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
1300 ret = btrfs_next_leaf(root, path);
1301 if (ret < 0)
1302 goto error;
1303 if (ret > 0)
1304 break;
1305 leaf = path->nodes[0];
1306 }
1307
1308 nocow = 0;
1309 disk_bytenr = 0;
1310 num_bytes = 0;
1311 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1312
1313 if (found_key.objectid > ino)
1314 break;
1315 if (WARN_ON_ONCE(found_key.objectid < ino) ||
1316 found_key.type < BTRFS_EXTENT_DATA_KEY) {
1317 path->slots[0]++;
1318 goto next_slot;
1319 }
1320 if (found_key.type > BTRFS_EXTENT_DATA_KEY ||
1321 found_key.offset > end)
1322 break;
1323
1324 if (found_key.offset > cur_offset) {
1325 extent_end = found_key.offset;
1326 extent_type = 0;
1327 goto out_check;
1328 }
1329
1330 fi = btrfs_item_ptr(leaf, path->slots[0],
1331 struct btrfs_file_extent_item);
1332 extent_type = btrfs_file_extent_type(leaf, fi);
1333
1334 ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
1335 if (extent_type == BTRFS_FILE_EXTENT_REG ||
1336 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1337 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1338 extent_offset = btrfs_file_extent_offset(leaf, fi);
1339 extent_end = found_key.offset +
1340 btrfs_file_extent_num_bytes(leaf, fi);
1341 disk_num_bytes =
1342 btrfs_file_extent_disk_num_bytes(leaf, fi);
1343 if (extent_end <= start) {
1344 path->slots[0]++;
1345 goto next_slot;
1346 }
1347 if (disk_bytenr == 0)
1348 goto out_check;
1349 if (btrfs_file_extent_compression(leaf, fi) ||
1350 btrfs_file_extent_encryption(leaf, fi) ||
1351 btrfs_file_extent_other_encoding(leaf, fi))
1352 goto out_check;
1353 if (extent_type == BTRFS_FILE_EXTENT_REG && !force)
1354 goto out_check;
1355 if (btrfs_extent_readonly(root, disk_bytenr))
1356 goto out_check;
1357 if (btrfs_cross_ref_exist(trans, root, ino,
1358 found_key.offset -
1359 extent_offset, disk_bytenr))
1360 goto out_check;
1361 disk_bytenr += extent_offset;
1362 disk_bytenr += cur_offset - found_key.offset;
1363 num_bytes = min(end + 1, extent_end) - cur_offset;
1364
1365
1366
1367
1368 if (!nolock) {
1369 err = btrfs_start_write_no_snapshoting(root);
1370 if (!err)
1371 goto out_check;
1372 }
1373
1374
1375
1376
1377
1378 if (csum_exist_in_range(root, disk_bytenr, num_bytes))
1379 goto out_check;
1380 nocow = 1;
1381 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1382 extent_end = found_key.offset +
1383 btrfs_file_extent_inline_len(leaf,
1384 path->slots[0], fi);
1385 extent_end = ALIGN(extent_end, root->sectorsize);
1386 } else {
1387 BUG_ON(1);
1388 }
1389out_check:
1390 if (extent_end <= start) {
1391 path->slots[0]++;
1392 if (!nolock && nocow)
1393 btrfs_end_write_no_snapshoting(root);
1394 goto next_slot;
1395 }
1396 if (!nocow) {
1397 if (cow_start == (u64)-1)
1398 cow_start = cur_offset;
1399 cur_offset = extent_end;
1400 if (cur_offset > end)
1401 break;
1402 path->slots[0]++;
1403 goto next_slot;
1404 }
1405
1406 btrfs_release_path(path);
1407 if (cow_start != (u64)-1) {
1408 ret = cow_file_range(inode, locked_page,
1409 cow_start, found_key.offset - 1,
1410 page_started, nr_written, 1);
1411 if (ret) {
1412 if (!nolock && nocow)
1413 btrfs_end_write_no_snapshoting(root);
1414 goto error;
1415 }
1416 cow_start = (u64)-1;
1417 }
1418
1419 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1420 struct extent_map *em;
1421 struct extent_map_tree *em_tree;
1422 em_tree = &BTRFS_I(inode)->extent_tree;
1423 em = alloc_extent_map();
1424 BUG_ON(!em);
1425 em->start = cur_offset;
1426 em->orig_start = found_key.offset - extent_offset;
1427 em->len = num_bytes;
1428 em->block_len = num_bytes;
1429 em->block_start = disk_bytenr;
1430 em->orig_block_len = disk_num_bytes;
1431 em->ram_bytes = ram_bytes;
1432 em->bdev = root->fs_info->fs_devices->latest_bdev;
1433 em->mod_start = em->start;
1434 em->mod_len = em->len;
1435 set_bit(EXTENT_FLAG_PINNED, &em->flags);
1436 set_bit(EXTENT_FLAG_FILLING, &em->flags);
1437 em->generation = -1;
1438 while (1) {
1439 write_lock(&em_tree->lock);
1440 ret = add_extent_mapping(em_tree, em, 1);
1441 write_unlock(&em_tree->lock);
1442 if (ret != -EEXIST) {
1443 free_extent_map(em);
1444 break;
1445 }
1446 btrfs_drop_extent_cache(inode, em->start,
1447 em->start + em->len - 1, 0);
1448 }
1449 type = BTRFS_ORDERED_PREALLOC;
1450 } else {
1451 type = BTRFS_ORDERED_NOCOW;
1452 }
1453
1454 ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr,
1455 num_bytes, num_bytes, type);
1456 BUG_ON(ret);
1457
1458 if (root->root_key.objectid ==
1459 BTRFS_DATA_RELOC_TREE_OBJECTID) {
1460 ret = btrfs_reloc_clone_csums(inode, cur_offset,
1461 num_bytes);
1462 if (ret) {
1463 if (!nolock && nocow)
1464 btrfs_end_write_no_snapshoting(root);
1465 goto error;
1466 }
1467 }
1468
1469 extent_clear_unlock_delalloc(inode, cur_offset,
1470 cur_offset + num_bytes - 1,
1471 locked_page, EXTENT_LOCKED |
1472 EXTENT_DELALLOC, PAGE_UNLOCK |
1473 PAGE_SET_PRIVATE2);
1474 if (!nolock && nocow)
1475 btrfs_end_write_no_snapshoting(root);
1476 cur_offset = extent_end;
1477 if (cur_offset > end)
1478 break;
1479 }
1480 btrfs_release_path(path);
1481
1482 if (cur_offset <= end && cow_start == (u64)-1) {
1483 cow_start = cur_offset;
1484 cur_offset = end;
1485 }
1486
1487 if (cow_start != (u64)-1) {
1488 ret = cow_file_range(inode, locked_page, cow_start, end,
1489 page_started, nr_written, 1);
1490 if (ret)
1491 goto error;
1492 }
1493
1494error:
1495 err = btrfs_end_transaction(trans, root);
1496 if (!ret)
1497 ret = err;
1498
1499 if (ret && cur_offset < end)
1500 extent_clear_unlock_delalloc(inode, cur_offset, end,
1501 locked_page, EXTENT_LOCKED |
1502 EXTENT_DELALLOC | EXTENT_DEFRAG |
1503 EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
1504 PAGE_CLEAR_DIRTY |
1505 PAGE_SET_WRITEBACK |
1506 PAGE_END_WRITEBACK);
1507 btrfs_free_path(path);
1508 return ret;
1509}
1510
1511static inline int need_force_cow(struct inode *inode, u64 start, u64 end)
1512{
1513
1514 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
1515 !(BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC))
1516 return 0;
1517
1518
1519
1520
1521
1522
1523 if (BTRFS_I(inode)->defrag_bytes &&
1524 test_range_bit(&BTRFS_I(inode)->io_tree, start, end,
1525 EXTENT_DEFRAG, 0, NULL))
1526 return 1;
1527
1528 return 0;
1529}
1530
1531
1532
1533
1534static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1535 u64 start, u64 end, int *page_started,
1536 unsigned long *nr_written)
1537{
1538 int ret;
1539 int force_cow = need_force_cow(inode, start, end);
1540
1541 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW && !force_cow) {
1542 ret = run_delalloc_nocow(inode, locked_page, start, end,
1543 page_started, 1, nr_written);
1544 } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) {
1545 ret = run_delalloc_nocow(inode, locked_page, start, end,
1546 page_started, 0, nr_written);
1547 } else if (!inode_need_compress(inode)) {
1548 ret = cow_file_range(inode, locked_page, start, end,
1549 page_started, nr_written, 1);
1550 } else {
1551 set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
1552 &BTRFS_I(inode)->runtime_flags);
1553 ret = cow_file_range_async(inode, locked_page, start, end,
1554 page_started, nr_written);
1555 }
1556 return ret;
1557}
1558
1559static void btrfs_split_extent_hook(struct inode *inode,
1560 struct extent_state *orig, u64 split)
1561{
1562 u64 size;
1563
1564
1565 if (!(orig->state & EXTENT_DELALLOC))
1566 return;
1567
1568 size = orig->end - orig->start + 1;
1569 if (size > BTRFS_MAX_EXTENT_SIZE) {
1570 u64 num_extents;
1571 u64 new_size;
1572
1573
1574
1575
1576
1577 new_size = orig->end - split + 1;
1578 num_extents = div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
1579 BTRFS_MAX_EXTENT_SIZE);
1580 new_size = split - orig->start;
1581 num_extents += div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
1582 BTRFS_MAX_EXTENT_SIZE);
1583 if (div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
1584 BTRFS_MAX_EXTENT_SIZE) >= num_extents)
1585 return;
1586 }
1587
1588 spin_lock(&BTRFS_I(inode)->lock);
1589 BTRFS_I(inode)->outstanding_extents++;
1590 spin_unlock(&BTRFS_I(inode)->lock);
1591}
1592
1593
1594
1595
1596
1597
1598
1599static void btrfs_merge_extent_hook(struct inode *inode,
1600 struct extent_state *new,
1601 struct extent_state *other)
1602{
1603 u64 new_size, old_size;
1604 u64 num_extents;
1605
1606
1607 if (!(other->state & EXTENT_DELALLOC))
1608 return;
1609
1610 if (new->start > other->start)
1611 new_size = new->end - other->start + 1;
1612 else
1613 new_size = other->end - new->start + 1;
1614
1615
1616 if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
1617 spin_lock(&BTRFS_I(inode)->lock);
1618 BTRFS_I(inode)->outstanding_extents--;
1619 spin_unlock(&BTRFS_I(inode)->lock);
1620 return;
1621 }
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641 old_size = other->end - other->start + 1;
1642 num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
1643 BTRFS_MAX_EXTENT_SIZE);
1644 old_size = new->end - new->start + 1;
1645 num_extents += div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
1646 BTRFS_MAX_EXTENT_SIZE);
1647
1648 if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
1649 BTRFS_MAX_EXTENT_SIZE) >= num_extents)
1650 return;
1651
1652 spin_lock(&BTRFS_I(inode)->lock);
1653 BTRFS_I(inode)->outstanding_extents--;
1654 spin_unlock(&BTRFS_I(inode)->lock);
1655}
1656
1657static void btrfs_add_delalloc_inodes(struct btrfs_root *root,
1658 struct inode *inode)
1659{
1660 spin_lock(&root->delalloc_lock);
1661 if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1662 list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
1663 &root->delalloc_inodes);
1664 set_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1665 &BTRFS_I(inode)->runtime_flags);
1666 root->nr_delalloc_inodes++;
1667 if (root->nr_delalloc_inodes == 1) {
1668 spin_lock(&root->fs_info->delalloc_root_lock);
1669 BUG_ON(!list_empty(&root->delalloc_root));
1670 list_add_tail(&root->delalloc_root,
1671 &root->fs_info->delalloc_roots);
1672 spin_unlock(&root->fs_info->delalloc_root_lock);
1673 }
1674 }
1675 spin_unlock(&root->delalloc_lock);
1676}
1677
1678static void btrfs_del_delalloc_inode(struct btrfs_root *root,
1679 struct inode *inode)
1680{
1681 spin_lock(&root->delalloc_lock);
1682 if (!list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1683 list_del_init(&BTRFS_I(inode)->delalloc_inodes);
1684 clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1685 &BTRFS_I(inode)->runtime_flags);
1686 root->nr_delalloc_inodes--;
1687 if (!root->nr_delalloc_inodes) {
1688 spin_lock(&root->fs_info->delalloc_root_lock);
1689 BUG_ON(list_empty(&root->delalloc_root));
1690 list_del_init(&root->delalloc_root);
1691 spin_unlock(&root->fs_info->delalloc_root_lock);
1692 }
1693 }
1694 spin_unlock(&root->delalloc_lock);
1695}
1696
1697
1698
1699
1700
1701
1702static void btrfs_set_bit_hook(struct inode *inode,
1703 struct extent_state *state, unsigned *bits)
1704{
1705
1706 if ((*bits & EXTENT_DEFRAG) && !(*bits & EXTENT_DELALLOC))
1707 WARN_ON(1);
1708
1709
1710
1711
1712
1713 if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1714 struct btrfs_root *root = BTRFS_I(inode)->root;
1715 u64 len = state->end + 1 - state->start;
1716 bool do_list = !btrfs_is_free_space_inode(inode);
1717
1718 if (*bits & EXTENT_FIRST_DELALLOC) {
1719 *bits &= ~EXTENT_FIRST_DELALLOC;
1720 } else {
1721 spin_lock(&BTRFS_I(inode)->lock);
1722 BTRFS_I(inode)->outstanding_extents++;
1723 spin_unlock(&BTRFS_I(inode)->lock);
1724 }
1725
1726
1727 if (btrfs_test_is_dummy_root(root))
1728 return;
1729
1730 __percpu_counter_add(&root->fs_info->delalloc_bytes, len,
1731 root->fs_info->delalloc_batch);
1732 spin_lock(&BTRFS_I(inode)->lock);
1733 BTRFS_I(inode)->delalloc_bytes += len;
1734 if (*bits & EXTENT_DEFRAG)
1735 BTRFS_I(inode)->defrag_bytes += len;
1736 if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1737 &BTRFS_I(inode)->runtime_flags))
1738 btrfs_add_delalloc_inodes(root, inode);
1739 spin_unlock(&BTRFS_I(inode)->lock);
1740 }
1741}
1742
1743
1744
1745
1746static void btrfs_clear_bit_hook(struct inode *inode,
1747 struct extent_state *state,
1748 unsigned *bits)
1749{
1750 u64 len = state->end + 1 - state->start;
1751 u64 num_extents = div64_u64(len + BTRFS_MAX_EXTENT_SIZE -1,
1752 BTRFS_MAX_EXTENT_SIZE);
1753
1754 spin_lock(&BTRFS_I(inode)->lock);
1755 if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG))
1756 BTRFS_I(inode)->defrag_bytes -= len;
1757 spin_unlock(&BTRFS_I(inode)->lock);
1758
1759
1760
1761
1762
1763
1764 if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1765 struct btrfs_root *root = BTRFS_I(inode)->root;
1766 bool do_list = !btrfs_is_free_space_inode(inode);
1767
1768 if (*bits & EXTENT_FIRST_DELALLOC) {
1769 *bits &= ~EXTENT_FIRST_DELALLOC;
1770 } else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
1771 spin_lock(&BTRFS_I(inode)->lock);
1772 BTRFS_I(inode)->outstanding_extents -= num_extents;
1773 spin_unlock(&BTRFS_I(inode)->lock);
1774 }
1775
1776
1777
1778
1779
1780
1781 if (*bits & EXTENT_DO_ACCOUNTING &&
1782 root != root->fs_info->tree_root)
1783 btrfs_delalloc_release_metadata(inode, len);
1784
1785
1786 if (btrfs_test_is_dummy_root(root))
1787 return;
1788
1789 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
1790 && do_list && !(state->state & EXTENT_NORESERVE))
1791 btrfs_free_reserved_data_space_noquota(inode,
1792 state->start, len);
1793
1794 __percpu_counter_add(&root->fs_info->delalloc_bytes, -len,
1795 root->fs_info->delalloc_batch);
1796 spin_lock(&BTRFS_I(inode)->lock);
1797 BTRFS_I(inode)->delalloc_bytes -= len;
1798 if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
1799 test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1800 &BTRFS_I(inode)->runtime_flags))
1801 btrfs_del_delalloc_inode(root, inode);
1802 spin_unlock(&BTRFS_I(inode)->lock);
1803 }
1804}
1805
1806
1807
1808
1809
1810int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
1811 size_t size, struct bio *bio,
1812 unsigned long bio_flags)
1813{
1814 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
1815 u64 logical = (u64)bio->bi_iter.bi_sector << 9;
1816 u64 length = 0;
1817 u64 map_length;
1818 int ret;
1819
1820 if (bio_flags & EXTENT_BIO_COMPRESSED)
1821 return 0;
1822
1823 length = bio->bi_iter.bi_size;
1824 map_length = length;
1825 ret = btrfs_map_block(root->fs_info, rw, logical,
1826 &map_length, NULL, 0);
1827
1828 BUG_ON(ret < 0);
1829 if (map_length < length + size)
1830 return 1;
1831 return 0;
1832}
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842static int __btrfs_submit_bio_start(struct inode *inode, int rw,
1843 struct bio *bio, int mirror_num,
1844 unsigned long bio_flags,
1845 u64 bio_offset)
1846{
1847 struct btrfs_root *root = BTRFS_I(inode)->root;
1848 int ret = 0;
1849
1850 ret = btrfs_csum_one_bio(root, inode, bio, 0, 0);
1851 BUG_ON(ret);
1852 return 0;
1853}
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
1864 int mirror_num, unsigned long bio_flags,
1865 u64 bio_offset)
1866{
1867 struct btrfs_root *root = BTRFS_I(inode)->root;
1868 int ret;
1869
1870 ret = btrfs_map_bio(root, rw, bio, mirror_num, 1);
1871 if (ret) {
1872 bio->bi_error = ret;
1873 bio_endio(bio);
1874 }
1875 return ret;
1876}
1877
1878
1879
1880
1881
1882static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
1883 int mirror_num, unsigned long bio_flags,
1884 u64 bio_offset)
1885{
1886 struct btrfs_root *root = BTRFS_I(inode)->root;
1887 enum btrfs_wq_endio_type metadata = BTRFS_WQ_ENDIO_DATA;
1888 int ret = 0;
1889 int skip_sum;
1890 int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
1891
1892 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
1893
1894 if (btrfs_is_free_space_inode(inode))
1895 metadata = BTRFS_WQ_ENDIO_FREE_SPACE;
1896
1897 if (!(rw & REQ_WRITE)) {
1898 ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata);
1899 if (ret)
1900 goto out;
1901
1902 if (bio_flags & EXTENT_BIO_COMPRESSED) {
1903 ret = btrfs_submit_compressed_read(inode, bio,
1904 mirror_num,
1905 bio_flags);
1906 goto out;
1907 } else if (!skip_sum) {
1908 ret = btrfs_lookup_bio_sums(root, inode, bio, NULL);
1909 if (ret)
1910 goto out;
1911 }
1912 goto mapit;
1913 } else if (async && !skip_sum) {
1914
1915 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
1916 goto mapit;
1917
1918 ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
1919 inode, rw, bio, mirror_num,
1920 bio_flags, bio_offset,
1921 __btrfs_submit_bio_start,
1922 __btrfs_submit_bio_done);
1923 goto out;
1924 } else if (!skip_sum) {
1925 ret = btrfs_csum_one_bio(root, inode, bio, 0, 0);
1926 if (ret)
1927 goto out;
1928 }
1929
1930mapit:
1931 ret = btrfs_map_bio(root, rw, bio, mirror_num, 0);
1932
1933out:
1934 if (ret < 0) {
1935 bio->bi_error = ret;
1936 bio_endio(bio);
1937 }
1938 return ret;
1939}
1940
1941
1942
1943
1944
1945static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
1946 struct inode *inode, u64 file_offset,
1947 struct list_head *list)
1948{
1949 struct btrfs_ordered_sum *sum;
1950
1951 list_for_each_entry(sum, list, list) {
1952 trans->adding_csums = 1;
1953 btrfs_csum_file_blocks(trans,
1954 BTRFS_I(inode)->root->fs_info->csum_root, sum);
1955 trans->adding_csums = 0;
1956 }
1957 return 0;
1958}
1959
1960int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
1961 struct extent_state **cached_state)
1962{
1963 WARN_ON((end & (PAGE_SIZE - 1)) == 0);
1964 return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
1965 cached_state, GFP_NOFS);
1966}
1967
1968
/*
 * Work item allocated by btrfs_writepage_start_hook() and consumed (and
 * freed) by btrfs_writepage_fixup_worker().
 */
struct btrfs_writepage_fixup {
	/* Page to fix up; get_page() is called on it before queueing. */
	struct page *page;
	/* Queued on fs_info->fixup_workers. */
	struct btrfs_work work;
};
1973
1974static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
1975{
1976 struct btrfs_writepage_fixup *fixup;
1977 struct btrfs_ordered_extent *ordered;
1978 struct extent_state *cached_state = NULL;
1979 struct page *page;
1980 struct inode *inode;
1981 u64 page_start;
1982 u64 page_end;
1983 int ret;
1984
1985 fixup = container_of(work, struct btrfs_writepage_fixup, work);
1986 page = fixup->page;
1987again:
1988 lock_page(page);
1989 if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
1990 ClearPageChecked(page);
1991 goto out_page;
1992 }
1993
1994 inode = page->mapping->host;
1995 page_start = page_offset(page);
1996 page_end = page_offset(page) + PAGE_SIZE - 1;
1997
1998 lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end,
1999 &cached_state);
2000
2001
2002 if (PagePrivate2(page))
2003 goto out;
2004
2005 ordered = btrfs_lookup_ordered_range(inode, page_start,
2006 PAGE_SIZE);
2007 if (ordered) {
2008 unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start,
2009 page_end, &cached_state, GFP_NOFS);
2010 unlock_page(page);
2011 btrfs_start_ordered_extent(inode, ordered, 1);
2012 btrfs_put_ordered_extent(ordered);
2013 goto again;
2014 }
2015
2016 ret = btrfs_delalloc_reserve_space(inode, page_start,
2017 PAGE_SIZE);
2018 if (ret) {
2019 mapping_set_error(page->mapping, ret);
2020 end_extent_writepage(page, ret, page_start, page_end);
2021 ClearPageChecked(page);
2022 goto out;
2023 }
2024
2025 btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state);
2026 ClearPageChecked(page);
2027 set_page_dirty(page);
2028out:
2029 unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
2030 &cached_state, GFP_NOFS);
2031out_page:
2032 unlock_page(page);
2033 put_page(page);
2034 kfree(fixup);
2035}
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
2049{
2050 struct inode *inode = page->mapping->host;
2051 struct btrfs_writepage_fixup *fixup;
2052 struct btrfs_root *root = BTRFS_I(inode)->root;
2053
2054
2055 if (TestClearPagePrivate2(page))
2056 return 0;
2057
2058 if (PageChecked(page))
2059 return -EAGAIN;
2060
2061 fixup = kzalloc(sizeof(*fixup), GFP_NOFS);
2062 if (!fixup)
2063 return -EAGAIN;
2064
2065 SetPageChecked(page);
2066 get_page(page);
2067 btrfs_init_work(&fixup->work, btrfs_fixup_helper,
2068 btrfs_writepage_fixup_worker, NULL, NULL);
2069 fixup->page = page;
2070 btrfs_queue_work(root->fs_info->fixup_workers, &fixup->work);
2071 return -EBUSY;
2072}
2073
2074static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
2075 struct inode *inode, u64 file_pos,
2076 u64 disk_bytenr, u64 disk_num_bytes,
2077 u64 num_bytes, u64 ram_bytes,
2078 u8 compression, u8 encryption,
2079 u16 other_encoding, int extent_type)
2080{
2081 struct btrfs_root *root = BTRFS_I(inode)->root;
2082 struct btrfs_file_extent_item *fi;
2083 struct btrfs_path *path;
2084 struct extent_buffer *leaf;
2085 struct btrfs_key ins;
2086 int extent_inserted = 0;
2087 int ret;
2088
2089 path = btrfs_alloc_path();
2090 if (!path)
2091 return -ENOMEM;
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102 ret = __btrfs_drop_extents(trans, root, inode, path, file_pos,
2103 file_pos + num_bytes, NULL, 0,
2104 1, sizeof(*fi), &extent_inserted);
2105 if (ret)
2106 goto out;
2107
2108 if (!extent_inserted) {
2109 ins.objectid = btrfs_ino(inode);
2110 ins.offset = file_pos;
2111 ins.type = BTRFS_EXTENT_DATA_KEY;
2112
2113 path->leave_spinning = 1;
2114 ret = btrfs_insert_empty_item(trans, root, path, &ins,
2115 sizeof(*fi));
2116 if (ret)
2117 goto out;
2118 }
2119 leaf = path->nodes[0];
2120 fi = btrfs_item_ptr(leaf, path->slots[0],
2121 struct btrfs_file_extent_item);
2122 btrfs_set_file_extent_generation(leaf, fi, trans->transid);
2123 btrfs_set_file_extent_type(leaf, fi, extent_type);
2124 btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr);
2125 btrfs_set_file_extent_disk_num_bytes(leaf, fi, disk_num_bytes);
2126 btrfs_set_file_extent_offset(leaf, fi, 0);
2127 btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
2128 btrfs_set_file_extent_ram_bytes(leaf, fi, ram_bytes);
2129 btrfs_set_file_extent_compression(leaf, fi, compression);
2130 btrfs_set_file_extent_encryption(leaf, fi, encryption);
2131 btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding);
2132
2133 btrfs_mark_buffer_dirty(leaf);
2134 btrfs_release_path(path);
2135
2136 inode_add_bytes(inode, num_bytes);
2137
2138 ins.objectid = disk_bytenr;
2139 ins.offset = disk_num_bytes;
2140 ins.type = BTRFS_EXTENT_ITEM_KEY;
2141 ret = btrfs_alloc_reserved_file_extent(trans, root,
2142 root->root_key.objectid,
2143 btrfs_ino(inode), file_pos,
2144 ram_bytes, &ins);
2145
2146
2147
2148
2149 btrfs_qgroup_release_data(inode, file_pos, ram_bytes);
2150out:
2151 btrfs_free_path(path);
2152
2153 return ret;
2154}
2155
2156
/*
 * One file extent item, in some root and inode, that references an old
 * extent.  Instances are allocated and filled in by record_one_backref()
 * and ordered by backref_comp() on (root_id, inum, file_pos).
 */
struct sa_defrag_extent_backref {
	struct rb_node node;			/* linked into new_sa_defrag_extent::root */
	struct old_sa_defrag_extent *old;	/* old extent this backref was found for */
	u64 root_id;				/* objectid of the root that holds the referencing inode */
	u64 inum;				/* inode number of the referencing file */
	u64 file_pos;				/* key offset of the referencing file extent item */
	u64 extent_offset;			/* btrfs_file_extent_offset() of that item */
	u64 num_bytes;				/* btrfs_file_extent_num_bytes() of that item */
	u64 generation;				/* btrfs_file_extent_generation() of that item */
};
2167
/*
 * Describes one old extent whose backrefs are collected by
 * record_extent_backrefs().  Entries with no recorded backref are unlinked
 * and freed there.
 */
struct old_sa_defrag_extent {
	struct list_head list;			/* linked on new_sa_defrag_extent::head */
	struct new_sa_defrag_extent *new;	/* owning new extent */

	/*
	 * [extent_offset + offset, extent_offset + offset + len) is the range
	 * a candidate file extent item must overlap to be recorded.
	 */
	u64 extent_offset;
	u64 bytenr;	/* matched against the disk bytenr of candidate items */
	u64 offset;
	u64 len;
	int count;	/* number of backrefs recorded for this extent */
};
2178
/*
 * Describes the new extent that recorded backrefs get relinked to, together
 * with the bookkeeping used while collecting those backrefs.
 */
struct new_sa_defrag_extent {
	struct rb_root root;		/* tree of sa_defrag_extent_backref, see backref_insert() */
	struct list_head head;		/* list of old_sa_defrag_extent */
	struct btrfs_path *path;	/* set by record_extent_backrefs(), used by record_one_backref() */
	struct inode *inode;		/* source inode; its own references are skipped when recording */
	u64 file_pos;			/* subtracted from the file offset to get the relinked item's extent offset */
	u64 len;			/* written as ram_bytes of relinked items */
	u64 bytenr;			/* written as disk_bytenr of relinked items */
	u64 disk_len;			/* written as disk_num_bytes of relinked items */
	u8 compress_type;		/* written as compression of relinked items */
};
2190
2191static int backref_comp(struct sa_defrag_extent_backref *b1,
2192 struct sa_defrag_extent_backref *b2)
2193{
2194 if (b1->root_id < b2->root_id)
2195 return -1;
2196 else if (b1->root_id > b2->root_id)
2197 return 1;
2198
2199 if (b1->inum < b2->inum)
2200 return -1;
2201 else if (b1->inum > b2->inum)
2202 return 1;
2203
2204 if (b1->file_pos < b2->file_pos)
2205 return -1;
2206 else if (b1->file_pos > b2->file_pos)
2207 return 1;
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221 return 0;
2222}
2223
2224static void backref_insert(struct rb_root *root,
2225 struct sa_defrag_extent_backref *backref)
2226{
2227 struct rb_node **p = &root->rb_node;
2228 struct rb_node *parent = NULL;
2229 struct sa_defrag_extent_backref *entry;
2230 int ret;
2231
2232 while (*p) {
2233 parent = *p;
2234 entry = rb_entry(parent, struct sa_defrag_extent_backref, node);
2235
2236 ret = backref_comp(backref, entry);
2237 if (ret < 0)
2238 p = &(*p)->rb_left;
2239 else
2240 p = &(*p)->rb_right;
2241 }
2242
2243 rb_link_node(&backref->node, parent, p);
2244 rb_insert_color(&backref->node, root);
2245}
2246
2247
2248
2249
/*
 * Callback handed to iterate_inodes_from_logical() by
 * record_extent_backrefs(); @ctx is the old_sa_defrag_extent being examined.
 *
 * For the reference (@root_id, @inum, @offset) it searches that root for a
 * file extent item of the inode at exactly @offset whose disk bytenr equals
 * old->bytenr and whose extent range overlaps the old extent's range.  If
 * one is found, a sa_defrag_extent_backref describing it is inserted into
 * new->root and old->count is incremented.
 *
 * References coming from the inode stored in new->inode itself, and roots
 * that no longer exist (-ENOENT), are silently skipped.
 *
 * Returns 0 when the reference was recorded or skipped, a negative errno
 * otherwise.
 */
static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
				       void *ctx)
{
	struct btrfs_file_extent_item *extent;
	struct btrfs_fs_info *fs_info;
	struct old_sa_defrag_extent *old = ctx;
	struct new_sa_defrag_extent *new = old->new;
	struct btrfs_path *path = new->path;
	struct btrfs_key key;
	struct btrfs_root *root;
	struct sa_defrag_extent_backref *backref;
	struct extent_buffer *leaf;
	struct inode *inode = new->inode;
	int slot;
	int ret;
	u64 extent_offset;
	u64 num_bytes;

	/* Skip references held by new->inode itself. */
	if (BTRFS_I(inode)->root->root_key.objectid == root_id &&
	    inum == btrfs_ino(inode))
		return 0;

	key.objectid = root_id;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	fs_info = BTRFS_I(inode)->root->fs_info;
	root = btrfs_read_fs_root_no_name(fs_info, &key);
	if (IS_ERR(root)) {
		/* A missing root is not an error for the caller. */
		if (PTR_ERR(root) == -ENOENT)
			return 0;
		WARN_ON(1);
		pr_debug("inum=%llu, offset=%llu, root_id=%llu\n",
			 inum, offset, root_id);
		return PTR_ERR(root);
	}

	key.objectid = inum;
	key.type = BTRFS_EXTENT_DATA_KEY;
	/*
	 * NOTE(review): offsets above (u64)-1 << 32 make the search start at
	 * offset 0 instead -- presumably these are wrapped (negative) values;
	 * confirm against the producer of @offset.
	 */
	if (offset > (u64)-1 << 32)
		key.offset = 0;
	else
		key.offset = offset;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (WARN_ON(ret < 0))
		return ret;
	ret = 0;

	/* Walk forward over the inode's items until a match or the end. */
	while (1) {
		cond_resched();

		leaf = path->nodes[0];
		slot = path->slots[0];

		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0) {
				goto out;
			} else if (ret > 0) {
				/* No more leaves: nothing matched. */
				ret = 0;
				goto out;
			}
			continue;
		}

		/* Advance now so every "continue" below moves to the next item. */
		path->slots[0]++;

		btrfs_item_key_to_cpu(leaf, &key, slot);

		/* Past the inode's items: nothing matched (ret is still 0). */
		if (key.objectid > inum)
			goto out;

		if (key.objectid < inum || key.type != BTRFS_EXTENT_DATA_KEY)
			continue;

		extent = btrfs_item_ptr(leaf, slot,
					struct btrfs_file_extent_item);

		if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr)
			continue;

		/* Only the item located exactly at @offset is of interest. */
		if (key.offset != offset)
			continue;

		extent_offset = btrfs_file_extent_offset(leaf, extent);
		num_bytes = btrfs_file_extent_num_bytes(leaf, extent);

		/* Reject items whose extent range does not overlap the old one. */
		if (extent_offset >= old->extent_offset + old->offset +
		    old->len || extent_offset + num_bytes <=
		    old->extent_offset + old->offset)
			continue;
		break;
	}

	backref = kmalloc(sizeof(*backref), GFP_NOFS);
	if (!backref) {
		/*
		 * NOTE(review): an allocation failure is reported as -ENOENT
		 * rather than -ENOMEM.  record_extent_backrefs() treats
		 * -ENOENT as non-fatal, so this may be intentional
		 * best-effort behaviour -- confirm before changing.
		 */
		ret = -ENOENT;
		goto out;
	}

	backref->root_id = root_id;
	backref->inum = inum;
	backref->file_pos = offset;
	backref->num_bytes = num_bytes;
	backref->extent_offset = extent_offset;
	backref->generation = btrfs_file_extent_generation(leaf, extent);
	backref->old = old;
	backref_insert(&new->root, backref);
	old->count++;
out:
	btrfs_release_path(path);
	WARN_ON(ret);
	return ret;
}
2370
2371static noinline bool record_extent_backrefs(struct btrfs_path *path,
2372 struct new_sa_defrag_extent *new)
2373{
2374 struct btrfs_fs_info *fs_info = BTRFS_I(new->inode)->root->fs_info;
2375 struct old_sa_defrag_extent *old, *tmp;
2376 int ret;
2377
2378 new->path = path;
2379
2380 list_for_each_entry_safe(old, tmp, &new->head, list) {
2381 ret = iterate_inodes_from_logical(old->bytenr +
2382 old->extent_offset, fs_info,
2383 path, record_one_backref,
2384 old);
2385 if (ret < 0 && ret != -ENOENT)
2386 return false;
2387
2388
2389 if (!old->count) {
2390 list_del(&old->list);
2391 kfree(old);
2392 }
2393 }
2394
2395 if (list_empty(&new->head))
2396 return false;
2397
2398 return true;
2399}
2400
2401static int relink_is_mergable(struct extent_buffer *leaf,
2402 struct btrfs_file_extent_item *fi,
2403 struct new_sa_defrag_extent *new)
2404{
2405 if (btrfs_file_extent_disk_bytenr(leaf, fi) != new->bytenr)
2406 return 0;
2407
2408 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
2409 return 0;
2410
2411 if (btrfs_file_extent_compression(leaf, fi) != new->compress_type)
2412 return 0;
2413
2414 if (btrfs_file_extent_encryption(leaf, fi) ||
2415 btrfs_file_extent_other_encoding(leaf, fi))
2416 return 0;
2417
2418 return 1;
2419}
2420
2421
2422
2423
2424static noinline int relink_extent_backref(struct btrfs_path *path,
2425 struct sa_defrag_extent_backref *prev,
2426 struct sa_defrag_extent_backref *backref)
2427{
2428 struct btrfs_file_extent_item *extent;
2429 struct btrfs_file_extent_item *item;
2430 struct btrfs_ordered_extent *ordered;
2431 struct btrfs_trans_handle *trans;
2432 struct btrfs_fs_info *fs_info;
2433 struct btrfs_root *root;
2434 struct btrfs_key key;
2435 struct extent_buffer *leaf;
2436 struct old_sa_defrag_extent *old = backref->old;
2437 struct new_sa_defrag_extent *new = old->new;
2438 struct inode *src_inode = new->inode;
2439 struct inode *inode;
2440 struct extent_state *cached = NULL;
2441 int ret = 0;
2442 u64 start;
2443 u64 len;
2444 u64 lock_start;
2445 u64 lock_end;
2446 bool merge = false;
2447 int index;
2448
2449 if (prev && prev->root_id == backref->root_id &&
2450 prev->inum == backref->inum &&
2451 prev->file_pos + prev->num_bytes == backref->file_pos)
2452 merge = true;
2453
2454
2455 key.objectid = backref->root_id;
2456 key.type = BTRFS_ROOT_ITEM_KEY;
2457 key.offset = (u64)-1;
2458
2459 fs_info = BTRFS_I(src_inode)->root->fs_info;
2460 index = srcu_read_lock(&fs_info->subvol_srcu);
2461
2462 root = btrfs_read_fs_root_no_name(fs_info, &key);
2463 if (IS_ERR(root)) {
2464 srcu_read_unlock(&fs_info->subvol_srcu, index);
2465 if (PTR_ERR(root) == -ENOENT)
2466 return 0;
2467 return PTR_ERR(root);
2468 }
2469
2470 if (btrfs_root_readonly(root)) {
2471 srcu_read_unlock(&fs_info->subvol_srcu, index);
2472 return 0;
2473 }
2474
2475
2476 key.objectid = backref->inum;
2477 key.type = BTRFS_INODE_ITEM_KEY;
2478 key.offset = 0;
2479
2480 inode = btrfs_iget(fs_info->sb, &key, root, NULL);
2481 if (IS_ERR(inode)) {
2482 srcu_read_unlock(&fs_info->subvol_srcu, index);
2483 return 0;
2484 }
2485
2486 srcu_read_unlock(&fs_info->subvol_srcu, index);
2487
2488
2489 lock_start = backref->file_pos;
2490 lock_end = backref->file_pos + backref->num_bytes - 1;
2491 lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
2492 &cached);
2493
2494 ordered = btrfs_lookup_first_ordered_extent(inode, lock_end);
2495 if (ordered) {
2496 btrfs_put_ordered_extent(ordered);
2497 goto out_unlock;
2498 }
2499
2500 trans = btrfs_join_transaction(root);
2501 if (IS_ERR(trans)) {
2502 ret = PTR_ERR(trans);
2503 goto out_unlock;
2504 }
2505
2506 key.objectid = backref->inum;
2507 key.type = BTRFS_EXTENT_DATA_KEY;
2508 key.offset = backref->file_pos;
2509
2510 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2511 if (ret < 0) {
2512 goto out_free_path;
2513 } else if (ret > 0) {
2514 ret = 0;
2515 goto out_free_path;
2516 }
2517
2518 extent = btrfs_item_ptr(path->nodes[0], path->slots[0],
2519 struct btrfs_file_extent_item);
2520
2521 if (btrfs_file_extent_generation(path->nodes[0], extent) !=
2522 backref->generation)
2523 goto out_free_path;
2524
2525 btrfs_release_path(path);
2526
2527 start = backref->file_pos;
2528 if (backref->extent_offset < old->extent_offset + old->offset)
2529 start += old->extent_offset + old->offset -
2530 backref->extent_offset;
2531
2532 len = min(backref->extent_offset + backref->num_bytes,
2533 old->extent_offset + old->offset + old->len);
2534 len -= max(backref->extent_offset, old->extent_offset + old->offset);
2535
2536 ret = btrfs_drop_extents(trans, root, inode, start,
2537 start + len, 1);
2538 if (ret)
2539 goto out_free_path;
2540again:
2541 key.objectid = btrfs_ino(inode);
2542 key.type = BTRFS_EXTENT_DATA_KEY;
2543 key.offset = start;
2544
2545 path->leave_spinning = 1;
2546 if (merge) {
2547 struct btrfs_file_extent_item *fi;
2548 u64 extent_len;
2549 struct btrfs_key found_key;
2550
2551 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2552 if (ret < 0)
2553 goto out_free_path;
2554
2555 path->slots[0]--;
2556 leaf = path->nodes[0];
2557 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2558
2559 fi = btrfs_item_ptr(leaf, path->slots[0],
2560 struct btrfs_file_extent_item);
2561 extent_len = btrfs_file_extent_num_bytes(leaf, fi);
2562
2563 if (extent_len + found_key.offset == start &&
2564 relink_is_mergable(leaf, fi, new)) {
2565 btrfs_set_file_extent_num_bytes(leaf, fi,
2566 extent_len + len);
2567 btrfs_mark_buffer_dirty(leaf);
2568 inode_add_bytes(inode, len);
2569
2570 ret = 1;
2571 goto out_free_path;
2572 } else {
2573 merge = false;
2574 btrfs_release_path(path);
2575 goto again;
2576 }
2577 }
2578
2579 ret = btrfs_insert_empty_item(trans, root, path, &key,
2580 sizeof(*extent));
2581 if (ret) {
2582 btrfs_abort_transaction(trans, root, ret);
2583 goto out_free_path;
2584 }
2585
2586 leaf = path->nodes[0];
2587 item = btrfs_item_ptr(leaf, path->slots[0],
2588 struct btrfs_file_extent_item);
2589 btrfs_set_file_extent_disk_bytenr(leaf, item, new->bytenr);
2590 btrfs_set_file_extent_disk_num_bytes(leaf, item, new->disk_len);
2591 btrfs_set_file_extent_offset(leaf, item, start - new->file_pos);
2592 btrfs_set_file_extent_num_bytes(leaf, item, len);
2593 btrfs_set_file_extent_ram_bytes(leaf, item, new->len);
2594 btrfs_set_file_extent_generation(leaf, item, trans->transid);
2595 btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
2596 btrfs_set_file_extent_compression(leaf, item, new->compress_type);
2597 btrfs_set_file_extent_encryption(leaf, item, 0);
2598 btrfs_set_file_extent_other_encoding(leaf, item, 0);
2599
2600 btrfs_mark_buffer_dirty(leaf);
2601 inode_add_bytes(inode, len);
2602 btrfs_release_path(path);
2603
2604 ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
2605 new->disk_len, 0,
2606 backref->root_id, backref->inum,
2607 new->file_pos);
2608 if (ret) {
2609 btrfs_abort_transaction(trans, root, ret);
2610 goto out_free_path;
2611 }
2612
2613 ret = 1;
2614out_free_path:
2615 btrfs_release_path(path);
2616 path->leave_spinning = 0;
2617 btrfs_end_transaction(trans, root);
2618out_unlock:
2619 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
2620 &cached, GFP_NOFS);
2621 iput(inode);
2622 return ret;
2623}
2624
2625static void free_sa_defrag_extent(struct new_sa_defrag_extent *new)
2626{
2627 struct old_sa_defrag_extent *old, *tmp;
2628
2629 if (!new)
2630 return;
2631
2632 list_for_each_entry_safe(old, tmp, &new->head, list) {
2633 kfree(old);
2634 }
2635 kfree(new);
2636}
2637
2638static void relink_file_extents(struct new_sa_defrag_extent *new)
2639{
2640 struct btrfs_path *path;
2641 struct sa_defrag_extent_backref *backref;
2642 struct sa_defrag_extent_backref *prev = NULL;
2643 struct inode *inode;
2644 struct btrfs_root *root;
2645 struct rb_node *node;
2646 int ret;
2647
2648 inode = new->inode;
2649 root = BTRFS_I(inode)->root;
2650
2651 path = btrfs_alloc_path();
2652 if (!path)
2653 return;
2654
2655 if (!record_extent_backrefs(path, new)) {
2656 btrfs_free_path(path);
2657 goto out;
2658 }
2659 btrfs_release_path(path);
2660
2661 while (1) {
2662 node = rb_first(&new->root);
2663 if (!node)
2664 break;
2665 rb_erase(node, &new->root);
2666
2667 backref = rb_entry(node, struct sa_defrag_extent_backref, node);
2668
2669 ret = relink_extent_backref(path, prev, backref);
2670 WARN_ON(ret < 0);
2671
2672 kfree(prev);
2673
2674 if (ret == 1)
2675 prev = backref;
2676 else
2677 prev = NULL;
2678 cond_resched();
2679 }
2680 kfree(prev);
2681
2682 btrfs_free_path(path);
2683out:
2684 free_sa_defrag_extent(new);
2685
2686 atomic_dec(&root->fs_info->defrag_running);
2687 wake_up(&root->fs_info->transaction_wait);
2688}
2689
/*
 * Describe the range covered by @ordered in a freshly allocated
 * new_sa_defrag_extent and attach to its ->head list one
 * old_sa_defrag_extent for each existing file extent item of @inode that the
 * walk below accepts.
 *
 * Returns the descriptor after incrementing fs_info->defrag_running, or NULL
 * if an allocation or a tree search fails (everything built so far is freed
 * and the counter is left untouched).
 */
static struct new_sa_defrag_extent *
record_old_file_extents(struct inode *inode,
 struct btrfs_ordered_extent *ordered)
{
 struct btrfs_root *root = BTRFS_I(inode)->root;
 struct btrfs_path *path;
 struct btrfs_key key;
 struct old_sa_defrag_extent *old;
 struct new_sa_defrag_extent *new;
 int ret;

 new = kmalloc(sizeof(*new), GFP_NOFS);
 if (!new)
 return NULL;

 /* Copy the geometry of the ordered extent into the descriptor. */
 new->inode = inode;
 new->file_pos = ordered->file_offset;
 new->len = ordered->len;
 new->bytenr = ordered->start;
 new->disk_len = ordered->disk_len;
 new->compress_type = ordered->compress_type;
 new->root = RB_ROOT;
 INIT_LIST_HEAD(&new->head);

 path = btrfs_alloc_path();
 if (!path)
 goto out_kfree;

 key.objectid = btrfs_ino(inode);
 key.type = BTRFS_EXTENT_DATA_KEY;
 key.offset = new->file_pos;

 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 if (ret < 0)
 goto out_free_path;
 /*
  * No exact match: step back one slot so that an item starting before
  * file_pos is examined as well.
  */
 if (ret > 0 && path->slots[0] > 0)
 path->slots[0]--;

 /* Walk the file extent items until the key leaves the ordered range. */
 while (1) {
 struct btrfs_file_extent_item *extent;
 struct extent_buffer *l;
 int slot;
 u64 num_bytes;
 u64 offset;
 u64 end;
 u64 disk_bytenr;
 u64 extent_offset;

 l = path->nodes[0];
 slot = path->slots[0];

 /* Past the last item of this leaf: continue in the next leaf. */
 if (slot >= btrfs_header_nritems(l)) {
 ret = btrfs_next_leaf(root, path);
 if (ret < 0)
 goto out_free_path;
 else if (ret > 0)
 break;
 continue;
 }

 btrfs_item_key_to_cpu(l, &key, slot);

 /* Stop at the first key of another inode, type, or beyond the range. */
 if (key.objectid != btrfs_ino(inode))
 break;
 if (key.type != BTRFS_EXTENT_DATA_KEY)
 break;
 if (key.offset >= new->file_pos + new->len)
 break;

 extent = btrfs_item_ptr(l, slot, struct btrfs_file_extent_item);

 /*
  * Skip items that end before the range starts.
  * NOTE(review): with '<' an item ending exactly at file_pos is not
  * skipped and is recorded below with len == 0 - confirm whether '<='
  * was intended.
  */
 num_bytes = btrfs_file_extent_num_bytes(l, extent);
 if (key.offset + num_bytes < new->file_pos)
 goto next;

 /* disk_bytenr == 0 is skipped (presumably a hole - confirm). */
 disk_bytenr = btrfs_file_extent_disk_bytenr(l, extent);
 if (!disk_bytenr)
 goto next;

 extent_offset = btrfs_file_extent_offset(l, extent);

 old = kmalloc(sizeof(*old), GFP_NOFS);
 if (!old)
 goto out_free_path;

 /* Clamp the item to the part overlapping the ordered range. */
 offset = max(new->file_pos, key.offset);
 end = min(new->file_pos + new->len, key.offset + num_bytes);

 old->bytenr = disk_bytenr;
 old->extent_offset = extent_offset;
 old->offset = offset - key.offset;
 old->len = end - offset;
 old->new = new;
 old->count = 0;
 list_add_tail(&old->list, &new->head);
next:
 path->slots[0]++;
 cond_resched();
 }

 btrfs_free_path(path);
 atomic_inc(&root->fs_info->defrag_running);

 return new;

out_free_path:
 btrfs_free_path(path);
out_kfree:
 /* Frees the descriptor together with any old extents already queued. */
 free_sa_defrag_extent(new);
 return NULL;
}
2802
2803static void btrfs_release_delalloc_bytes(struct btrfs_root *root,
2804 u64 start, u64 len)
2805{
2806 struct btrfs_block_group_cache *cache;
2807
2808 cache = btrfs_lookup_block_group(root->fs_info, start);
2809 ASSERT(cache);
2810
2811 spin_lock(&cache->lock);
2812 cache->delalloc_bytes -= len;
2813 spin_unlock(&cache->lock);
2814
2815 btrfs_put_block_group(cache);
2816}
2817
2818
2819
2820
2821
/*
 * Finish an ordered extent once its I/O is done.
 *
 * Depending on the ordered extent's flags this either only updates i_size and
 * the inode item (NOCOW), marks a preallocated range as written (PREALLOC),
 * or inserts a new regular file extent item. On failure, or when the extent
 * was truncated, the part of the range that was not recorded is invalidated.
 * In all cases the ordered extent is removed and two references to it are
 * dropped before returning.
 *
 * Returns 0 on success or a negative errno.
 */
static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
{
 struct inode *inode = ordered_extent->inode;
 struct btrfs_root *root = BTRFS_I(inode)->root;
 struct btrfs_trans_handle *trans = NULL;
 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 struct extent_state *cached_state = NULL;
 struct new_sa_defrag_extent *new = NULL;
 int compress_type = 0;
 int ret = 0;
 u64 logical_len = ordered_extent->len;
 bool nolock;
 bool truncated = false;

 /* Free space inodes join the transaction with the _nolock variant. */
 nolock = btrfs_is_free_space_inode(inode);

 if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) {
 ret = -EIO;
 goto out;
 }

 btrfs_free_io_failure_record(inode, ordered_extent->file_offset,
 ordered_extent->file_offset +
 ordered_extent->len - 1);

 if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
 truncated = true;
 logical_len = ordered_extent->truncated_len;
 /* The whole extent was truncated away: nothing to insert. */
 if (!logical_len)
 goto out;
 }

 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
 BUG_ON(!list_empty(&ordered_extent->list));

 /*
  * NOCOW path: no file extent item is inserted here. Only
  * btrfs_qgroup_free_data() is called for the range (presumably
  * returning the qgroup data reservation - confirm), i_size is
  * advanced and the inode item is written back.
  */
 btrfs_qgroup_free_data(inode, ordered_extent->file_offset,
 ordered_extent->len);
 btrfs_ordered_update_i_size(inode, 0, ordered_extent);
 if (nolock)
 trans = btrfs_join_transaction_nolock(root);
 else
 trans = btrfs_join_transaction(root);
 if (IS_ERR(trans)) {
 ret = PTR_ERR(trans);
 /* Keep the cleanup at 'out' from ending an error pointer. */
 trans = NULL;
 goto out;
 }
 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 ret = btrfs_update_inode_fallback(trans, root, inode);
 if (ret)
 btrfs_abort_transaction(trans, root, ret);
 goto out;
 }

 lock_extent_bits(io_tree, ordered_extent->file_offset,
 ordered_extent->file_offset + ordered_extent->len - 1,
 &cached_state);

 ret = test_range_bit(io_tree, ordered_extent->file_offset,
 ordered_extent->file_offset + ordered_extent->len - 1,
 EXTENT_DEFRAG, 1, cached_state);
 if (ret) {
 u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
 /*
  * The leading '0 &&' disables this branch, so
  * record_old_file_extents() is never called and 'new' stays NULL.
  */
 if (0 && last_snapshot >= BTRFS_I(inode)->generation)

 new = record_old_file_extents(inode, ordered_extent);

 clear_extent_bit(io_tree, ordered_extent->file_offset,
 ordered_extent->file_offset + ordered_extent->len - 1,
 EXTENT_DEFRAG, 0, 0, &cached_state, GFP_NOFS);
 }

 if (nolock)
 trans = btrfs_join_transaction_nolock(root);
 else
 trans = btrfs_join_transaction(root);
 if (IS_ERR(trans)) {
 ret = PTR_ERR(trans);
 trans = NULL;
 goto out_unlock;
 }

 trans->block_rsv = &root->fs_info->delalloc_block_rsv;

 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
 compress_type = ordered_extent->compress_type;
 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
 /* A preallocated extent is not expected to be compressed. */
 BUG_ON(compress_type);
 ret = btrfs_mark_extent_written(trans, inode,
 ordered_extent->file_offset,
 ordered_extent->file_offset +
 logical_len);
 } else {
 BUG_ON(root == root->fs_info->tree_root);
 ret = insert_reserved_file_extent(trans, inode,
 ordered_extent->file_offset,
 ordered_extent->start,
 ordered_extent->disk_len,
 logical_len, logical_len,
 compress_type, 0, 0,
 BTRFS_FILE_EXTENT_REG);
 if (!ret)
 btrfs_release_delalloc_bytes(root,
 ordered_extent->start,
 ordered_extent->disk_len);
 }
 /* Called regardless of ret; the error check follows below. */
 unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
 ordered_extent->file_offset, ordered_extent->len,
 trans->transid);
 if (ret < 0) {
 btrfs_abort_transaction(trans, root, ret);
 goto out_unlock;
 }

 add_pending_csums(trans, inode, ordered_extent->file_offset,
 &ordered_extent->list);

 btrfs_ordered_update_i_size(inode, 0, ordered_extent);
 ret = btrfs_update_inode_fallback(trans, root, inode);
 if (ret) {
 btrfs_abort_transaction(trans, root, ret);
 goto out_unlock;
 }
 ret = 0;
out_unlock:
 unlock_extent_cached(io_tree, ordered_extent->file_offset,
 ordered_extent->file_offset +
 ordered_extent->len - 1, &cached_state, GFP_NOFS);
out:
 /* Common exit: reached by every path, successful or not. */
 if (root != root->fs_info->tree_root)
 btrfs_delalloc_release_metadata(inode, ordered_extent->len);
 if (trans)
 btrfs_end_transaction(trans, root);

 if (ret || truncated) {
 u64 start, end;

 /*
  * On truncation only the tail past logical_len is invalidated;
  * on error the whole ordered range is.
  */
 if (truncated)
 start = ordered_extent->file_offset + logical_len;
 else
 start = ordered_extent->file_offset;
 end = ordered_extent->file_offset + ordered_extent->len - 1;
 clear_extent_uptodate(io_tree, start, end, NULL, GFP_NOFS);

 /* Drop the cached extent mappings for the part not recorded. */
 btrfs_drop_extent_cache(inode, start, end, 0);

 /*
  * Give the reserved extent back when an error occurred or when
  * nothing at all was kept (logical_len == 0). NOCOW and PREALLOC
  * ordered extents are excluded from this.
  */
 if ((ret || !logical_len) &&
 !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
 !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags))
 btrfs_free_reserved_extent(root, ordered_extent->start,
 ordered_extent->disk_len, 1);
 }

 /*
  * Remove the ordered extent only after all of the updates above
  * (presumably so that waiters observe the finished state - confirm).
  */
 btrfs_remove_ordered_extent(inode, ordered_extent);

 /* Snapshot-aware defrag; 'new' is always NULL while disabled above. */
 if (new) {
 if (ret) {
 free_sa_defrag_extent(new);
 atomic_dec(&root->fs_info->defrag_running);
 } else {
 relink_file_extents(new);
 }
 }

 /*
  * Two references are dropped (presumably the one held by this
  * function and the one held by the ordered tree - confirm).
  */
 btrfs_put_ordered_extent(ordered_extent);

 btrfs_put_ordered_extent(ordered_extent);

 return ret;
}
3012
3013static void finish_ordered_fn(struct btrfs_work *work)
3014{
3015 struct btrfs_ordered_extent *ordered_extent;
3016 ordered_extent = container_of(work, struct btrfs_ordered_extent, work);
3017 btrfs_finish_ordered_io(ordered_extent);
3018}
3019
3020static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
3021 struct extent_state *state, int uptodate)
3022{
3023 struct inode *inode = page->mapping->host;
3024 struct btrfs_root *root = BTRFS_I(inode)->root;
3025 struct btrfs_ordered_extent *ordered_extent = NULL;
3026 struct btrfs_workqueue *wq;
3027 btrfs_work_func_t func;
3028
3029 trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
3030
3031 ClearPagePrivate2(page);
3032 if (!btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
3033 end - start + 1, uptodate))
3034 return 0;
3035
3036 if (btrfs_is_free_space_inode(inode)) {
3037 wq = root->fs_info->endio_freespace_worker;
3038 func = btrfs_freespace_write_helper;
3039 } else {
3040 wq = root->fs_info->endio_write_workers;
3041 func = btrfs_endio_write_helper;
3042 }
3043
3044 btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL,
3045 NULL);
3046 btrfs_queue_work(wq, &ordered_extent->work);
3047
3048 return 0;
3049}
3050
3051static int __readpage_endio_check(struct inode *inode,
3052 struct btrfs_io_bio *io_bio,
3053 int icsum, struct page *page,
3054 int pgoff, u64 start, size_t len)
3055{
3056 char *kaddr;
3057 u32 csum_expected;
3058 u32 csum = ~(u32)0;
3059
3060 csum_expected = *(((u32 *)io_bio->csum) + icsum);
3061
3062 kaddr = kmap_atomic(page);
3063 csum = btrfs_csum_data(kaddr + pgoff, csum, len);
3064 btrfs_csum_final(csum, (char *)&csum);
3065 if (csum != csum_expected)
3066 goto zeroit;
3067
3068 kunmap_atomic(kaddr);
3069 return 0;
3070zeroit:
3071 btrfs_warn_rl(BTRFS_I(inode)->root->fs_info,
3072 "csum failed ino %llu off %llu csum %u expected csum %u",
3073 btrfs_ino(inode), start, csum, csum_expected);
3074 memset(kaddr + pgoff, 1, len);
3075 flush_dcache_page(page);
3076 kunmap_atomic(kaddr);
3077 if (csum_expected == 0)
3078 return 0;
3079 return -EIO;
3080}
3081
3082
3083
3084
3085
3086
3087static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
3088 u64 phy_offset, struct page *page,
3089 u64 start, u64 end, int mirror)
3090{
3091 size_t offset = start - page_offset(page);
3092 struct inode *inode = page->mapping->host;
3093 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3094 struct btrfs_root *root = BTRFS_I(inode)->root;
3095
3096 if (PageChecked(page)) {
3097 ClearPageChecked(page);
3098 return 0;
3099 }
3100
3101 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
3102 return 0;
3103
3104 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
3105 test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
3106 clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM,
3107 GFP_NOFS);
3108 return 0;
3109 }
3110
3111 phy_offset >>= inode->i_sb->s_blocksize_bits;
3112 return __readpage_endio_check(inode, io_bio, phy_offset, page, offset,
3113 start, (size_t)(end - start + 1));
3114}
3115
3116void btrfs_add_delayed_iput(struct inode *inode)
3117{
3118 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
3119 struct btrfs_inode *binode = BTRFS_I(inode);
3120
3121 if (atomic_add_unless(&inode->i_count, -1, 1))
3122 return;
3123
3124 spin_lock(&fs_info->delayed_iput_lock);
3125 if (binode->delayed_iput_count == 0) {
3126 ASSERT(list_empty(&binode->delayed_iput));
3127 list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs);
3128 } else {
3129 binode->delayed_iput_count++;
3130 }
3131 spin_unlock(&fs_info->delayed_iput_lock);
3132}
3133
3134void btrfs_run_delayed_iputs(struct btrfs_root *root)
3135{
3136 struct btrfs_fs_info *fs_info = root->fs_info;
3137
3138 spin_lock(&fs_info->delayed_iput_lock);
3139 while (!list_empty(&fs_info->delayed_iputs)) {
3140 struct btrfs_inode *inode;
3141
3142 inode = list_first_entry(&fs_info->delayed_iputs,
3143 struct btrfs_inode, delayed_iput);
3144 if (inode->delayed_iput_count) {
3145 inode->delayed_iput_count--;
3146 list_move_tail(&inode->delayed_iput,
3147 &fs_info->delayed_iputs);
3148 } else {
3149 list_del_init(&inode->delayed_iput);
3150 }
3151 spin_unlock(&fs_info->delayed_iput_lock);
3152 iput(&inode->vfs_inode);
3153 spin_lock(&fs_info->delayed_iput_lock);
3154 }
3155 spin_unlock(&fs_info->delayed_iput_lock);
3156}
3157
3158
3159
3160
3161
3162
3163void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
3164 struct btrfs_root *root)
3165{
3166 struct btrfs_block_rsv *block_rsv;
3167 int ret;
3168
3169 if (atomic_read(&root->orphan_inodes) ||
3170 root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE)
3171 return;
3172
3173 spin_lock(&root->orphan_lock);
3174 if (atomic_read(&root->orphan_inodes)) {
3175 spin_unlock(&root->orphan_lock);
3176 return;
3177 }
3178
3179 if (root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) {
3180 spin_unlock(&root->orphan_lock);
3181 return;
3182 }
3183
3184 block_rsv = root->orphan_block_rsv;
3185 root->orphan_block_rsv = NULL;
3186 spin_unlock(&root->orphan_lock);
3187
3188 if (test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state) &&
3189 btrfs_root_refs(&root->root_item) > 0) {
3190 ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root,
3191 root->root_key.objectid);
3192 if (ret)
3193 btrfs_abort_transaction(trans, root, ret);
3194 else
3195 clear_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
3196 &root->state);
3197 }
3198
3199 if (block_rsv) {
3200 WARN_ON(block_rsv->size > 0);
3201 btrfs_free_block_rsv(root, block_rsv);
3202 }
3203}
3204
3205
3206
3207
3208
3209
3210
3211
/*
 * Register @inode as an orphan in its root.
 *
 * Makes sure the root has an orphan block reservation, sets the inode's
 * HAS_ORPHAN_ITEM and ORPHAN_META_RESERVED runtime flags (each action below
 * is performed only by the caller that actually set the flag), reserves
 * metadata and inserts the orphan item keyed by the inode number.
 *
 * Returns 0 on success (an already existing orphan item, -EEXIST, is treated
 * as success), -ENOMEM if the block reservation cannot be allocated, or the
 * error from the item insertion after aborting the transaction.
 */
int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
{
 struct btrfs_root *root = BTRFS_I(inode)->root;
 struct btrfs_block_rsv *block_rsv = NULL;
 int reserve = 0;
 int insert = 0;
 int ret;

 /* Allocate outside the spinlock; installed or discarded below. */
 if (!root->orphan_block_rsv) {
 block_rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
 if (!block_rsv)
 return -ENOMEM;
 }

 spin_lock(&root->orphan_lock);
 if (!root->orphan_block_rsv) {
 root->orphan_block_rsv = block_rsv;
 } else if (block_rsv) {
 /* Someone else installed a reservation meanwhile; drop ours. */
 btrfs_free_block_rsv(root, block_rsv);
 block_rsv = NULL;
 }

 if (!test_and_set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
 &BTRFS_I(inode)->runtime_flags)) {
#if 0
 /*
  * Disabled code: it would have selected insert = 2 for the first
  * orphan of a root, which is the only way to reach the
  * 'insert >= 2' block at the end of this function.
  */
 if (!xchg(&root->orphan_item_inserted, 1))
 insert = 2;
 else
 insert = 1;
#endif
 insert = 1;
 atomic_inc(&root->orphan_inodes);
 }

 if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
 &BTRFS_I(inode)->runtime_flags))
 reserve = 1;
 spin_unlock(&root->orphan_lock);

 /* Reserve metadata for the orphan; failure here is treated as fatal. */
 if (reserve) {
 ret = btrfs_orphan_reserve_metadata(trans, inode);
 BUG_ON(ret);
 }

 /* Insert the orphan item that tracks this inode. */
 if (insert >= 1) {
 ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
 if (ret) {
 /* Undo the accounting done above for this call. */
 atomic_dec(&root->orphan_inodes);
 if (reserve) {
 clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
 &BTRFS_I(inode)->runtime_flags);
 btrfs_orphan_release_metadata(inode);
 }
 /* -EEXIST keeps HAS_ORPHAN_ITEM set and is not an error. */
 if (ret != -EEXIST) {
 clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
 &BTRFS_I(inode)->runtime_flags);
 btrfs_abort_transaction(trans, root, ret);
 return ret;
 }
 }
 ret = 0;
 }

 /*
  * Insert an orphan item for the root itself into the tree root.
  * NOTE(review): unreachable at present, since 'insert' is only ever
  * 0 or 1 while the block above is compiled out.
  */
 if (insert >= 2) {
 ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root,
 root->root_key.objectid);
 if (ret && ret != -EEXIST) {
 btrfs_abort_transaction(trans, root, ret);
 return ret;
 }
 }
 return 0;
}
3293
3294
3295
3296
3297
3298static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
3299 struct inode *inode)
3300{
3301 struct btrfs_root *root = BTRFS_I(inode)->root;
3302 int delete_item = 0;
3303 int release_rsv = 0;
3304 int ret = 0;
3305
3306 spin_lock(&root->orphan_lock);
3307 if (test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
3308 &BTRFS_I(inode)->runtime_flags))
3309 delete_item = 1;
3310
3311 if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
3312 &BTRFS_I(inode)->runtime_flags))
3313 release_rsv = 1;
3314 spin_unlock(&root->orphan_lock);
3315
3316 if (delete_item) {
3317 atomic_dec(&root->orphan_inodes);
3318 if (trans)
3319 ret = btrfs_del_orphan_item(trans, root,
3320 btrfs_ino(inode));
3321 }
3322
3323 if (release_rsv)
3324 btrfs_orphan_release_metadata(inode);
3325
3326 return ret;
3327}
3328
3329
3330
3331
3332
/*
 * Process the orphan items stored in @root.
 *
 * Orphan items are visited from the highest offset downwards. For each one
 * the referenced inode is looked up:
 *  - lookup returns -ESTALE: the orphan item is deleted (except for tree
 *    root entries that match a root on fs_info->dead_roots, which are left
 *    alone);
 *  - inode still has links: the orphan is handled as a truncate;
 *  - inode has no links: it is only counted, and the iput() at the end of
 *    the iteration drops the reference.
 *
 * Only the first caller per root does any work; later callers return 0.
 * Returns 0 on success or a negative errno.
 *
 * NOTE(review): the error exits leave orphan_cleanup_state at
 * ORPHAN_CLEANUP_STARTED, so a later call returns 0 without retrying.
 */
int btrfs_orphan_cleanup(struct btrfs_root *root)
{
 struct btrfs_path *path;
 struct extent_buffer *leaf;
 struct btrfs_key key, found_key;
 struct btrfs_trans_handle *trans;
 struct inode *inode;
 u64 last_objectid = 0;
 int ret = 0, nr_unlink = 0, nr_truncate = 0;

 /* Proceed only if we are the one moving the state from 0 to STARTED. */
 if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
 return 0;

 path = btrfs_alloc_path();
 if (!path) {
 ret = -ENOMEM;
 goto out;
 }
 path->reada = READA_BACK;

 /* Start from the largest possible orphan key and walk backwards. */
 key.objectid = BTRFS_ORPHAN_OBJECTID;
 key.type = BTRFS_ORPHAN_ITEM_KEY;
 key.offset = (u64)-1;

 while (1) {
 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 if (ret < 0)
 goto out;

 /*
  * No exact match: look at the item just before the returned slot.
  * Slot 0 means there is no such item in this leaf, so stop.
  */
 if (ret > 0) {
 ret = 0;
 if (path->slots[0] == 0)
 break;
 path->slots[0]--;
 }

 /* Read the key of the candidate item. */
 leaf = path->nodes[0];
 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

 /* Anything that is not an orphan item ends the walk. */
 if (found_key.objectid != BTRFS_ORPHAN_OBJECTID)
 break;
 if (found_key.type != BTRFS_ORPHAN_ITEM_KEY)
 break;

 /* The key has been copied out; the path is no longer needed. */
 btrfs_release_path(path);

 /*
  * Finding the same orphan twice in a row means the previous
  * iteration did not remove it; bail out rather than loop forever.
  */
 if (found_key.offset == last_objectid) {
 btrfs_err(root->fs_info,
 "Error removing orphan entry, stopping orphan cleanup");
 ret = -EINVAL;
 goto out;
 }

 last_objectid = found_key.offset;

 /* The orphan item's offset is the objectid of the inode to look up. */
 found_key.objectid = found_key.offset;
 found_key.type = BTRFS_INODE_ITEM_KEY;
 found_key.offset = 0;
 inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
 ret = PTR_ERR_OR_ZERO(inode);
 if (ret && ret != -ESTALE)
 goto out;

 if (ret == -ESTALE && root == root->fs_info->tree_root) {
 struct btrfs_root *dead_root;
 struct btrfs_fs_info *fs_info = root->fs_info;
 int is_dead_root = 0;

 /*
  * In the tree root, check whether the objectid belongs to a
  * root queued on fs_info->dead_roots. Such an orphan item is
  * kept (presumably because that root is still pending
  * deletion - confirm).
  */
 spin_lock(&fs_info->trans_lock);
 list_for_each_entry(dead_root, &fs_info->dead_roots,
 root_list) {
 if (dead_root->root_key.objectid ==
 found_key.objectid) {
 is_dead_root = 1;
 break;
 }
 }
 spin_unlock(&fs_info->trans_lock);
 if (is_dead_root) {
 /* Search below this item next time so it is not found again. */
 key.offset = found_key.objectid - 1;
 continue;
 }
 }

 /* Lookup returned -ESTALE: just delete the orphan item. */
 if (ret == -ESTALE) {
 trans = btrfs_start_transaction(root, 1);
 if (IS_ERR(trans)) {
 ret = PTR_ERR(trans);
 goto out;
 }
 btrfs_debug(root->fs_info, "auto deleting %Lu",
 found_key.objectid);
 ret = btrfs_del_orphan_item(trans, root,
 found_key.objectid);
 btrfs_end_transaction(trans, root);
 if (ret)
 goto out;
 continue;
 }

 /*
  * Mirror the in-memory state btrfs_orphan_add() would have set up,
  * so that a later btrfs_orphan_del() finds the flag and the count.
  */
 set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
 &BTRFS_I(inode)->runtime_flags);
 atomic_inc(&root->orphan_inodes);

 /* Links remain: handle the orphan as a truncate. */
 if (inode->i_nlink) {
 if (WARN_ON(!S_ISREG(inode->i_mode))) {
 iput(inode);
 continue;
 }
 nr_truncate++;

 /* Transaction with one reserved item for btrfs_orphan_add(). */
 trans = btrfs_start_transaction(root, 1);
 if (IS_ERR(trans)) {
 iput(inode);
 ret = PTR_ERR(trans);
 goto out;
 }
 ret = btrfs_orphan_add(trans, inode);
 btrfs_end_transaction(trans, root);
 if (ret) {
 iput(inode);
 goto out;
 }

 ret = btrfs_truncate(inode);
 /* On failure drop only the in-memory orphan state (NULL trans). */
 if (ret)
 btrfs_orphan_del(NULL, inode);
 } else {
 nr_unlink++;
 }

 /*
  * Drop our reference (for the unlinked case this is presumably
  * what triggers deletion of the inode - confirm).
  */
 iput(inode);
 if (ret)
 goto out;
 }
 /* The break paths above leave the path held; release it. */
 btrfs_release_path(path);

 root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE;

 if (root->orphan_block_rsv)
 btrfs_block_rsv_release(root, root->orphan_block_rsv,
 (u64)-1);

 /*
  * Join and immediately end a transaction (presumably to give a commit
  * the chance to run btrfs_orphan_commit_root() - confirm).
  */
 if (root->orphan_block_rsv ||
 test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state)) {
 trans = btrfs_join_transaction(root);
 if (!IS_ERR(trans))
 btrfs_end_transaction(trans, root);
 }

 if (nr_unlink)
 btrfs_debug(root->fs_info, "unlinked %d orphans", nr_unlink);
 if (nr_truncate)
 btrfs_debug(root->fs_info, "truncated %d orphans", nr_truncate);

out:
 if (ret)
 btrfs_err(root->fs_info,
 "could not do orphan cleanup %d", ret);
 btrfs_free_path(path);
 return ret;
}
3532
3533
3534
3535
3536
3537
3538
3539static noinline int acls_after_inode_item(struct extent_buffer *leaf,
3540 int slot, u64 objectid,
3541 int *first_xattr_slot)
3542{
3543 u32 nritems = btrfs_header_nritems(leaf);
3544 struct btrfs_key found_key;
3545 static u64 xattr_access = 0;
3546 static u64 xattr_default = 0;
3547 int scanned = 0;
3548
3549 if (!xattr_access) {
3550 xattr_access = btrfs_name_hash(XATTR_NAME_POSIX_ACL_ACCESS,
3551 strlen(XATTR_NAME_POSIX_ACL_ACCESS));
3552 xattr_default = btrfs_name_hash(XATTR_NAME_POSIX_ACL_DEFAULT,
3553 strlen(XATTR_NAME_POSIX_ACL_DEFAULT));
3554 }
3555
3556 slot++;
3557 *first_xattr_slot = -1;
3558 while (slot < nritems) {
3559 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3560
3561
3562 if (found_key.objectid != objectid)
3563 return 0;
3564
3565
3566 if (found_key.type == BTRFS_XATTR_ITEM_KEY) {
3567 if (*first_xattr_slot == -1)
3568 *first_xattr_slot = slot;
3569 if (found_key.offset == xattr_access ||
3570 found_key.offset == xattr_default)
3571 return 1;
3572 }
3573
3574
3575
3576
3577
3578 if (found_key.type > BTRFS_XATTR_ITEM_KEY)
3579 return 0;
3580
3581 slot++;
3582 scanned++;
3583
3584
3585
3586
3587
3588
3589
3590 if (scanned >= 8)
3591 break;
3592 }
3593
3594
3595
3596
3597 if (*first_xattr_slot == -1)
3598 *first_xattr_slot = slot;
3599 return 1;
3600}
3601
3602
3603
3604
/*
 * Populate the in-memory @inode from its on-disk inode item.
 *
 * btrfs_fill_inode() is tried first; when it returns 0 the field-by-field
 * copy from the leaf is skipped, but the tree lookup is still done because
 * the items following the inode item are inspected for the directory index
 * and for xattrs/ACLs. Finally the operation tables are chosen from the file
 * type in i_mode.
 *
 * If the path cannot be allocated or the inode item lookup fails, the inode
 * is marked bad with make_bad_inode().
 */
static void btrfs_read_locked_inode(struct inode *inode)
{
 struct btrfs_path *path;
 struct extent_buffer *leaf;
 struct btrfs_inode_item *inode_item;
 struct btrfs_root *root = BTRFS_I(inode)->root;
 struct btrfs_key location;
 unsigned long ptr;
 int maybe_acls;
 u32 rdev;
 int ret;
 bool filled = false;
 int first_xattr_slot;

 ret = btrfs_fill_inode(inode, &rdev);
 if (!ret)
 filled = true;

 path = btrfs_alloc_path();
 if (!path)
 goto make_bad;

 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));

 ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
 if (ret)
 goto make_bad;

 leaf = path->nodes[0];

 /* Fields already filled in above: skip the copy from the leaf. */
 if (filled)
 goto cache_index;

 inode_item = btrfs_item_ptr(leaf, path->slots[0],
 struct btrfs_inode_item);
 inode->i_mode = btrfs_inode_mode(leaf, inode_item);
 set_nlink(inode, btrfs_inode_nlink(leaf, inode_item));
 i_uid_write(inode, btrfs_inode_uid(leaf, inode_item));
 i_gid_write(inode, btrfs_inode_gid(leaf, inode_item));
 btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));

 inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime);
 inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->atime);

 inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->mtime);
 inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->mtime);

 inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->ctime);
 inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->ctime);

 BTRFS_I(inode)->i_otime.tv_sec =
 btrfs_timespec_sec(leaf, &inode_item->otime);
 BTRFS_I(inode)->i_otime.tv_nsec =
 btrfs_timespec_nsec(leaf, &inode_item->otime);

 inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item));
 BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
 BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item);

 inode->i_version = btrfs_inode_sequence(leaf, inode_item);
 inode->i_generation = BTRFS_I(inode)->generation;
 /* i_rdev is only set by init_special_inode() in the switch below. */
 inode->i_rdev = 0;
 rdev = btrfs_inode_rdev(leaf, inode_item);

 BTRFS_I(inode)->index_cnt = (u64)-1;
 BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);

cache_index:
 /*
  * The inode was last changed in the filesystem's current generation:
  * request a full sync (presumably because the in-memory record of which
  * extents changed is gone after a re-read - confirm).
  */
 if (BTRFS_I(inode)->last_trans == root->fs_info->generation)
 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
 &BTRFS_I(inode)->runtime_flags);

 /*
  * Seed last_unlink_trans from last_trans. NOTE(review): the rationale is
  * not visible in this chunk; presumably a conservative value for log
  * replay safety after the inode was evicted - confirm.
  */
 BTRFS_I(inode)->last_unlink_trans = BTRFS_I(inode)->last_trans;

 /*
  * Look at the item right after the inode item. For an inode with exactly
  * one link, an inode ref / extref found there supplies dir_index.
  */
 path->slots[0]++;
 if (inode->i_nlink != 1 ||
 path->slots[0] >= btrfs_header_nritems(leaf))
 goto cache_acl;

 btrfs_item_key_to_cpu(leaf, &location, path->slots[0]);
 if (location.objectid != btrfs_ino(inode))
 goto cache_acl;

 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
 if (location.type == BTRFS_INODE_REF_KEY) {
 struct btrfs_inode_ref *ref;

 ref = (struct btrfs_inode_ref *)ptr;
 BTRFS_I(inode)->dir_index = btrfs_inode_ref_index(leaf, ref);
 } else if (location.type == BTRFS_INODE_EXTREF_KEY) {
 struct btrfs_inode_extref *extref;

 extref = (struct btrfs_inode_extref *)ptr;
 BTRFS_I(inode)->dir_index = btrfs_inode_extref_index(leaf,
 extref);
 }
cache_acl:
 /*
  * Scan the following items for xattrs. If ACLs can be ruled out, that
  * fact is cached below via cache_no_acl(); if an xattr slot was reported,
  * the inode properties are loaded starting from it.
  */
 maybe_acls = acls_after_inode_item(leaf, path->slots[0],
 btrfs_ino(inode), &first_xattr_slot);
 if (first_xattr_slot != -1) {
 path->slots[0] = first_xattr_slot;
 ret = btrfs_load_inode_props(inode, path);
 /* A failure to load props is logged but does not fail the read. */
 if (ret)
 btrfs_err(root->fs_info,
 "error loading props for ino %llu (root %llu): %d",
 btrfs_ino(inode),
 root->root_key.objectid, ret);
 }
 btrfs_free_path(path);

 if (!maybe_acls)
 cache_no_acl(inode);

 /* Select the operation tables according to the file type. */
 switch (inode->i_mode & S_IFMT) {
 case S_IFREG:
 inode->i_mapping->a_ops = &btrfs_aops;
 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
 inode->i_fop = &btrfs_file_operations;
 inode->i_op = &btrfs_file_inode_operations;
 break;
 case S_IFDIR:
 inode->i_fop = &btrfs_dir_file_operations;
 /* Directories in the tree root get the read-only inode operations. */
 if (root == root->fs_info->tree_root)
 inode->i_op = &btrfs_dir_ro_inode_operations;
 else
 inode->i_op = &btrfs_dir_inode_operations;
 break;
 case S_IFLNK:
 inode->i_op = &btrfs_symlink_inode_operations;
 inode_nohighmem(inode);
 inode->i_mapping->a_ops = &btrfs_symlink_aops;
 break;
 default:
 inode->i_op = &btrfs_special_inode_operations;
 init_special_inode(inode, inode->i_mode, rdev);
 break;
 }

 btrfs_update_iflags(inode);
 return;

make_bad:
 /* path may be NULL here when its allocation was what failed. */
 btrfs_free_path(path);
 make_bad_inode(inode);
}
3790
3791
3792
3793
3794static void fill_inode_item(struct btrfs_trans_handle *trans,
3795 struct extent_buffer *leaf,
3796 struct btrfs_inode_item *item,
3797 struct inode *inode)
3798{
3799 struct btrfs_map_token token;
3800
3801 btrfs_init_map_token(&token);
3802
3803 btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
3804 btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
3805 btrfs_set_token_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size,
3806 &token);
3807 btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
3808 btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
3809
3810 btrfs_set_token_timespec_sec(leaf, &item->atime,
3811 inode->i_atime.tv_sec, &token);
3812 btrfs_set_token_timespec_nsec(leaf, &item->atime,
3813 inode->i_atime.tv_nsec, &token);
3814
3815 btrfs_set_token_timespec_sec(leaf, &item->mtime,
3816 inode->i_mtime.tv_sec, &token);
3817 btrfs_set_token_timespec_nsec(leaf, &item->mtime,
3818 inode->i_mtime.tv_nsec, &token);
3819
3820 btrfs_set_token_timespec_sec(leaf, &item->ctime,
3821 inode->i_ctime.tv_sec, &token);
3822 btrfs_set_token_timespec_nsec(leaf, &item->ctime,
3823 inode->i_ctime.tv_nsec, &token);
3824
3825 btrfs_set_token_timespec_sec(leaf, &item->otime,
3826 BTRFS_I(inode)->i_otime.tv_sec, &token);
3827 btrfs_set_token_timespec_nsec(leaf, &item->otime,
3828 BTRFS_I(inode)->i_otime.tv_nsec, &token);
3829
3830 btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
3831 &token);
3832 btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation,
3833 &token);
3834 btrfs_set_token_inode_sequence(leaf, item, inode->i_version, &token);
3835 btrfs_set_token_inode_transid(leaf, item, trans->transid, &token);
3836 btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token);
3837 btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token);
3838 btrfs_set_token_inode_block_group(leaf, item, 0, &token);
3839}
3840
3841
3842
3843
3844static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
3845 struct btrfs_root *root, struct inode *inode)
3846{
3847 struct btrfs_inode_item *inode_item;
3848 struct btrfs_path *path;
3849 struct extent_buffer *leaf;
3850 int ret;
3851
3852 path = btrfs_alloc_path();
3853 if (!path)
3854 return -ENOMEM;
3855
3856 path->leave_spinning = 1;
3857 ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location,
3858 1);
3859 if (ret) {
3860 if (ret > 0)
3861 ret = -ENOENT;
3862 goto failed;
3863 }
3864
3865 leaf = path->nodes[0];
3866 inode_item = btrfs_item_ptr(leaf, path->slots[0],
3867 struct btrfs_inode_item);
3868
3869 fill_inode_item(trans, leaf, inode_item, inode);
3870 btrfs_mark_buffer_dirty(leaf);
3871 btrfs_set_inode_last_trans(trans, inode);
3872 ret = 0;
3873failed:
3874 btrfs_free_path(path);
3875 return ret;
3876}
3877
3878
3879
3880
3881noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
3882 struct btrfs_root *root, struct inode *inode)
3883{
3884 int ret;
3885
3886
3887
3888
3889
3890
3891
3892
3893 if (!btrfs_is_free_space_inode(inode)
3894 && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
3895 && !root->fs_info->log_root_recovering) {
3896 btrfs_update_root_times(trans, root);
3897
3898 ret = btrfs_delayed_update_inode(trans, root, inode);
3899 if (!ret)
3900 btrfs_set_inode_last_trans(trans, inode);
3901 return ret;
3902 }
3903
3904 return btrfs_update_inode_item(trans, root, inode);
3905}
3906
3907noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
3908 struct btrfs_root *root,
3909 struct inode *inode)
3910{
3911 int ret;
3912
3913 ret = btrfs_update_inode(trans, root, inode);
3914 if (ret == -ENOSPC)
3915 return btrfs_update_inode_item(trans, root, inode);
3916 return ret;
3917}
3918
3919
3920
3921
3922
3923
/*
 * Remove the link @name (of length @name_len) to @inode from directory
 * @dir inside an already running transaction.
 *
 * This deletes the dir item, the inode back reference, the delayed dir
 * index and the corresponding entries in the tree log, then shrinks the
 * directory's i_size, bumps both inodes' versions and timestamps and
 * updates the directory inode.  It does NOT touch @inode's link count and
 * does not write @inode back; the caller does that.
 *
 * Returns 0 on success or a negative errno.  Several failure paths abort
 * the transaction before returning.
 */
static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct inode *dir, struct inode *inode,
				const char *name, int name_len)
{
	struct btrfs_path *path;
	int ret = 0;
	struct extent_buffer *leaf;
	struct btrfs_dir_item *di;
	struct btrfs_key key;
	u64 index;
	u64 ino = btrfs_ino(inode);
	u64 dir_ino = btrfs_ino(dir);

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	/* Find and delete the name's dir item in the directory. */
	path->leave_spinning = 1;
	di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
				    name, name_len, -1);
	if (IS_ERR(di)) {
		ret = PTR_ERR(di);
		goto err;
	}
	if (!di) {
		ret = -ENOENT;
		goto err;
	}
	leaf = path->nodes[0];
	btrfs_dir_item_key_to_cpu(leaf, di, &key);
	ret = btrfs_delete_one_dir_name(trans, root, path, di);
	if (ret)
		goto err;
	btrfs_release_path(path);

	/*
	 * If the inode already caches its dir index, try to queue the inode
	 * ref removal as a delayed operation and reuse the cached index.
	 * Otherwise (no cached index, or queuing failed) delete the inode ref
	 * directly, which also hands back the index we need below.
	 */
	if (BTRFS_I(inode)->dir_index) {
		ret = btrfs_delayed_delete_inode_ref(inode);
		if (!ret) {
			index = BTRFS_I(inode)->dir_index;
			goto skip_backref;
		}
	}

	ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
				  dir_ino, &index);
	if (ret) {
		btrfs_info(root->fs_info,
			"failed to delete reference to %.*s, inode %llu parent %llu",
			name_len, name, ino, dir_ino);
		btrfs_abort_transaction(trans, root, ret);
		goto err;
	}
skip_backref:
	ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
	if (ret) {
		btrfs_abort_transaction(trans, root, ret);
		goto err;
	}

	/* Drop the matching entries from the tree log; -ENOENT is fine. */
	ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len,
					 inode, dir_ino);
	if (ret != 0 && ret != -ENOENT) {
		btrfs_abort_transaction(trans, root, ret);
		goto err;
	}

	ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
					   dir, index);
	if (ret == -ENOENT)
		ret = 0;
	else if (ret)
		btrfs_abort_transaction(trans, root, ret);
err:
	btrfs_free_path(path);
	if (ret)
		goto out;

	/* Each name is accounted twice in the directory's i_size. */
	btrfs_i_size_write(dir, dir->i_size - name_len * 2);
	inode_inc_iversion(inode);
	inode_inc_iversion(dir);
	inode->i_ctime = dir->i_mtime =
		dir->i_ctime = current_fs_time(inode->i_sb);
	ret = btrfs_update_inode(trans, root, dir);
out:
	return ret;
}
4023
/*
 * Unlink @name from @dir and, if that succeeded, drop one link from
 * @inode and write the inode back.
 *
 * Returns 0 on success or a negative errno from either step.
 */
int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root,
		       struct inode *dir, struct inode *inode,
		       const char *name, int name_len)
{
	int ret = __btrfs_unlink_inode(trans, root, dir, inode,
				       name, name_len);

	if (ret)
		return ret;

	drop_nlink(inode);
	return btrfs_update_inode(trans, root, inode);
}
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir)
4047{
4048 struct btrfs_root *root = BTRFS_I(dir)->root;
4049
4050
4051
4052
4053
4054
4055
4056
4057 return btrfs_start_transaction_fallback_global_rsv(root, 5, 5);
4058}
4059
4060static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
4061{
4062 struct btrfs_root *root = BTRFS_I(dir)->root;
4063 struct btrfs_trans_handle *trans;
4064 struct inode *inode = d_inode(dentry);
4065 int ret;
4066
4067 trans = __unlink_start_trans(dir);
4068 if (IS_ERR(trans))
4069 return PTR_ERR(trans);
4070
4071 btrfs_record_unlink_dir(trans, dir, d_inode(dentry), 0);
4072
4073 ret = btrfs_unlink_inode(trans, root, dir, d_inode(dentry),
4074 dentry->d_name.name, dentry->d_name.len);
4075 if (ret)
4076 goto out;
4077
4078 if (inode->i_nlink == 0) {
4079 ret = btrfs_orphan_add(trans, inode);
4080 if (ret)
4081 goto out;
4082 }
4083
4084out:
4085 btrfs_end_transaction(trans, root);
4086 btrfs_btree_balance_dirty(root);
4087 return ret;
4088}
4089
/*
 * Remove the directory entry @name (length @name_len) in @dir that points
 * at the subvolume root @objectid.
 *
 * Deletes the dir item, the root reference in the tree root and the
 * delayed dir index, then shrinks and re-stamps the directory and updates
 * its inode.
 *
 * Returns 0 on success or a negative errno.  Every failure after the dir
 * item lookup aborts the transaction.
 */
int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
			struct btrfs_root *root,
			struct inode *dir, u64 objectid,
			const char *name, int name_len)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_dir_item *di;
	struct btrfs_key key;
	u64 index;
	int ret;
	u64 dir_ino = btrfs_ino(dir);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
				   name, name_len, -1);
	if (IS_ERR_OR_NULL(di)) {
		if (!di)
			ret = -ENOENT;
		else
			ret = PTR_ERR(di);
		goto out;
	}

	leaf = path->nodes[0];
	btrfs_dir_item_key_to_cpu(leaf, di, &key);
	/* The entry is expected to name exactly the root we were asked about. */
	WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
	ret = btrfs_delete_one_dir_name(trans, root, path, di);
	if (ret) {
		btrfs_abort_transaction(trans, root, ret);
		goto out;
	}
	btrfs_release_path(path);

	ret = btrfs_del_root_ref(trans, root->fs_info->tree_root,
				 objectid, root->root_key.objectid,
				 dir_ino, &index, name, name_len);
	if (ret < 0) {
		if (ret != -ENOENT) {
			btrfs_abort_transaction(trans, root, ret);
			goto out;
		}
		/*
		 * No root ref was found, so the index was not returned.
		 * Recover it from the dir index item for this name instead.
		 */
		di = btrfs_search_dir_index_item(root, path, dir_ino,
						 name, name_len);
		if (IS_ERR_OR_NULL(di)) {
			if (!di)
				ret = -ENOENT;
			else
				ret = PTR_ERR(di);
			btrfs_abort_transaction(trans, root, ret);
			goto out;
		}

		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		btrfs_release_path(path);
		index = key.offset;
	}
	btrfs_release_path(path);

	ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
	if (ret) {
		btrfs_abort_transaction(trans, root, ret);
		goto out;
	}

	/* Each name is accounted twice in the directory's i_size. */
	btrfs_i_size_write(dir, dir->i_size - name_len * 2);
	inode_inc_iversion(dir);
	dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
	ret = btrfs_update_inode_fallback(trans, root, dir);
	if (ret)
		btrfs_abort_transaction(trans, root, ret);
out:
	btrfs_free_path(path);
	return ret;
}
4169
4170static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
4171{
4172 struct inode *inode = d_inode(dentry);
4173 int err = 0;
4174 struct btrfs_root *root = BTRFS_I(dir)->root;
4175 struct btrfs_trans_handle *trans;
4176
4177 if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
4178 return -ENOTEMPTY;
4179 if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID)
4180 return -EPERM;
4181
4182 trans = __unlink_start_trans(dir);
4183 if (IS_ERR(trans))
4184 return PTR_ERR(trans);
4185
4186 if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
4187 err = btrfs_unlink_subvol(trans, root, dir,
4188 BTRFS_I(inode)->location.objectid,
4189 dentry->d_name.name,
4190 dentry->d_name.len);
4191 goto out;
4192 }
4193
4194 err = btrfs_orphan_add(trans, inode);
4195 if (err)
4196 goto out;
4197
4198
4199 err = btrfs_unlink_inode(trans, root, dir, d_inode(dentry),
4200 dentry->d_name.name, dentry->d_name.len);
4201 if (!err)
4202 btrfs_i_size_write(inode, 0);
4203out:
4204 btrfs_end_transaction(trans, root);
4205 btrfs_btree_balance_dirty(root);
4206
4207 return err;
4208}
4209
4210static int truncate_space_check(struct btrfs_trans_handle *trans,
4211 struct btrfs_root *root,
4212 u64 bytes_deleted)
4213{
4214 int ret;
4215
4216
4217
4218
4219
4220 bytes_deleted = btrfs_csum_bytes_to_leaves(root, bytes_deleted);
4221 bytes_deleted *= root->nodesize;
4222 ret = btrfs_block_rsv_add(root, &root->fs_info->trans_block_rsv,
4223 bytes_deleted, BTRFS_RESERVE_NO_FLUSH);
4224 if (!ret) {
4225 trace_btrfs_space_reservation(root->fs_info, "transaction",
4226 trans->transid,
4227 bytes_deleted, 1);
4228 trans->bytes_reserved += bytes_deleted;
4229 }
4230 return ret;
4231
4232}
4233
4234static int truncate_inline_extent(struct inode *inode,
4235 struct btrfs_path *path,
4236 struct btrfs_key *found_key,
4237 const u64 item_end,
4238 const u64 new_size)
4239{
4240 struct extent_buffer *leaf = path->nodes[0];
4241 int slot = path->slots[0];
4242 struct btrfs_file_extent_item *fi;
4243 u32 size = (u32)(new_size - found_key->offset);
4244 struct btrfs_root *root = BTRFS_I(inode)->root;
4245
4246 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
4247
4248 if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE) {
4249 loff_t offset = new_size;
4250 loff_t page_end = ALIGN(offset, PAGE_SIZE);
4251
4252
4253
4254
4255
4256
4257
4258
4259
4260
4261 btrfs_release_path(path);
4262 return btrfs_truncate_block(inode, offset, page_end - offset,
4263 0);
4264 }
4265
4266 btrfs_set_file_extent_ram_bytes(leaf, fi, size);
4267 size = btrfs_file_extent_calc_inline_size(size);
4268 btrfs_truncate_item(root, path, size, 1);
4269
4270 if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
4271 inode_sub_bytes(inode, item_end + 1 - new_size);
4272
4273 return 0;
4274}
4275
4276
4277
4278
4279
4280
4281
4282
4283
4284
4285
4286
/*
 * Delete or trim the items of @inode in @root so that nothing beyond
 * @new_size remains.
 *
 * The search starts at the highest possible key for the inode and walks
 * backwards.  Items whose type is greater than @min_type are always
 * deleted; items of type @min_type are deleted when they start at or after
 * @new_size, trimmed when they straddle it, and the walk stops at the
 * first item that ends before @new_size or whose type is below @min_type.
 * A non-zero @new_size is only allowed with
 * @min_type == BTRFS_EXTENT_DATA_KEY.
 *
 * Adjacent deletions are batched (pending_del_slot / pending_del_nr) and
 * flushed with a single btrfs_del_items() call.
 *
 * Returns 0 on success, -EAGAIN when the caller should end the current
 * transaction and call again, or another negative errno.
 */
int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct inode *inode,
			       u64 new_size, u32 min_type)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u64 extent_start = 0;
	u64 extent_num_bytes = 0;
	u64 extent_offset = 0;
	u64 item_end = 0;
	u64 last_size = new_size;
	u32 found_type = (u8)-1;
	int found_extent;
	int del_item;
	int pending_del_nr = 0;
	int pending_del_slot = 0;
	int extent_type = -1;
	int ret;
	int err = 0;
	u64 ino = btrfs_ino(inode);
	u64 bytes_deleted = 0;
	bool be_nice = 0;
	bool should_throttle = 0;
	bool should_end = 0;

	BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);

	/*
	 * Only regular (non free-space) inodes in reference-counted roots
	 * get the throttling / early-exit treatment below.
	 */
	if (!btrfs_is_free_space_inode(inode) &&
	    test_bit(BTRFS_ROOT_REF_COWS, &root->state))
		be_nice = 1;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->reada = READA_BACK;

	/*
	 * Drop cached extent maps past the (sector aligned) new size for
	 * reference-counted roots and for the tree root.
	 */
	if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
	    root == root->fs_info->tree_root)
		btrfs_drop_extent_cache(inode, ALIGN(new_size,
					root->sectorsize), (u64)-1, 0);

	/*
	 * When everything is being removed (min_type == 0) from the inode's
	 * own root, also throw away its pending delayed items.
	 */
	if (min_type == 0 && root == BTRFS_I(inode)->root)
		btrfs_kill_delayed_inode_items(inode);

	/* Start from the largest possible key belonging to this inode. */
	key.objectid = ino;
	key.offset = (u64)-1;
	key.type = (u8)-1;

search_again:
	/*
	 * After more than 32M has been freed, give up with -EAGAIN if the
	 * transaction wants to end.
	 */
	if (be_nice && bytes_deleted > SZ_32M) {
		if (btrfs_should_end_transaction(trans, root)) {
			err = -EAGAIN;
			goto error;
		}
	}


	path->leave_spinning = 1;
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}

	if (ret > 0) {
		/*
		 * No exact match: step back to the previous item, or stop if
		 * we are already at the first slot of the leaf.
		 */
		if (path->slots[0] == 0)
			goto out;
		path->slots[0]--;
	}

	while (1) {
		fi = NULL;
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		found_type = found_key.type;

		if (found_key.objectid != ino)
			break;

		if (found_type < min_type)
			break;

		/* Compute the offset of the last byte this item covers. */
		item_end = found_key.offset;
		if (found_type == BTRFS_EXTENT_DATA_KEY) {
			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);
			extent_type = btrfs_file_extent_type(leaf, fi);
			if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
				item_end +=
				    btrfs_file_extent_num_bytes(leaf, fi);
			} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
				item_end += btrfs_file_extent_inline_len(leaf,
							 path->slots[0], fi);
			}
			item_end--;
		}
		if (found_type > min_type) {
			del_item = 1;
		} else {
			if (item_end < new_size)
				break;
			if (found_key.offset >= new_size)
				del_item = 1;
			else
				del_item = 0;
		}
		found_extent = 0;
		/* Anything that is not a file extent needs no trimming. */
		if (found_type != BTRFS_EXTENT_DATA_KEY)
			goto delete;

		/* Track how far the truncate has progressed so far. */
		if (del_item)
			last_size = found_key.offset;
		else
			last_size = new_size;

		if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
			u64 num_dec;
			extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
			if (!del_item) {
				/* Trim the extent item to the new size. */
				u64 orig_num_bytes =
					btrfs_file_extent_num_bytes(leaf, fi);
				extent_num_bytes = ALIGN(new_size -
						found_key.offset,
						root->sectorsize);
				btrfs_set_file_extent_num_bytes(leaf, fi,
							 extent_num_bytes);
				num_dec = (orig_num_bytes -
					   extent_num_bytes);
				if (test_bit(BTRFS_ROOT_REF_COWS,
					     &root->state) &&
				    extent_start != 0)
					inode_sub_bytes(inode, num_dec);
				btrfs_mark_buffer_dirty(leaf);
			} else {
				/*
				 * The whole item goes away; remember what is
				 * needed to free the extent it references.
				 */
				extent_num_bytes =
					btrfs_file_extent_disk_num_bytes(leaf,
									 fi);
				extent_offset = found_key.offset -
					btrfs_file_extent_offset(leaf, fi);

				/* FIXME blocksize != 4096 */
				num_dec = btrfs_file_extent_num_bytes(leaf, fi);
				if (extent_start != 0) {
					found_extent = 1;
					if (test_bit(BTRFS_ROOT_REF_COWS,
						     &root->state))
						inode_sub_bytes(inode, num_dec);
				}
			}
		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
			/*
			 * Inline extents are only trimmed when they carry no
			 * encryption or other encoding.
			 */
			if (!del_item &&
			    btrfs_file_extent_encryption(leaf, fi) == 0 &&
			    btrfs_file_extent_other_encoding(leaf, fi) == 0) {

				/*
				 * truncate_inline_extent() releases the path
				 * for compressed extents, so flush any batched
				 * deletions first while the path is still
				 * valid.
				 */
				if (btrfs_file_extent_compression(leaf, fi) !=
				    BTRFS_COMPRESS_NONE && pending_del_nr) {
					err = btrfs_del_items(trans, root, path,
							      pending_del_slot,
							      pending_del_nr);
					if (err) {
						btrfs_abort_transaction(trans,
									root,
									err);
						goto error;
					}
					pending_del_nr = 0;
				}

				err = truncate_inline_extent(inode, path,
							     &found_key,
							     item_end,
							     new_size);
				if (err) {
					btrfs_abort_transaction(trans,
								root, err);
					goto error;
				}
			} else if (test_bit(BTRFS_ROOT_REF_COWS,
					    &root->state)) {
				inode_sub_bytes(inode, item_end + 1 - new_size);
			}
		}
delete:
		if (del_item) {
			if (!pending_del_nr) {
				/* No pending batch yet, start one. */
				pending_del_slot = path->slots[0];
				pending_del_nr = 1;
			} else if (pending_del_nr &&
				   path->slots[0] + 1 == pending_del_slot) {
				/* Adjacent slot: grow the pending batch. */
				pending_del_nr++;
				pending_del_slot = path->slots[0];
			} else {
				BUG();
			}
		} else {
			break;
		}
		should_throttle = 0;

		if (found_extent &&
		    (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
		     root == root->fs_info->tree_root)) {
			btrfs_set_path_blocking(path);
			bytes_deleted += extent_num_bytes;
			ret = btrfs_free_extent(trans, root, extent_start,
						extent_num_bytes, 0,
						btrfs_header_owner(leaf),
						ino, extent_offset);
			BUG_ON(ret);
			if (btrfs_should_throttle_delayed_refs(trans, root))
				btrfs_async_run_delayed_refs(root,
					trans->delayed_ref_updates * 2, 0);
			if (be_nice) {
				/*
				 * If no extra metadata space can be reserved,
				 * stop after this batch and ask the caller to
				 * come back with a new transaction.
				 */
				if (truncate_space_check(trans, root,
							 extent_num_bytes)) {
					should_end = 1;
				}
				if (btrfs_should_throttle_delayed_refs(trans,
								       root)) {
					should_throttle = 1;
				}
			}
		}

		if (found_type == BTRFS_INODE_ITEM_KEY)
			break;

		/*
		 * Flush the batch and restart the search when the leaf is
		 * exhausted, the batch is no longer contiguous with the
		 * current slot, or throttling / ending was requested.
		 * Otherwise just move to the previous slot.
		 */
		if (path->slots[0] == 0 ||
		    path->slots[0] != pending_del_slot ||
		    should_throttle || should_end) {
			if (pending_del_nr) {
				ret = btrfs_del_items(trans, root, path,
						pending_del_slot,
						pending_del_nr);
				if (ret) {
					btrfs_abort_transaction(trans,
								root, ret);
					goto error;
				}
				pending_del_nr = 0;
			}
			btrfs_release_path(path);
			if (should_throttle) {
				unsigned long updates = trans->delayed_ref_updates;
				if (updates) {
					trans->delayed_ref_updates = 0;
					ret = btrfs_run_delayed_refs(trans, root, updates * 2);
					if (ret && !err)
						err = ret;
				}
			}
			/*
			 * The space reservation failed earlier, so bail out
			 * and let the caller retry.
			 */
			if (should_end) {
				err = -EAGAIN;
				goto error;
			}
			goto search_again;
		} else {
			path->slots[0]--;
		}
	}
out:
	/* Flush whatever is still batched for deletion. */
	if (pending_del_nr) {
		ret = btrfs_del_items(trans, root, path, pending_del_slot,
				      pending_del_nr);
		if (ret)
			btrfs_abort_transaction(trans, root, ret);
	}
error:
	/* The tree log root does not track an ordered i_size. */
	if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
		btrfs_ordered_update_i_size(inode, last_size, NULL);

	btrfs_free_path(path);

	/* After a large truncate, run the accumulated delayed refs now. */
	if (be_nice && bytes_deleted > SZ_32M) {
		unsigned long updates = trans->delayed_ref_updates;
		if (updates) {
			trans->delayed_ref_updates = 0;
			ret = btrfs_run_delayed_refs(trans, root, updates * 2);
			if (ret && !err)
				err = ret;
		}
	}
	return err;
}
4614
4615
4616
4617
4618
4619
4620
4621
4622
4623
4624
4625
/*
 * Read the block containing @from, zero part of it and mark it dirty.
 *
 * @inode: inode whose page cache is modified
 * @from:  file offset at which zeroing starts
 * @len:   number of bytes to zero; 0 means "up to the end of the block"
 * @front: when non-zero, zero from the start of the block up to @from
 *         instead of from @from onwards
 *
 * Nothing is done when @from is block aligned and @len is zero or also
 * block aligned.  Otherwise delalloc space for one block is reserved, the
 * page is brought up to date, any ordered extent covering the block is
 * waited for, the block is marked delalloc and the requested range is
 * zeroed.
 *
 * Returns 0 on success or a negative errno (-ENOMEM, -EIO, or an error
 * from the reservation / delalloc helpers).
 */
int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
			int front)
{
	struct address_space *mapping = inode->i_mapping;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	char *kaddr;
	u32 blocksize = root->sectorsize;
	pgoff_t index = from >> PAGE_SHIFT;
	unsigned offset = from & (blocksize - 1);
	struct page *page;
	gfp_t mask = btrfs_alloc_write_mask(mapping);
	int ret = 0;
	u64 block_start;
	u64 block_end;

	/* Already block aligned on both ends: nothing to zero. */
	if ((offset & (blocksize - 1)) == 0 &&
	    (!len || ((len & (blocksize - 1)) == 0)))
		goto out;

	ret = btrfs_delalloc_reserve_space(inode,
			round_down(from, blocksize), blocksize);
	if (ret)
		goto out;

again:
	page = find_or_create_page(mapping, index, mask);
	if (!page) {
		btrfs_delalloc_release_space(inode,
				round_down(from, blocksize),
				blocksize);
		ret = -ENOMEM;
		goto out;
	}

	block_start = round_down(from, blocksize);
	block_end = block_start + blocksize - 1;

	if (!PageUptodate(page)) {
		ret = btrfs_readpage(NULL, page);
		lock_page(page);
		/* The page was truncated or reclaimed meanwhile; retry. */
		if (page->mapping != mapping) {
			unlock_page(page);
			put_page(page);
			goto again;
		}
		if (!PageUptodate(page)) {
			ret = -EIO;
			goto out_unlock;
		}
	}
	wait_on_page_writeback(page);

	lock_extent_bits(io_tree, block_start, block_end, &cached_state);
	set_page_extent_mapped(page);

	/*
	 * An ordered extent still covers this block: drop all locks, wait
	 * for it to finish and start over.
	 */
	ordered = btrfs_lookup_ordered_extent(inode, block_start);
	if (ordered) {
		unlock_extent_cached(io_tree, block_start, block_end,
				     &cached_state, GFP_NOFS);
		unlock_page(page);
		put_page(page);
		btrfs_start_ordered_extent(inode, ordered, 1);
		btrfs_put_ordered_extent(ordered);
		goto again;
	}

	/* Reset the block's delalloc state before marking it again. */
	clear_extent_bit(&BTRFS_I(inode)->io_tree, block_start, block_end,
			  EXTENT_DIRTY | EXTENT_DELALLOC |
			  EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
			  0, 0, &cached_state, GFP_NOFS);

	ret = btrfs_set_extent_delalloc(inode, block_start, block_end,
					&cached_state);
	if (ret) {
		unlock_extent_cached(io_tree, block_start, block_end,
				     &cached_state, GFP_NOFS);
		goto out_unlock;
	}

	/*
	 * NOTE(review): offset was masked with (blocksize - 1) above, so it
	 * can never equal blocksize and this condition is always true.
	 */
	if (offset != blocksize) {
		if (!len)
			len = blocksize - offset;
		kaddr = kmap(page);
		if (front)
			memset(kaddr + (block_start - page_offset(page)),
				0, offset);
		else
			memset(kaddr + (block_start - page_offset(page)) +  offset,
				0, len);
		flush_dcache_page(page);
		kunmap(page);
	}
	ClearPageChecked(page);
	set_page_dirty(page);
	unlock_extent_cached(io_tree, block_start, block_end, &cached_state,
			     GFP_NOFS);

out_unlock:
	/* On failure give back the space reserved at the top. */
	if (ret)
		btrfs_delalloc_release_space(inode, block_start,
					     blocksize);
	unlock_page(page);
	put_page(page);
out:
	return ret;
}
4735
4736static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode,
4737 u64 offset, u64 len)
4738{
4739 struct btrfs_trans_handle *trans;
4740 int ret;
4741
4742
4743
4744
4745
4746 if (btrfs_fs_incompat(root->fs_info, NO_HOLES)) {
4747 BTRFS_I(inode)->last_trans = root->fs_info->generation;
4748 BTRFS_I(inode)->last_sub_trans = root->log_transid;
4749 BTRFS_I(inode)->last_log_commit = root->last_log_commit;
4750 return 0;
4751 }
4752
4753
4754
4755
4756
4757
4758 trans = btrfs_start_transaction(root, 3);
4759 if (IS_ERR(trans))
4760 return PTR_ERR(trans);
4761
4762 ret = btrfs_drop_extents(trans, root, inode, offset, offset + len, 1);
4763 if (ret) {
4764 btrfs_abort_transaction(trans, root, ret);
4765 btrfs_end_transaction(trans, root);
4766 return ret;
4767 }
4768
4769 ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset,
4770 0, 0, len, 0, len, 0, 0, 0);
4771 if (ret)
4772 btrfs_abort_transaction(trans, root, ret);
4773 else
4774 btrfs_update_inode(trans, root, inode);
4775 btrfs_end_transaction(trans, root);
4776 return ret;
4777}
4778
4779
4780
4781
4782
4783
4784
/*
 * Prepare @inode for growing from @oldsize to @size by filling the gap
 * with holes.
 *
 * The tail of the block containing @oldsize is zeroed first.  Then the
 * sector-aligned range between the old and new size is locked (after
 * waiting for any ordered extents in it), and for every extent map in
 * that range that is not preallocated a hole is recorded on disk via
 * maybe_insert_hole() and mirrored by a hole extent map in the cache.
 *
 * Returns 0 on success or a negative errno.
 */
int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_map *em = NULL;
	struct extent_state *cached_state = NULL;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	u64 hole_start = ALIGN(oldsize, root->sectorsize);
	u64 block_end = ALIGN(size, root->sectorsize);
	u64 last_byte;
	u64 cur_offset;
	u64 hole_size;
	int err = 0;

	/*
	 * Zero from the old size to the end of its block so that stale data
	 * there is not exposed once i_size grows.
	 */
	err = btrfs_truncate_block(inode, oldsize, 0, 0);
	if (err)
		return err;

	/* The new size stays within the old last block: no hole needed. */
	if (size <= hole_start)
		return 0;

	/* Lock the range, retrying until no ordered extent overlaps it. */
	while (1) {
		struct btrfs_ordered_extent *ordered;

		lock_extent_bits(io_tree, hole_start, block_end - 1,
				 &cached_state);
		ordered = btrfs_lookup_ordered_range(inode, hole_start,
						     block_end - hole_start);
		if (!ordered)
			break;
		unlock_extent_cached(io_tree, hole_start, block_end - 1,
				     &cached_state, GFP_NOFS);
		btrfs_start_ordered_extent(inode, ordered, 1);
		btrfs_put_ordered_extent(ordered);
	}

	cur_offset = hole_start;
	while (1) {
		em = btrfs_get_extent(inode, NULL, 0, cur_offset,
				block_end - cur_offset, 0);
		if (IS_ERR(em)) {
			err = PTR_ERR(em);
			em = NULL;
			break;
		}
		last_byte = min(extent_map_end(em), block_end);
		last_byte = ALIGN(last_byte , root->sectorsize);
		/* Preallocated extents are left alone. */
		if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
			struct extent_map *hole_em;
			hole_size = last_byte - cur_offset;

			err = maybe_insert_hole(root, inode, cur_offset,
						hole_size);
			if (err)
				break;
			btrfs_drop_extent_cache(inode, cur_offset,
						cur_offset + hole_size - 1, 0);
			hole_em = alloc_extent_map();
			if (!hole_em) {
				/*
				 * The cache could not be updated; flag the
				 * inode so the next fsync does a full sync.
				 */
				set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
					&BTRFS_I(inode)->runtime_flags);
				goto next;
			}
			hole_em->start = cur_offset;
			hole_em->len = hole_size;
			hole_em->orig_start = cur_offset;

			hole_em->block_start = EXTENT_MAP_HOLE;
			hole_em->block_len = 0;
			hole_em->orig_block_len = 0;
			hole_em->ram_bytes = hole_size;
			hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
			hole_em->compress_type = BTRFS_COMPRESS_NONE;
			hole_em->generation = root->fs_info->generation;

			/*
			 * Insert the hole map; on -EEXIST drop whatever is
			 * cached for the range and try again.
			 */
			while (1) {
				write_lock(&em_tree->lock);
				err = add_extent_mapping(em_tree, hole_em, 1);
				write_unlock(&em_tree->lock);
				if (err != -EEXIST)
					break;
				btrfs_drop_extent_cache(inode, cur_offset,
							cur_offset +
							hole_size - 1, 0);
			}
			free_extent_map(hole_em);
		}
next:
		free_extent_map(em);
		em = NULL;
		cur_offset = last_byte;
		if (cur_offset >= block_end)
			break;
	}
	free_extent_map(em);
	unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state,
			     GFP_NOFS);
	return err;
}
4889
/*
 * Change the size of @inode to attr->ia_size.
 *
 * Growing: holes are set up with btrfs_cont_expand(), then i_size is
 * raised and the inode updated in a 1-item transaction.
 *
 * Shrinking (or same size): an orphan item is added first, the page cache
 * is truncated, in-flight direct I/O is waited for, and btrfs_truncate()
 * removes the on-disk items.  If that fails while the inode still has
 * links, i_size is restored from disk_i_size and the orphan item removed.
 *
 * Returns 0 on success or a negative errno.
 */
static int btrfs_setsize(struct inode *inode, struct iattr *attr)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_trans_handle *trans;
	loff_t oldsize = i_size_read(inode);
	loff_t newsize = attr->ia_size;
	int mask = attr->ia_valid;
	int ret;

	/*
	 * When the size really changes and the caller set neither
	 * ATTR_CTIME nor ATTR_MTIME, update both timestamps ourselves.
	 */
	if (newsize != oldsize) {
		inode_inc_iversion(inode);
		if (!(mask & (ATTR_CTIME | ATTR_MTIME)))
			inode->i_ctime = inode->i_mtime =
				current_fs_time(inode->i_sb);
	}

	if (newsize > oldsize) {
		/*
		 * Wait for any snapshot creation to finish before expanding;
		 * the matching btrfs_end_write_no_snapshoting() is called on
		 * every exit path of this branch.
		 */
		btrfs_wait_for_snapshot_creation(root);
		ret = btrfs_cont_expand(inode, oldsize, newsize);
		if (ret) {
			btrfs_end_write_no_snapshoting(root);
			return ret;
		}

		trans = btrfs_start_transaction(root, 1);
		if (IS_ERR(trans)) {
			btrfs_end_write_no_snapshoting(root);
			return PTR_ERR(trans);
		}

		i_size_write(inode, newsize);
		btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
		pagecache_isize_extended(inode, oldsize, newsize);
		ret = btrfs_update_inode(trans, root, inode);
		btrfs_end_write_no_snapshoting(root);
		btrfs_end_transaction(trans, root);
	} else {

		/*
		 * Truncating to zero: flag the inode with
		 * BTRFS_INODE_ORDERED_DATA_CLOSE.
		 * NOTE(review): presumably so data written afterwards is
		 * flushed promptly on close - confirm where the flag is read.
		 */
		if (newsize == 0)
			set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
				&BTRFS_I(inode)->runtime_flags);

		/* Reserve 2 items for adding the orphan item. */
		trans = btrfs_start_transaction(root, 2);
		if (IS_ERR(trans))
			return PTR_ERR(trans);

		/*
		 * Add the orphan item in its own short transaction before
		 * any data is dropped.
		 * NOTE(review): presumably so an interrupted truncate can be
		 * completed by orphan cleanup - confirm.
		 */
		ret = btrfs_orphan_add(trans, inode);
		btrfs_end_transaction(trans, root);
		if (ret)
			return ret;

		/* Sets the in-memory i_size and truncates the page cache. */
		truncate_setsize(inode, newsize);

		/* Block new lockless direct I/O and wait for what is in flight. */
		btrfs_inode_block_unlocked_dio(inode);
		inode_dio_wait(inode);
		btrfs_inode_resume_unlocked_dio(inode);

		ret = btrfs_truncate(inode);
		if (ret && inode->i_nlink) {
			int err;

			/*
			 * The truncate failed on a still-linked inode: put
			 * i_size back to what is on disk and drop the orphan
			 * item.  The original truncate error is what gets
			 * returned.
			 */
			trans = btrfs_join_transaction(root);
			if (IS_ERR(trans)) {
				btrfs_orphan_del(NULL, inode);
				return ret;
			}
			i_size_write(inode, BTRFS_I(inode)->disk_i_size);
			err = btrfs_orphan_del(trans, inode);
			if (err)
				btrfs_abort_transaction(trans, root, err);
			btrfs_end_transaction(trans, root);
		}
	}

	return ret;
}
5006
5007static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
5008{
5009 struct inode *inode = d_inode(dentry);
5010 struct btrfs_root *root = BTRFS_I(inode)->root;
5011 int err;
5012
5013 if (btrfs_root_readonly(root))
5014 return -EROFS;
5015
5016 err = inode_change_ok(inode, attr);
5017 if (err)
5018 return err;
5019
5020 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
5021 err = btrfs_setsize(inode, attr);
5022 if (err)
5023 return err;
5024 }
5025
5026 if (attr->ia_valid) {
5027 setattr_copy(inode, attr);
5028 inode_inc_iversion(inode);
5029 err = btrfs_dirty_inode(inode);
5030
5031 if (!err && attr->ia_valid & ATTR_MODE)
5032 err = posix_acl_chmod(inode, inode->i_mode);
5033 }
5034
5035 return err;
5036}
5037
5038
5039
5040
5041
5042
5043
5044
5045
5046
5047
5048
5049
/*
 * Tear down the in-memory state attached to an inode that is being freed:
 * its page cache, every extent map cached in its extent tree and every
 * extent state record left in its io tree.
 *
 * The inode must already be flagged I_FREEING (asserted below).
 */
static void evict_inode_truncate_pages(struct inode *inode)
{
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_map_tree *map_tree = &BTRFS_I(inode)->extent_tree;
	struct rb_node *node;

	ASSERT(inode->i_state & I_FREEING);
	truncate_inode_pages_final(&inode->i_data);

	/* Remove and free the extent maps one by one, lowest node first. */
	write_lock(&map_tree->lock);
	while (!RB_EMPTY_ROOT(&map_tree->map)) {
		struct extent_map *em;

		node = rb_first(&map_tree->map);
		em = rb_entry(node, struct extent_map, rb_node);
		/* Drop the pinned/logging flags before removing the mapping. */
		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
		clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
		remove_extent_mapping(map_tree, em);
		free_extent_map(em);
		/* Release the tree lock around the reschedule point. */
		if (need_resched()) {
			write_unlock(&map_tree->lock);
			cond_resched();
			write_lock(&map_tree->lock);
		}
	}
	write_unlock(&map_tree->lock);

	/*
	 * Clear out whatever extent state records remain.  Each pass picks
	 * the first record, remembers its range, and drops the spinlock
	 * before taking the extent lock on that range.
	 */
	spin_lock(&io_tree->lock);
	while (!RB_EMPTY_ROOT(&io_tree->state)) {
		struct extent_state *state;
		struct extent_state *cached_state = NULL;
		u64 start;
		u64 end;

		node = rb_first(&io_tree->state);
		state = rb_entry(node, struct extent_state, rb_node);
		start = state->start;
		end = state->end;
		spin_unlock(&io_tree->lock);

		lock_extent_bits(io_tree, start, end, &cached_state);

		/*
		 * A range still flagged delalloc gets its qgroup data
		 * reservation released before the bits are cleared.
		 */
		if (state->state & EXTENT_DELALLOC)
			btrfs_qgroup_free_data(inode, start, end - start + 1);

		clear_extent_bit(io_tree, start, end,
				 EXTENT_LOCKED | EXTENT_DIRTY |
				 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
				 EXTENT_DEFRAG, 1, 1,
				 &cached_state, GFP_NOFS);

		cond_resched();
		spin_lock(&io_tree->lock);
	}
	spin_unlock(&io_tree->lock);
}
5130
5131void btrfs_evict_inode(struct inode *inode)
5132{
5133 struct btrfs_trans_handle *trans;
5134 struct btrfs_root *root = BTRFS_I(inode)->root;
5135 struct btrfs_block_rsv *rsv, *global_rsv;
5136 int steal_from_global = 0;
5137 u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
5138 int ret;
5139
5140 trace_btrfs_inode_evict(inode);
5141
5142 evict_inode_truncate_pages(inode);
5143
5144 if (inode->i_nlink &&
5145 ((btrfs_root_refs(&root->root_item) != 0 &&
5146 root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID) ||
5147 btrfs_is_free_space_inode(inode)))
5148 goto no_delete;
5149
5150 if (is_bad_inode(inode)) {
5151 btrfs_orphan_del(NULL, inode);
5152 goto no_delete;
5153 }
5154
5155 if (!special_file(inode->i_mode))
5156 btrfs_wait_ordered_range(inode, 0, (u64)-1);
5157
5158 btrfs_free_io_failure_record(inode, 0, (u64)-1);
5159
5160 if (root->fs_info->log_root_recovering) {
5161 BUG_ON(test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
5162 &BTRFS_I(inode)->runtime_flags));
5163 goto no_delete;
5164 }
5165
5166 if (inode->i_nlink > 0) {
5167 BUG_ON(btrfs_root_refs(&root->root_item) != 0 &&
5168 root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID);
5169 goto no_delete;
5170 }
5171
5172 ret = btrfs_commit_inode_delayed_inode(inode);
5173 if (ret) {
5174 btrfs_orphan_del(NULL, inode);
5175 goto no_delete;
5176 }
5177
5178 rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
5179 if (!rsv) {
5180 btrfs_orphan_del(NULL, inode);
5181 goto no_delete;
5182 }
5183 rsv->size = min_size;
5184 rsv->failfast = 1;
5185 global_rsv = &root->fs_info->global_block_rsv;
5186
5187 btrfs_i_size_write(inode, 0);
5188
5189
5190
5191
5192
5193
5194
5195 while (1) {
5196 ret = btrfs_block_rsv_refill(root, rsv, min_size,
5197 BTRFS_RESERVE_FLUSH_LIMIT);
5198
5199
5200
5201
5202
5203
5204 if (ret)
5205 steal_from_global++;
5206 else
5207 steal_from_global = 0;
5208 ret = 0;
5209
5210
5211
5212
5213
5214
5215
5216
5217
5218 if (steal_from_global > 2) {
5219 btrfs_warn(root->fs_info,
5220 "Could not get space for a delete, will truncate on mount %d",
5221 ret);
5222 btrfs_orphan_del(NULL, inode);
5223 btrfs_free_block_rsv(root, rsv);
5224 goto no_delete;
5225 }
5226
5227 trans = btrfs_join_transaction(root);
5228 if (IS_ERR(trans)) {
5229 btrfs_orphan_del(NULL, inode);
5230 btrfs_free_block_rsv(root, rsv);
5231 goto no_delete;
5232 }
5233
5234
5235
5236
5237
5238
5239 if (steal_from_global) {
5240 if (!btrfs_check_space_for_delayed_refs(trans, root))
5241 ret = btrfs_block_rsv_migrate(global_rsv, rsv,
5242 min_size);
5243 else
5244 ret = -ENOSPC;
5245 }
5246
5247
5248
5249
5250
5251
5252 if (ret) {
5253 ret = btrfs_commit_transaction(trans, root);
5254 if (ret) {
5255 btrfs_orphan_del(NULL, inode);
5256 btrfs_free_block_rsv(root, rsv);
5257 goto no_delete;
5258 }
5259 continue;
5260 } else {
5261 steal_from_global = 0;
5262 }
5263
5264 trans->block_rsv = rsv;
5265
5266 ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
5267 if (ret != -ENOSPC && ret != -EAGAIN)
5268 break;
5269
5270 trans->block_rsv = &root->fs_info->trans_block_rsv;
5271 btrfs_end_transaction(trans, root);
5272 trans = NULL;
5273 btrfs_btree_balance_dirty(root);
5274 }
5275
5276 btrfs_free_block_rsv(root, rsv);
5277
5278
5279
5280
5281
5282 if (ret == 0) {
5283 trans->block_rsv = root->orphan_block_rsv;
5284 btrfs_orphan_del(trans, inode);
5285 } else {
5286 btrfs_orphan_del(NULL, inode);
5287 }
5288
5289 trans->block_rsv = &root->fs_info->trans_block_rsv;
5290 if (!(root == root->fs_info->tree_root ||
5291 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID))
5292 btrfs_return_ino(root, btrfs_ino(inode));
5293
5294 btrfs_end_transaction(trans, root);
5295 btrfs_btree_balance_dirty(root);
5296no_delete:
5297 btrfs_remove_delayed_node(inode);
5298 clear_inode(inode);
5299}
5300
5301
5302
5303
5304
5305static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
5306 struct btrfs_key *location)
5307{
5308 const char *name = dentry->d_name.name;
5309 int namelen = dentry->d_name.len;
5310 struct btrfs_dir_item *di;
5311 struct btrfs_path *path;
5312 struct btrfs_root *root = BTRFS_I(dir)->root;
5313 int ret = 0;
5314
5315 path = btrfs_alloc_path();
5316 if (!path)
5317 return -ENOMEM;
5318
5319 di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name,
5320 namelen, 0);
5321 if (IS_ERR(di))
5322 ret = PTR_ERR(di);
5323
5324 if (IS_ERR_OR_NULL(di))
5325 goto out_err;
5326
5327 btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
5328out:
5329 btrfs_free_path(path);
5330 return ret;
5331out_err:
5332 location->objectid = 0;
5333 goto out;
5334}
5335
5336
5337
5338
5339
5340
5341static int fixup_tree_root_location(struct btrfs_root *root,
5342 struct inode *dir,
5343 struct dentry *dentry,
5344 struct btrfs_key *location,
5345 struct btrfs_root **sub_root)
5346{
5347 struct btrfs_path *path;
5348 struct btrfs_root *new_root;
5349 struct btrfs_root_ref *ref;
5350 struct extent_buffer *leaf;
5351 struct btrfs_key key;
5352 int ret;
5353 int err = 0;
5354
5355 path = btrfs_alloc_path();
5356 if (!path) {
5357 err = -ENOMEM;
5358 goto out;
5359 }
5360
5361 err = -ENOENT;
5362 key.objectid = BTRFS_I(dir)->root->root_key.objectid;
5363 key.type = BTRFS_ROOT_REF_KEY;
5364 key.offset = location->objectid;
5365
5366 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, path,
5367 0, 0);
5368 if (ret) {
5369 if (ret < 0)
5370 err = ret;
5371 goto out;
5372 }
5373
5374 leaf = path->nodes[0];
5375 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
5376 if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(dir) ||
5377 btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len)
5378 goto out;
5379
5380 ret = memcmp_extent_buffer(leaf, dentry->d_name.name,
5381 (unsigned long)(ref + 1),
5382 dentry->d_name.len);
5383 if (ret)
5384 goto out;
5385
5386 btrfs_release_path(path);
5387
5388 new_root = btrfs_read_fs_root_no_name(root->fs_info, location);
5389 if (IS_ERR(new_root)) {
5390 err = PTR_ERR(new_root);
5391 goto out;
5392 }
5393
5394 *sub_root = new_root;
5395 location->objectid = btrfs_root_dirid(&new_root->root_item);
5396 location->type = BTRFS_INODE_ITEM_KEY;
5397 location->offset = 0;
5398 err = 0;
5399out:
5400 btrfs_free_path(path);
5401 return err;
5402}
5403
5404static void inode_tree_add(struct inode *inode)
5405{
5406 struct btrfs_root *root = BTRFS_I(inode)->root;
5407 struct btrfs_inode *entry;
5408 struct rb_node **p;
5409 struct rb_node *parent;
5410 struct rb_node *new = &BTRFS_I(inode)->rb_node;
5411 u64 ino = btrfs_ino(inode);
5412
5413 if (inode_unhashed(inode))
5414 return;
5415 parent = NULL;
5416 spin_lock(&root->inode_lock);
5417 p = &root->inode_tree.rb_node;
5418 while (*p) {
5419 parent = *p;
5420 entry = rb_entry(parent, struct btrfs_inode, rb_node);
5421
5422 if (ino < btrfs_ino(&entry->vfs_inode))
5423 p = &parent->rb_left;
5424 else if (ino > btrfs_ino(&entry->vfs_inode))
5425 p = &parent->rb_right;
5426 else {
5427 WARN_ON(!(entry->vfs_inode.i_state &
5428 (I_WILL_FREE | I_FREEING)));
5429 rb_replace_node(parent, new, &root->inode_tree);
5430 RB_CLEAR_NODE(parent);
5431 spin_unlock(&root->inode_lock);
5432 return;
5433 }
5434 }
5435 rb_link_node(new, parent, p);
5436 rb_insert_color(new, &root->inode_tree);
5437 spin_unlock(&root->inode_lock);
5438}
5439
5440static void inode_tree_del(struct inode *inode)
5441{
5442 struct btrfs_root *root = BTRFS_I(inode)->root;
5443 int empty = 0;
5444
5445 spin_lock(&root->inode_lock);
5446 if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) {
5447 rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree);
5448 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
5449 empty = RB_EMPTY_ROOT(&root->inode_tree);
5450 }
5451 spin_unlock(&root->inode_lock);
5452
5453 if (empty && btrfs_root_refs(&root->root_item) == 0) {
5454 synchronize_srcu(&root->fs_info->subvol_srcu);
5455 spin_lock(&root->inode_lock);
5456 empty = RB_EMPTY_ROOT(&root->inode_tree);
5457 spin_unlock(&root->inode_lock);
5458 if (empty)
5459 btrfs_add_dead_root(root);
5460 }
5461}
5462
/*
 * Walk the inodes linked into @root's in-memory inode rbtree in ascending
 * inode-number order and prune the dentry aliases of every inode that can
 * be grabbed and is referenced by someone besides this walker.
 *
 * Warns if the root still has references, unless the filesystem is flagged
 * BTRFS_FS_STATE_ERROR.
 */
void btrfs_invalidate_inodes(struct btrfs_root *root)
{
	struct rb_node *node;
	struct rb_node *prev;
	struct btrfs_inode *entry;
	struct inode *inode;
	/* Resume cursor: lowest inode number not yet visited. */
	u64 objectid = 0;

	if (!test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
		WARN_ON(btrfs_root_refs(&root->root_item) != 0);

	spin_lock(&root->inode_lock);
again:
	/* Search the tree for an entry whose number equals the cursor. */
	node = root->inode_tree.rb_node;
	prev = NULL;
	while (node) {
		prev = node;
		entry = rb_entry(node, struct btrfs_inode, rb_node);

		if (objectid < btrfs_ino(&entry->vfs_inode))
			node = node->rb_left;
		else if (objectid > btrfs_ino(&entry->vfs_inode))
			node = node->rb_right;
		else
			break;
	}
	/*
	 * No exact match: starting at the last node the search touched, step
	 * forward to the first entry whose number is >= the cursor.
	 */
	if (!node) {
		while (prev) {
			entry = rb_entry(prev, struct btrfs_inode, rb_node);
			if (objectid <= btrfs_ino(&entry->vfs_inode)) {
				node = prev;
				break;
			}
			prev = rb_next(prev);
		}
	}
	while (node) {
		entry = rb_entry(node, struct btrfs_inode, rb_node);
		/* Advance the cursor past this entry before the lock can drop. */
		objectid = btrfs_ino(&entry->vfs_inode) + 1;
		inode = igrab(&entry->vfs_inode);
		if (inode) {
			spin_unlock(&root->inode_lock);
			/* A count above 1 means others hold it besides our igrab. */
			if (atomic_read(&inode->i_count) > 1)
				d_prune_aliases(inode);

			iput(inode);
			cond_resched();
			/* The lock was dropped, so search again from the cursor. */
			spin_lock(&root->inode_lock);
			goto again;
		}

		/* Non-zero return: restart the search from the cursor. */
		if (cond_resched_lock(&root->inode_lock))
			goto again;

		node = rb_next(node);
	}
	spin_unlock(&root->inode_lock);
}
5525
5526static int btrfs_init_locked_inode(struct inode *inode, void *p)
5527{
5528 struct btrfs_iget_args *args = p;
5529 inode->i_ino = args->location->objectid;
5530 memcpy(&BTRFS_I(inode)->location, args->location,
5531 sizeof(*args->location));
5532 BTRFS_I(inode)->root = args->root;
5533 return 0;
5534}
5535
5536static int btrfs_find_actor(struct inode *inode, void *opaque)
5537{
5538 struct btrfs_iget_args *args = opaque;
5539 return args->location->objectid == BTRFS_I(inode)->location.objectid &&
5540 args->root == BTRFS_I(inode)->root;
5541}
5542
5543static struct inode *btrfs_iget_locked(struct super_block *s,
5544 struct btrfs_key *location,
5545 struct btrfs_root *root)
5546{
5547 struct inode *inode;
5548 struct btrfs_iget_args args;
5549 unsigned long hashval = btrfs_inode_hash(location->objectid, root);
5550
5551 args.location = location;
5552 args.root = root;
5553
5554 inode = iget5_locked(s, hashval, btrfs_find_actor,
5555 btrfs_init_locked_inode,
5556 (void *)&args);
5557 return inode;
5558}
5559
5560
5561
5562
5563struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
5564 struct btrfs_root *root, int *new)
5565{
5566 struct inode *inode;
5567
5568 inode = btrfs_iget_locked(s, location, root);
5569 if (!inode)
5570 return ERR_PTR(-ENOMEM);
5571
5572 if (inode->i_state & I_NEW) {
5573 btrfs_read_locked_inode(inode);
5574 if (!is_bad_inode(inode)) {
5575 inode_tree_add(inode);
5576 unlock_new_inode(inode);
5577 if (new)
5578 *new = 1;
5579 } else {
5580 unlock_new_inode(inode);
5581 iput(inode);
5582 inode = ERR_PTR(-ESTALE);
5583 }
5584 }
5585
5586 return inode;
5587}
5588
5589static struct inode *new_simple_dir(struct super_block *s,
5590 struct btrfs_key *key,
5591 struct btrfs_root *root)
5592{
5593 struct inode *inode = new_inode(s);
5594
5595 if (!inode)
5596 return ERR_PTR(-ENOMEM);
5597
5598 BTRFS_I(inode)->root = root;
5599 memcpy(&BTRFS_I(inode)->location, key, sizeof(*key));
5600 set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags);
5601
5602 inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID;
5603 inode->i_op = &btrfs_dir_ro_inode_operations;
5604 inode->i_fop = &simple_dir_operations;
5605 inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
5606 inode->i_mtime = current_fs_time(inode->i_sb);
5607 inode->i_atime = inode->i_mtime;
5608 inode->i_ctime = inode->i_mtime;
5609 BTRFS_I(inode)->i_otime = inode->i_mtime;
5610
5611 return inode;
5612}
5613
5614struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
5615{
5616 struct inode *inode;
5617 struct btrfs_root *root = BTRFS_I(dir)->root;
5618 struct btrfs_root *sub_root = root;
5619 struct btrfs_key location;
5620 int index;
5621 int ret = 0;
5622
5623 if (dentry->d_name.len > BTRFS_NAME_LEN)
5624 return ERR_PTR(-ENAMETOOLONG);
5625
5626 ret = btrfs_inode_by_name(dir, dentry, &location);
5627 if (ret < 0)
5628 return ERR_PTR(ret);
5629
5630 if (location.objectid == 0)
5631 return ERR_PTR(-ENOENT);
5632
5633 if (location.type == BTRFS_INODE_ITEM_KEY) {
5634 inode = btrfs_iget(dir->i_sb, &location, root, NULL);
5635 return inode;
5636 }
5637
5638 BUG_ON(location.type != BTRFS_ROOT_ITEM_KEY);
5639
5640 index = srcu_read_lock(&root->fs_info->subvol_srcu);
5641 ret = fixup_tree_root_location(root, dir, dentry,
5642 &location, &sub_root);
5643 if (ret < 0) {
5644 if (ret != -ENOENT)
5645 inode = ERR_PTR(ret);
5646 else
5647 inode = new_simple_dir(dir->i_sb, &location, sub_root);
5648 } else {
5649 inode = btrfs_iget(dir->i_sb, &location, sub_root, NULL);
5650 }
5651 srcu_read_unlock(&root->fs_info->subvol_srcu, index);
5652
5653 if (!IS_ERR(inode) && root != sub_root) {
5654 down_read(&root->fs_info->cleanup_work_sem);
5655 if (!(inode->i_sb->s_flags & MS_RDONLY))
5656 ret = btrfs_orphan_cleanup(sub_root);
5657 up_read(&root->fs_info->cleanup_work_sem);
5658 if (ret) {
5659 iput(inode);
5660 inode = ERR_PTR(ret);
5661 }
5662 }
5663
5664 return inode;
5665}
5666
5667static int btrfs_dentry_delete(const struct dentry *dentry)
5668{
5669 struct btrfs_root *root;
5670 struct inode *inode = d_inode(dentry);
5671
5672 if (!inode && !IS_ROOT(dentry))
5673 inode = d_inode(dentry->d_parent);
5674
5675 if (inode) {
5676 root = BTRFS_I(inode)->root;
5677 if (btrfs_root_refs(&root->root_item) == 0)
5678 return 1;
5679
5680 if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
5681 return 1;
5682 }
5683 return 0;
5684}
5685
5686static void btrfs_dentry_release(struct dentry *dentry)
5687{
5688 kfree(dentry->d_fsdata);
5689}
5690
5691static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
5692 unsigned int flags)
5693{
5694 struct inode *inode;
5695
5696 inode = btrfs_lookup_dentry(dir, dentry);
5697 if (IS_ERR(inode)) {
5698 if (PTR_ERR(inode) == -ENOENT)
5699 inode = NULL;
5700 else
5701 return ERR_CAST(inode);
5702 }
5703
5704 return d_splice_alias(inode, dentry);
5705}
5706
/*
 * Maps the type byte stored in a directory item to the DT_* value handed
 * to dir_emit().  The table has eight entries and is indexed directly by
 * the stored type without a bounds check at the lookup site.
 * NOTE(review): presumably the entry order mirrors the BTRFS_FT_* values -
 * confirm against their definition.
 */
unsigned char btrfs_filetype_table[] = {
	DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
};
5710
/*
 * Emit directory entries for @file starting at ctx->pos.
 *
 * Directories are walked by BTRFS_DIR_INDEX_KEY items, except in the tree
 * root where BTRFS_DIR_ITEM_KEY items are used.  For the index case the
 * delayed insertion/deletion lists are fetched first: on-disk entries
 * found on the deletion list are skipped, and entries on the insertion
 * list are emitted after the on-disk walk.
 *
 * Returns 0 on success (including when the caller's buffer filled up),
 * -ENOMEM on allocation failure, or a negative error from the tree search.
 */
static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
{
	struct inode *inode = file_inode(file);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_item *item;
	struct btrfs_dir_item *di;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_path *path;
	struct list_head ins_list;
	struct list_head del_list;
	int ret;
	struct extent_buffer *leaf;
	int slot;
	unsigned char d_type;
	int over = 0;
	u32 di_cur;
	u32 di_total;
	u32 di_len;
	int key_type = BTRFS_DIR_INDEX_KEY;
	/* Names of up to sizeof(tmp_name) bytes avoid a heap allocation. */
	char tmp_name[32];
	char *name_ptr;
	int name_len;
	int is_curr = 0;
	bool emitted;


	/* The tree root is walked by dir items instead of dir indexes. */
	if (root->fs_info->tree_root == root)
		key_type = BTRFS_DIR_ITEM_KEY;

	if (!dir_emit_dots(file, ctx))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = READA_FORWARD;

	if (key_type == BTRFS_DIR_INDEX_KEY) {
		INIT_LIST_HEAD(&ins_list);
		INIT_LIST_HEAD(&del_list);
		btrfs_get_delayed_items(inode, &ins_list, &del_list);
	}

	key.type = key_type;
	key.offset = ctx->pos;
	key.objectid = btrfs_ino(inode);

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto err;

	emitted = false;
	while (1) {
		leaf = path->nodes[0];
		slot = path->slots[0];
		/* Ran off the end of this leaf: move on to the next one. */
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto err;
			else if (ret > 0)
				break;
			continue;
		}

		item = btrfs_item_nr(slot);
		btrfs_item_key_to_cpu(leaf, &found_key, slot);

		/* Stop at the first item that is not one of our entries. */
		if (found_key.objectid != key.objectid)
			break;
		if (found_key.type != key_type)
			break;
		if (found_key.offset < ctx->pos)
			goto next;
		/* Skip on-disk entries that have a pending delayed deletion. */
		if (key_type == BTRFS_DIR_INDEX_KEY &&
		    btrfs_should_delete_dir_index(&del_list,
						  found_key.offset))
			goto next;

		ctx->pos = found_key.offset;
		is_curr = 1;

		di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
		di_cur = 0;
		di_total = btrfs_item_size(leaf, item);

		/* One item may pack several dir entries back to back. */
		while (di_cur < di_total) {
			struct btrfs_key location;

			if (verify_dir_item(root, leaf, di))
				break;

			name_len = btrfs_dir_name_len(leaf, di);
			if (name_len <= sizeof(tmp_name)) {
				name_ptr = tmp_name;
			} else {
				name_ptr = kmalloc(name_len, GFP_KERNEL);
				if (!name_ptr) {
					ret = -ENOMEM;
					goto err;
				}
			}
			/* The name bytes follow the dir item header directly. */
			read_extent_buffer(leaf, name_ptr,
					   (unsigned long)(di + 1), name_len);

			d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
			btrfs_dir_item_key_to_cpu(leaf, di, &location);

			/*
			 * An entry whose location is the root item of the
			 * very root being read is not emitted.
			 */
			if (location.type == BTRFS_ROOT_ITEM_KEY &&
			    location.objectid == root->root_key.objectid) {
				over = 0;
				goto skip;
			}
			over = !dir_emit(ctx, name_ptr, name_len,
				       location.objectid, d_type);

skip:
			if (name_ptr != tmp_name)
				kfree(name_ptr);

			/* dir_emit() refused the entry: stop, leaving pos as is. */
			if (over)
				goto nopos;
			emitted = true;
			di_len = btrfs_dir_name_len(leaf, di) +
				 btrfs_dir_data_len(leaf, di) + sizeof(*di);
			di_cur += di_len;
			di = (struct btrfs_dir_item *)((char *)di + di_len);
		}
next:
		path->slots[0]++;
	}

	if (key_type == BTRFS_DIR_INDEX_KEY) {
		/* Step past the last on-disk entry that was processed. */
		if (is_curr)
			ctx->pos++;
		ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list, &emitted);
		if (ret)
			goto nopos;
	}

	/*
	 * Nothing was emitted by this call and the position is already past
	 * the two dot entries: leave ctx->pos untouched.
	 */
	if (ctx->pos > 2 && !emitted)
		goto nopos;

	/* Reached the end of the directory. */
	ctx->pos++;

	/*
	 * For index-keyed directories the end position is then pushed to
	 * INT_MAX, or to LLONG_MAX if it has already reached INT_MAX.
	 */
	if (key_type == BTRFS_DIR_INDEX_KEY) {
		if (ctx->pos >= INT_MAX)
			ctx->pos = LLONG_MAX;
		else
			ctx->pos = INT_MAX;
	}
nopos:
	ret = 0;
err:
	if (key_type == BTRFS_DIR_INDEX_KEY)
		btrfs_put_delayed_items(&ins_list, &del_list);
	btrfs_free_path(path);
	return ret;
}
5905
5906int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
5907{
5908 struct btrfs_root *root = BTRFS_I(inode)->root;
5909 struct btrfs_trans_handle *trans;
5910 int ret = 0;
5911 bool nolock = false;
5912
5913 if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
5914 return 0;
5915
5916 if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(inode))
5917 nolock = true;
5918
5919 if (wbc->sync_mode == WB_SYNC_ALL) {
5920 if (nolock)
5921 trans = btrfs_join_transaction_nolock(root);
5922 else
5923 trans = btrfs_join_transaction(root);
5924 if (IS_ERR(trans))
5925 return PTR_ERR(trans);
5926 ret = btrfs_commit_transaction(trans, root);
5927 }
5928 return ret;
5929}
5930
5931
5932
5933
5934
5935
5936
5937static int btrfs_dirty_inode(struct inode *inode)
5938{
5939 struct btrfs_root *root = BTRFS_I(inode)->root;
5940 struct btrfs_trans_handle *trans;
5941 int ret;
5942
5943 if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
5944 return 0;
5945
5946 trans = btrfs_join_transaction(root);
5947 if (IS_ERR(trans))
5948 return PTR_ERR(trans);
5949
5950 ret = btrfs_update_inode(trans, root, inode);
5951 if (ret && ret == -ENOSPC) {
5952
5953 btrfs_end_transaction(trans, root);
5954 trans = btrfs_start_transaction(root, 1);
5955 if (IS_ERR(trans))
5956 return PTR_ERR(trans);
5957
5958 ret = btrfs_update_inode(trans, root, inode);
5959 }
5960 btrfs_end_transaction(trans, root);
5961 if (BTRFS_I(inode)->delayed_node)
5962 btrfs_balance_delayed_items(root);
5963
5964 return ret;
5965}
5966
5967
5968
5969
5970
5971static int btrfs_update_time(struct inode *inode, struct timespec *now,
5972 int flags)
5973{
5974 struct btrfs_root *root = BTRFS_I(inode)->root;
5975
5976 if (btrfs_root_readonly(root))
5977 return -EROFS;
5978
5979 if (flags & S_VERSION)
5980 inode_inc_iversion(inode);
5981 if (flags & S_CTIME)
5982 inode->i_ctime = *now;
5983 if (flags & S_MTIME)
5984 inode->i_mtime = *now;
5985 if (flags & S_ATIME)
5986 inode->i_atime = *now;
5987 return btrfs_dirty_inode(inode);
5988}
5989
5990
5991
5992
5993
5994
5995static int btrfs_set_inode_index_count(struct inode *inode)
5996{
5997 struct btrfs_root *root = BTRFS_I(inode)->root;
5998 struct btrfs_key key, found_key;
5999 struct btrfs_path *path;
6000 struct extent_buffer *leaf;
6001 int ret;
6002
6003 key.objectid = btrfs_ino(inode);
6004 key.type = BTRFS_DIR_INDEX_KEY;
6005 key.offset = (u64)-1;
6006
6007 path = btrfs_alloc_path();
6008 if (!path)
6009 return -ENOMEM;
6010
6011 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6012 if (ret < 0)
6013 goto out;
6014
6015 if (ret == 0)
6016 goto out;
6017 ret = 0;
6018
6019
6020
6021
6022
6023
6024
6025 if (path->slots[0] == 0) {
6026 BTRFS_I(inode)->index_cnt = 2;
6027 goto out;
6028 }
6029
6030 path->slots[0]--;
6031
6032 leaf = path->nodes[0];
6033 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
6034
6035 if (found_key.objectid != btrfs_ino(inode) ||
6036 found_key.type != BTRFS_DIR_INDEX_KEY) {
6037 BTRFS_I(inode)->index_cnt = 2;
6038 goto out;
6039 }
6040
6041 BTRFS_I(inode)->index_cnt = found_key.offset + 1;
6042out:
6043 btrfs_free_path(path);
6044 return ret;
6045}
6046
6047
6048
6049
6050
6051int btrfs_set_inode_index(struct inode *dir, u64 *index)
6052{
6053 int ret = 0;
6054
6055 if (BTRFS_I(dir)->index_cnt == (u64)-1) {
6056 ret = btrfs_inode_delayed_dir_index_count(dir);
6057 if (ret) {
6058 ret = btrfs_set_inode_index_count(dir);
6059 if (ret)
6060 return ret;
6061 }
6062 }
6063
6064 *index = BTRFS_I(dir)->index_cnt;
6065 BTRFS_I(dir)->index_cnt++;
6066
6067 return ret;
6068}
6069
6070static int btrfs_insert_inode_locked(struct inode *inode)
6071{
6072 struct btrfs_iget_args args;
6073 args.location = &BTRFS_I(inode)->location;
6074 args.root = BTRFS_I(inode)->root;
6075
6076 return insert_inode_locked4(inode,
6077 btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root),
6078 btrfs_find_actor, &args);
6079}
6080
/*
 * Allocate a new in-memory inode numbered @objectid in @root and insert
 * its inode item (plus, when @name is given, an inode ref keyed by
 * @ref_objectid) into the tree within @trans.
 *
 * @dir:   parent directory; passed to inode_init_owner(),
 *         btrfs_inherit_iflags() and btrfs_inode_inherit_props().
 * @name:  entry name, or NULL for an inode without a name; in that case
 *         the link count is set to 0 and only the inode item is inserted.
 * @index: receives the directory index when both @dir and @name are set,
 *         0 when only @dir is set.  It is dereferenced unconditionally
 *         below, so it must be a valid pointer even when @dir is NULL.
 *
 * Returns the new inode, still locked as inserted by
 * btrfs_insert_inode_locked() (no unlock_new_inode() on the success
 * path), or an ERR_PTR on failure.
 */
static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     struct inode *dir,
				     const char *name, int name_len,
				     u64 ref_objectid, u64 objectid,
				     umode_t mode, u64 *index)
{
	struct inode *inode;
	struct btrfs_inode_item *inode_item;
	struct btrfs_key *location;
	struct btrfs_path *path;
	struct btrfs_inode_ref *ref;
	struct btrfs_key key[2];
	u32 sizes[2];
	/* Inode item only, or inode item plus inode ref. */
	int nitems = name ? 2 : 1;
	unsigned long ptr;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return ERR_PTR(-ENOMEM);

	inode = new_inode(root->fs_info->sb);
	if (!inode) {
		btrfs_free_path(path);
		return ERR_PTR(-ENOMEM);
	}

	/* An inode created without a name starts out with no links. */
	if (!name)
		set_nlink(inode, 0);

	/* Set the number now; btrfs_insert_inode_locked() hashes i_ino. */
	inode->i_ino = objectid;

	if (dir && name) {
		trace_btrfs_inode_request(dir);

		ret = btrfs_set_inode_index(dir, index);
		if (ret) {
			btrfs_free_path(path);
			iput(inode);
			return ERR_PTR(ret);
		}
	} else if (dir) {
		*index = 0;
	}

	/* Initial per-inode bookkeeping for the new inode. */
	BTRFS_I(inode)->index_cnt = 2;
	BTRFS_I(inode)->dir_index = *index;
	BTRFS_I(inode)->root = root;
	BTRFS_I(inode)->generation = trans->transid;
	inode->i_generation = BTRFS_I(inode)->generation;

	/* New inodes are flagged as needing a full sync. */
	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);

	key[0].objectid = objectid;
	key[0].type = BTRFS_INODE_ITEM_KEY;
	key[0].offset = 0;

	sizes[0] = sizeof(struct btrfs_inode_item);

	if (name) {
		/* Second item: the ref, whose payload is header plus name. */
		key[1].objectid = objectid;
		key[1].type = BTRFS_INODE_REF_KEY;
		key[1].offset = ref_objectid;

		sizes[1] = name_len + sizeof(*ref);
	}

	location = &BTRFS_I(inode)->location;
	location->objectid = objectid;
	location->offset = 0;
	location->type = BTRFS_INODE_ITEM_KEY;

	ret = btrfs_insert_inode_locked(inode);
	if (ret < 0)
		goto fail;

	path->leave_spinning = 1;
	ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems);
	if (ret != 0)
		goto fail_unlock;

	inode_init_owner(inode, dir, mode);
	inode_set_bytes(inode, 0);

	inode->i_mtime = current_fs_time(inode->i_sb);
	inode->i_atime = inode->i_mtime;
	inode->i_ctime = inode->i_mtime;
	BTRFS_I(inode)->i_otime = inode->i_mtime;

	/* Zero the reserved inode item, then fill it from the inode. */
	inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
				  struct btrfs_inode_item);
	memset_extent_buffer(path->nodes[0], 0, (unsigned long)inode_item,
			     sizeof(*inode_item));
	fill_inode_item(trans, path->nodes[0], inode_item, inode);

	if (name) {
		/* The ref sits in the slot right after the inode item. */
		ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
				     struct btrfs_inode_ref);
		btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
		btrfs_set_inode_ref_index(path->nodes[0], ref, *index);
		ptr = (unsigned long)(ref + 1);
		write_extent_buffer(path->nodes[0], name, ptr, name_len);
	}

	btrfs_mark_buffer_dirty(path->nodes[0]);
	btrfs_free_path(path);

	btrfs_inherit_iflags(inode, dir);

	/* Regular files pick up the nodatasum/nodatacow mount options. */
	if (S_ISREG(mode)) {
		if (btrfs_test_opt(root, NODATASUM))
			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
		if (btrfs_test_opt(root, NODATACOW))
			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW |
				BTRFS_INODE_NODATASUM;
	}

	inode_tree_add(inode);

	trace_btrfs_inode_new(inode);
	btrfs_set_inode_last_trans(trans, inode);

	btrfs_update_root_times(trans, root);

	/* Failure to inherit properties is logged but does not fail creation. */
	ret = btrfs_inode_inherit_props(trans, inode, dir);
	if (ret)
		btrfs_err(root->fs_info,
			  "error inheriting props for ino %llu (root %llu): %d",
			  btrfs_ino(inode), root->root_key.objectid, ret);

	return inode;

fail_unlock:
	unlock_new_inode(inode);
fail:
	/* Give back the directory index taken by btrfs_set_inode_index(). */
	if (dir && name)
		BTRFS_I(dir)->index_cnt--;
	btrfs_free_path(path);
	iput(inode);
	return ERR_PTR(ret);
}
6247
6248static inline u8 btrfs_inode_type(struct inode *inode)
6249{
6250 return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
6251}
6252
6253
6254
6255
6256
6257
6258
6259int btrfs_add_link(struct btrfs_trans_handle *trans,
6260 struct inode *parent_inode, struct inode *inode,
6261 const char *name, int name_len, int add_backref, u64 index)
6262{
6263 int ret = 0;
6264 struct btrfs_key key;
6265 struct btrfs_root *root = BTRFS_I(parent_inode)->root;
6266 u64 ino = btrfs_ino(inode);
6267 u64 parent_ino = btrfs_ino(parent_inode);
6268
6269 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
6270 memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key));
6271 } else {
6272 key.objectid = ino;
6273 key.type = BTRFS_INODE_ITEM_KEY;
6274 key.offset = 0;
6275 }
6276
6277 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
6278 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
6279 key.objectid, root->root_key.objectid,
6280 parent_ino, index, name, name_len);
6281 } else if (add_backref) {
6282 ret = btrfs_insert_inode_ref(trans, root, name, name_len, ino,
6283 parent_ino, index);
6284 }
6285
6286
6287 if (ret)
6288 return ret;
6289
6290 ret = btrfs_insert_dir_item(trans, root, name, name_len,
6291 parent_inode, &key,
6292 btrfs_inode_type(inode), index);
6293 if (ret == -EEXIST || ret == -EOVERFLOW)
6294 goto fail_dir_item;
6295 else if (ret) {
6296 btrfs_abort_transaction(trans, root, ret);
6297 return ret;
6298 }
6299
6300 btrfs_i_size_write(parent_inode, parent_inode->i_size +
6301 name_len * 2);
6302 inode_inc_iversion(parent_inode);
6303 parent_inode->i_mtime = parent_inode->i_ctime =
6304 current_fs_time(parent_inode->i_sb);
6305 ret = btrfs_update_inode(trans, root, parent_inode);
6306 if (ret)
6307 btrfs_abort_transaction(trans, root, ret);
6308 return ret;
6309
6310fail_dir_item:
6311 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
6312 u64 local_index;
6313 int err;
6314 err = btrfs_del_root_ref(trans, root->fs_info->tree_root,
6315 key.objectid, root->root_key.objectid,
6316 parent_ino, &local_index, name, name_len);
6317
6318 } else if (add_backref) {
6319 u64 local_index;
6320 int err;
6321
6322 err = btrfs_del_inode_ref(trans, root, name, name_len,
6323 ino, parent_ino, &local_index);
6324 }
6325 return ret;
6326}
6327
6328static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
6329 struct inode *dir, struct dentry *dentry,
6330 struct inode *inode, int backref, u64 index)
6331{
6332 int err = btrfs_add_link(trans, dir, inode,
6333 dentry->d_name.name, dentry->d_name.len,
6334 backref, index);
6335 if (err > 0)
6336 err = -EEXIST;
6337 return err;
6338}
6339
6340static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
6341 umode_t mode, dev_t rdev)
6342{
6343 struct btrfs_trans_handle *trans;
6344 struct btrfs_root *root = BTRFS_I(dir)->root;
6345 struct inode *inode = NULL;
6346 int err;
6347 int drop_inode = 0;
6348 u64 objectid;
6349 u64 index = 0;
6350
6351
6352
6353
6354
6355
6356 trans = btrfs_start_transaction(root, 5);
6357 if (IS_ERR(trans))
6358 return PTR_ERR(trans);
6359
6360 err = btrfs_find_free_ino(root, &objectid);
6361 if (err)
6362 goto out_unlock;
6363
6364 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
6365 dentry->d_name.len, btrfs_ino(dir), objectid,
6366 mode, &index);
6367 if (IS_ERR(inode)) {
6368 err = PTR_ERR(inode);
6369 goto out_unlock;
6370 }
6371
6372
6373
6374
6375
6376
6377
6378 inode->i_op = &btrfs_special_inode_operations;
6379 init_special_inode(inode, inode->i_mode, rdev);
6380
6381 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
6382 if (err)
6383 goto out_unlock_inode;
6384
6385 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
6386 if (err) {
6387 goto out_unlock_inode;
6388 } else {
6389 btrfs_update_inode(trans, root, inode);
6390 unlock_new_inode(inode);
6391 d_instantiate(dentry, inode);
6392 }
6393
6394out_unlock:
6395 btrfs_end_transaction(trans, root);
6396 btrfs_balance_delayed_items(root);
6397 btrfs_btree_balance_dirty(root);
6398 if (drop_inode) {
6399 inode_dec_link_count(inode);
6400 iput(inode);
6401 }
6402 return err;
6403
6404out_unlock_inode:
6405 drop_inode = 1;
6406 unlock_new_inode(inode);
6407 goto out_unlock;
6408
6409}
6410
6411static int btrfs_create(struct inode *dir, struct dentry *dentry,
6412 umode_t mode, bool excl)
6413{
6414 struct btrfs_trans_handle *trans;
6415 struct btrfs_root *root = BTRFS_I(dir)->root;
6416 struct inode *inode = NULL;
6417 int drop_inode_on_err = 0;
6418 int err;
6419 u64 objectid;
6420 u64 index = 0;
6421
6422
6423
6424
6425
6426
6427 trans = btrfs_start_transaction(root, 5);
6428 if (IS_ERR(trans))
6429 return PTR_ERR(trans);
6430
6431 err = btrfs_find_free_ino(root, &objectid);
6432 if (err)
6433 goto out_unlock;
6434
6435 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
6436 dentry->d_name.len, btrfs_ino(dir), objectid,
6437 mode, &index);
6438 if (IS_ERR(inode)) {
6439 err = PTR_ERR(inode);
6440 goto out_unlock;
6441 }
6442 drop_inode_on_err = 1;
6443
6444
6445
6446
6447
6448
6449 inode->i_fop = &btrfs_file_operations;
6450 inode->i_op = &btrfs_file_inode_operations;
6451 inode->i_mapping->a_ops = &btrfs_aops;
6452
6453 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
6454 if (err)
6455 goto out_unlock_inode;
6456
6457 err = btrfs_update_inode(trans, root, inode);
6458 if (err)
6459 goto out_unlock_inode;
6460
6461 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
6462 if (err)
6463 goto out_unlock_inode;
6464
6465 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
6466 unlock_new_inode(inode);
6467 d_instantiate(dentry, inode);
6468
6469out_unlock:
6470 btrfs_end_transaction(trans, root);
6471 if (err && drop_inode_on_err) {
6472 inode_dec_link_count(inode);
6473 iput(inode);
6474 }
6475 btrfs_balance_delayed_items(root);
6476 btrfs_btree_balance_dirty(root);
6477 return err;
6478
6479out_unlock_inode:
6480 unlock_new_inode(inode);
6481 goto out_unlock;
6482
6483}
6484
6485static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
6486 struct dentry *dentry)
6487{
6488 struct btrfs_trans_handle *trans = NULL;
6489 struct btrfs_root *root = BTRFS_I(dir)->root;
6490 struct inode *inode = d_inode(old_dentry);
6491 u64 index;
6492 int err;
6493 int drop_inode = 0;
6494
6495
6496 if (root->objectid != BTRFS_I(inode)->root->objectid)
6497 return -EXDEV;
6498
6499 if (inode->i_nlink >= BTRFS_LINK_MAX)
6500 return -EMLINK;
6501
6502 err = btrfs_set_inode_index(dir, &index);
6503 if (err)
6504 goto fail;
6505
6506
6507
6508
6509
6510
6511 trans = btrfs_start_transaction(root, 5);
6512 if (IS_ERR(trans)) {
6513 err = PTR_ERR(trans);
6514 trans = NULL;
6515 goto fail;
6516 }
6517
6518
6519 BTRFS_I(inode)->dir_index = 0ULL;
6520 inc_nlink(inode);
6521 inode_inc_iversion(inode);
6522 inode->i_ctime = current_fs_time(inode->i_sb);
6523 ihold(inode);
6524 set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
6525
6526 err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index);
6527
6528 if (err) {
6529 drop_inode = 1;
6530 } else {
6531 struct dentry *parent = dentry->d_parent;
6532 err = btrfs_update_inode(trans, root, inode);
6533 if (err)
6534 goto fail;
6535 if (inode->i_nlink == 1) {
6536
6537
6538
6539
6540 err = btrfs_orphan_del(trans, inode);
6541 if (err)
6542 goto fail;
6543 }
6544 d_instantiate(dentry, inode);
6545 btrfs_log_new_name(trans, inode, NULL, parent);
6546 }
6547
6548 btrfs_balance_delayed_items(root);
6549fail:
6550 if (trans)
6551 btrfs_end_transaction(trans, root);
6552 if (drop_inode) {
6553 inode_dec_link_count(inode);
6554 iput(inode);
6555 }
6556 btrfs_btree_balance_dirty(root);
6557 return err;
6558}
6559
6560static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
6561{
6562 struct inode *inode = NULL;
6563 struct btrfs_trans_handle *trans;
6564 struct btrfs_root *root = BTRFS_I(dir)->root;
6565 int err = 0;
6566 int drop_on_err = 0;
6567 u64 objectid = 0;
6568 u64 index = 0;
6569
6570
6571
6572
6573
6574
6575 trans = btrfs_start_transaction(root, 5);
6576 if (IS_ERR(trans))
6577 return PTR_ERR(trans);
6578
6579 err = btrfs_find_free_ino(root, &objectid);
6580 if (err)
6581 goto out_fail;
6582
6583 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
6584 dentry->d_name.len, btrfs_ino(dir), objectid,
6585 S_IFDIR | mode, &index);
6586 if (IS_ERR(inode)) {
6587 err = PTR_ERR(inode);
6588 goto out_fail;
6589 }
6590
6591 drop_on_err = 1;
6592
6593 inode->i_op = &btrfs_dir_inode_operations;
6594 inode->i_fop = &btrfs_dir_file_operations;
6595
6596 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
6597 if (err)
6598 goto out_fail_inode;
6599
6600 btrfs_i_size_write(inode, 0);
6601 err = btrfs_update_inode(trans, root, inode);
6602 if (err)
6603 goto out_fail_inode;
6604
6605 err = btrfs_add_link(trans, dir, inode, dentry->d_name.name,
6606 dentry->d_name.len, 0, index);
6607 if (err)
6608 goto out_fail_inode;
6609
6610 d_instantiate(dentry, inode);
6611
6612
6613
6614
6615 unlock_new_inode(inode);
6616 drop_on_err = 0;
6617
6618out_fail:
6619 btrfs_end_transaction(trans, root);
6620 if (drop_on_err) {
6621 inode_dec_link_count(inode);
6622 iput(inode);
6623 }
6624 btrfs_balance_delayed_items(root);
6625 btrfs_btree_balance_dirty(root);
6626 return err;
6627
6628out_fail_inode:
6629 unlock_new_inode(inode);
6630 goto out_fail;
6631}
6632
6633
6634static struct extent_map *next_extent_map(struct extent_map *em)
6635{
6636 struct rb_node *next;
6637
6638 next = rb_next(&em->rb_node);
6639 if (!next)
6640 return NULL;
6641 return container_of(next, struct extent_map, rb_node);
6642}
6643
6644static struct extent_map *prev_extent_map(struct extent_map *em)
6645{
6646 struct rb_node *prev;
6647
6648 prev = rb_prev(&em->rb_node);
6649 if (!prev)
6650 return NULL;
6651 return container_of(prev, struct extent_map, rb_node);
6652}
6653
6654
6655
6656
6657
6658
6659static int merge_extent_mapping(struct extent_map_tree *em_tree,
6660 struct extent_map *existing,
6661 struct extent_map *em,
6662 u64 map_start)
6663{
6664 struct extent_map *prev;
6665 struct extent_map *next;
6666 u64 start;
6667 u64 end;
6668 u64 start_diff;
6669
6670 BUG_ON(map_start < em->start || map_start >= extent_map_end(em));
6671
6672 if (existing->start > map_start) {
6673 next = existing;
6674 prev = prev_extent_map(next);
6675 } else {
6676 prev = existing;
6677 next = next_extent_map(prev);
6678 }
6679
6680 start = prev ? extent_map_end(prev) : em->start;
6681 start = max_t(u64, start, em->start);
6682 end = next ? next->start : extent_map_end(em);
6683 end = min_t(u64, end, extent_map_end(em));
6684 start_diff = start - em->start;
6685 em->start = start;
6686 em->len = end - start;
6687 if (em->block_start < EXTENT_MAP_LAST_BYTE &&
6688 !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
6689 em->block_start += start_diff;
6690 em->block_len -= start_diff;
6691 }
6692 return add_extent_mapping(em_tree, em, 0);
6693}
6694
6695static noinline int uncompress_inline(struct btrfs_path *path,
6696 struct page *page,
6697 size_t pg_offset, u64 extent_offset,
6698 struct btrfs_file_extent_item *item)
6699{
6700 int ret;
6701 struct extent_buffer *leaf = path->nodes[0];
6702 char *tmp;
6703 size_t max_size;
6704 unsigned long inline_size;
6705 unsigned long ptr;
6706 int compress_type;
6707
6708 WARN_ON(pg_offset != 0);
6709 compress_type = btrfs_file_extent_compression(leaf, item);
6710 max_size = btrfs_file_extent_ram_bytes(leaf, item);
6711 inline_size = btrfs_file_extent_inline_item_len(leaf,
6712 btrfs_item_nr(path->slots[0]));
6713 tmp = kmalloc(inline_size, GFP_NOFS);
6714 if (!tmp)
6715 return -ENOMEM;
6716 ptr = btrfs_file_extent_inline_start(item);
6717
6718 read_extent_buffer(leaf, tmp, ptr, inline_size);
6719
6720 max_size = min_t(unsigned long, PAGE_SIZE, max_size);
6721 ret = btrfs_decompress(compress_type, tmp, page,
6722 extent_offset, inline_size, max_size);
6723 kfree(tmp);
6724 return ret;
6725}
6726
6727
6728
6729
6730
6731
6732
6733
6734
6735
6736struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
6737 size_t pg_offset, u64 start, u64 len,
6738 int create)
6739{
6740 int ret;
6741 int err = 0;
6742 u64 extent_start = 0;
6743 u64 extent_end = 0;
6744 u64 objectid = btrfs_ino(inode);
6745 u32 found_type;
6746 struct btrfs_path *path = NULL;
6747 struct btrfs_root *root = BTRFS_I(inode)->root;
6748 struct btrfs_file_extent_item *item;
6749 struct extent_buffer *leaf;
6750 struct btrfs_key found_key;
6751 struct extent_map *em = NULL;
6752 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
6753 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
6754 struct btrfs_trans_handle *trans = NULL;
6755 const bool new_inline = !page || create;
6756
6757again:
6758 read_lock(&em_tree->lock);
6759 em = lookup_extent_mapping(em_tree, start, len);
6760 if (em)
6761 em->bdev = root->fs_info->fs_devices->latest_bdev;
6762 read_unlock(&em_tree->lock);
6763
6764 if (em) {
6765 if (em->start > start || em->start + em->len <= start)
6766 free_extent_map(em);
6767 else if (em->block_start == EXTENT_MAP_INLINE && page)
6768 free_extent_map(em);
6769 else
6770 goto out;
6771 }
6772 em = alloc_extent_map();
6773 if (!em) {
6774 err = -ENOMEM;
6775 goto out;
6776 }
6777 em->bdev = root->fs_info->fs_devices->latest_bdev;
6778 em->start = EXTENT_MAP_HOLE;
6779 em->orig_start = EXTENT_MAP_HOLE;
6780 em->len = (u64)-1;
6781 em->block_len = (u64)-1;
6782
6783 if (!path) {
6784 path = btrfs_alloc_path();
6785 if (!path) {
6786 err = -ENOMEM;
6787 goto out;
6788 }
6789
6790
6791
6792
6793 path->reada = READA_FORWARD;
6794 }
6795
6796 ret = btrfs_lookup_file_extent(trans, root, path,
6797 objectid, start, trans != NULL);
6798 if (ret < 0) {
6799 err = ret;
6800 goto out;
6801 }
6802
6803 if (ret != 0) {
6804 if (path->slots[0] == 0)
6805 goto not_found;
6806 path->slots[0]--;
6807 }
6808
6809 leaf = path->nodes[0];
6810 item = btrfs_item_ptr(leaf, path->slots[0],
6811 struct btrfs_file_extent_item);
6812
6813 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
6814 found_type = found_key.type;
6815 if (found_key.objectid != objectid ||
6816 found_type != BTRFS_EXTENT_DATA_KEY) {
6817
6818
6819
6820
6821
6822
6823 extent_end = start;
6824 goto next;
6825 }
6826
6827 found_type = btrfs_file_extent_type(leaf, item);
6828 extent_start = found_key.offset;
6829 if (found_type == BTRFS_FILE_EXTENT_REG ||
6830 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
6831 extent_end = extent_start +
6832 btrfs_file_extent_num_bytes(leaf, item);
6833 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
6834 size_t size;
6835 size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
6836 extent_end = ALIGN(extent_start + size, root->sectorsize);
6837 }
6838next:
6839 if (start >= extent_end) {
6840 path->slots[0]++;
6841 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
6842 ret = btrfs_next_leaf(root, path);
6843 if (ret < 0) {
6844 err = ret;
6845 goto out;
6846 }
6847 if (ret > 0)
6848 goto not_found;
6849 leaf = path->nodes[0];
6850 }
6851 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
6852 if (found_key.objectid != objectid ||
6853 found_key.type != BTRFS_EXTENT_DATA_KEY)
6854 goto not_found;
6855 if (start + len <= found_key.offset)
6856 goto not_found;
6857 if (start > found_key.offset)
6858 goto next;
6859 em->start = start;
6860 em->orig_start = start;
6861 em->len = found_key.offset - start;
6862 goto not_found_em;
6863 }
6864
6865 btrfs_extent_item_to_extent_map(inode, path, item, new_inline, em);
6866
6867 if (found_type == BTRFS_FILE_EXTENT_REG ||
6868 found_type == BTRFS_FILE_EXTENT_PREALLOC) {
6869 goto insert;
6870 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
6871 unsigned long ptr;
6872 char *map;
6873 size_t size;
6874 size_t extent_offset;
6875 size_t copy_size;
6876
6877 if (new_inline)
6878 goto out;
6879
6880 size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
6881 extent_offset = page_offset(page) + pg_offset - extent_start;
6882 copy_size = min_t(u64, PAGE_SIZE - pg_offset,
6883 size - extent_offset);
6884 em->start = extent_start + extent_offset;
6885 em->len = ALIGN(copy_size, root->sectorsize);
6886 em->orig_block_len = em->len;
6887 em->orig_start = em->start;
6888 ptr = btrfs_file_extent_inline_start(item) + extent_offset;
6889 if (create == 0 && !PageUptodate(page)) {
6890 if (btrfs_file_extent_compression(leaf, item) !=
6891 BTRFS_COMPRESS_NONE) {
6892 ret = uncompress_inline(path, page, pg_offset,
6893 extent_offset, item);
6894 if (ret) {
6895 err = ret;
6896 goto out;
6897 }
6898 } else {
6899 map = kmap(page);
6900 read_extent_buffer(leaf, map + pg_offset, ptr,
6901 copy_size);
6902 if (pg_offset + copy_size < PAGE_SIZE) {
6903 memset(map + pg_offset + copy_size, 0,
6904 PAGE_SIZE - pg_offset -
6905 copy_size);
6906 }
6907 kunmap(page);
6908 }
6909 flush_dcache_page(page);
6910 } else if (create && PageUptodate(page)) {
6911 BUG();
6912 if (!trans) {
6913 kunmap(page);
6914 free_extent_map(em);
6915 em = NULL;
6916
6917 btrfs_release_path(path);
6918 trans = btrfs_join_transaction(root);
6919
6920 if (IS_ERR(trans))
6921 return ERR_CAST(trans);
6922 goto again;
6923 }
6924 map = kmap(page);
6925 write_extent_buffer(leaf, map + pg_offset, ptr,
6926 copy_size);
6927 kunmap(page);
6928 btrfs_mark_buffer_dirty(leaf);
6929 }
6930 set_extent_uptodate(io_tree, em->start,
6931 extent_map_end(em) - 1, NULL, GFP_NOFS);
6932 goto insert;
6933 }
6934not_found:
6935 em->start = start;
6936 em->orig_start = start;
6937 em->len = len;
6938not_found_em:
6939 em->block_start = EXTENT_MAP_HOLE;
6940 set_bit(EXTENT_FLAG_VACANCY, &em->flags);
6941insert:
6942 btrfs_release_path(path);
6943 if (em->start > start || extent_map_end(em) <= start) {
6944 btrfs_err(root->fs_info, "bad extent! em: [%llu %llu] passed [%llu %llu]",
6945 em->start, em->len, start, len);
6946 err = -EIO;
6947 goto out;
6948 }
6949
6950 err = 0;
6951 write_lock(&em_tree->lock);
6952 ret = add_extent_mapping(em_tree, em, 0);
6953
6954
6955
6956
6957 if (ret == -EEXIST) {
6958 struct extent_map *existing;
6959
6960 ret = 0;
6961
6962 existing = search_extent_mapping(em_tree, start, len);
6963
6964
6965
6966
6967 if (start >= extent_map_end(existing) ||
6968 start <= existing->start) {
6969
6970
6971
6972
6973 err = merge_extent_mapping(em_tree, existing,
6974 em, start);
6975 free_extent_map(existing);
6976 if (err) {
6977 free_extent_map(em);
6978 em = NULL;
6979 }
6980 } else {
6981 free_extent_map(em);
6982 em = existing;
6983 err = 0;
6984 }
6985 }
6986 write_unlock(&em_tree->lock);
6987out:
6988
6989 trace_btrfs_get_extent(root, em);
6990
6991 btrfs_free_path(path);
6992 if (trans) {
6993 ret = btrfs_end_transaction(trans, root);
6994 if (!err)
6995 err = ret;
6996 }
6997 if (err) {
6998 free_extent_map(em);
6999 return ERR_PTR(err);
7000 }
7001 BUG_ON(!em);
7002 return em;
7003}
7004
7005struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
7006 size_t pg_offset, u64 start, u64 len,
7007 int create)
7008{
7009 struct extent_map *em;
7010 struct extent_map *hole_em = NULL;
7011 u64 range_start = start;
7012 u64 end;
7013 u64 found;
7014 u64 found_end;
7015 int err = 0;
7016
7017 em = btrfs_get_extent(inode, page, pg_offset, start, len, create);
7018 if (IS_ERR(em))
7019 return em;
7020 if (em) {
7021
7022
7023
7024
7025
7026
7027 if (em->block_start != EXTENT_MAP_HOLE &&
7028 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
7029 return em;
7030 else
7031 hole_em = em;
7032 }
7033
7034
7035 end = start + len;
7036 if (end < start)
7037 end = (u64)-1;
7038 else
7039 end -= 1;
7040
7041 em = NULL;
7042
7043
7044 found = count_range_bits(&BTRFS_I(inode)->io_tree, &range_start,
7045 end, len, EXTENT_DELALLOC, 1);
7046 found_end = range_start + found;
7047 if (found_end < range_start)
7048 found_end = (u64)-1;
7049
7050
7051
7052
7053
7054 if (range_start > end || found_end <= start) {
7055 em = hole_em;
7056 hole_em = NULL;
7057 goto out;
7058 }
7059
7060
7061
7062
7063 range_start = max(start, range_start);
7064 found = found_end - range_start;
7065
7066 if (found > 0) {
7067 u64 hole_start = start;
7068 u64 hole_len = len;
7069
7070 em = alloc_extent_map();
7071 if (!em) {
7072 err = -ENOMEM;
7073 goto out;
7074 }
7075
7076
7077
7078
7079
7080
7081
7082
7083 if (hole_em) {
7084 u64 calc_end = extent_map_end(hole_em);
7085
7086 if (calc_end <= start || (hole_em->start > end)) {
7087 free_extent_map(hole_em);
7088 hole_em = NULL;
7089 } else {
7090 hole_start = max(hole_em->start, start);
7091 hole_len = calc_end - hole_start;
7092 }
7093 }
7094 em->bdev = NULL;
7095 if (hole_em && range_start > hole_start) {
7096
7097
7098
7099
7100 em->len = min(hole_len,
7101 range_start - hole_start);
7102 em->start = hole_start;
7103 em->orig_start = hole_start;
7104
7105
7106
7107
7108 em->block_start = hole_em->block_start;
7109 em->block_len = hole_len;
7110 if (test_bit(EXTENT_FLAG_PREALLOC, &hole_em->flags))
7111 set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
7112 } else {
7113 em->start = range_start;
7114 em->len = found;
7115 em->orig_start = range_start;
7116 em->block_start = EXTENT_MAP_DELALLOC;
7117 em->block_len = found;
7118 }
7119 } else if (hole_em) {
7120 return hole_em;
7121 }
7122out:
7123
7124 free_extent_map(hole_em);
7125 if (err) {
7126 free_extent_map(em);
7127 return ERR_PTR(err);
7128 }
7129 return em;
7130}
7131
7132static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
7133 u64 start, u64 len)
7134{
7135 struct btrfs_root *root = BTRFS_I(inode)->root;
7136 struct extent_map *em;
7137 struct btrfs_key ins;
7138 u64 alloc_hint;
7139 int ret;
7140
7141 alloc_hint = get_extent_allocation_hint(inode, start, len);
7142 ret = btrfs_reserve_extent(root, len, root->sectorsize, 0,
7143 alloc_hint, &ins, 1, 1);
7144 if (ret)
7145 return ERR_PTR(ret);
7146
7147
7148
7149
7150
7151
7152
7153
7154
7155
7156
7157
7158 ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
7159 ins.offset, ins.offset, 0);
7160 if (ret) {
7161 btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
7162 return ERR_PTR(ret);
7163 }
7164
7165 em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
7166 ins.offset, ins.offset, ins.offset, 0);
7167 if (IS_ERR(em)) {
7168 struct btrfs_ordered_extent *oe;
7169
7170 btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
7171 oe = btrfs_lookup_ordered_extent(inode, start);
7172 ASSERT(oe);
7173 if (WARN_ON(!oe))
7174 return em;
7175 set_bit(BTRFS_ORDERED_IOERR, &oe->flags);
7176 set_bit(BTRFS_ORDERED_IO_DONE, &oe->flags);
7177 btrfs_remove_ordered_extent(inode, oe);
7178
7179 btrfs_put_ordered_extent(oe);
7180 btrfs_put_ordered_extent(oe);
7181 }
7182 return em;
7183}
7184
7185
7186
7187
7188
7189noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
7190 u64 *orig_start, u64 *orig_block_len,
7191 u64 *ram_bytes)
7192{
7193 struct btrfs_trans_handle *trans;
7194 struct btrfs_path *path;
7195 int ret;
7196 struct extent_buffer *leaf;
7197 struct btrfs_root *root = BTRFS_I(inode)->root;
7198 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
7199 struct btrfs_file_extent_item *fi;
7200 struct btrfs_key key;
7201 u64 disk_bytenr;
7202 u64 backref_offset;
7203 u64 extent_end;
7204 u64 num_bytes;
7205 int slot;
7206 int found_type;
7207 bool nocow = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW);
7208
7209 path = btrfs_alloc_path();
7210 if (!path)
7211 return -ENOMEM;
7212
7213 ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode),
7214 offset, 0);
7215 if (ret < 0)
7216 goto out;
7217
7218 slot = path->slots[0];
7219 if (ret == 1) {
7220 if (slot == 0) {
7221
7222 ret = 0;
7223 goto out;
7224 }
7225 slot--;
7226 }
7227 ret = 0;
7228 leaf = path->nodes[0];
7229 btrfs_item_key_to_cpu(leaf, &key, slot);
7230 if (key.objectid != btrfs_ino(inode) ||
7231 key.type != BTRFS_EXTENT_DATA_KEY) {
7232
7233 goto out;
7234 }
7235
7236 if (key.offset > offset) {
7237
7238 goto out;
7239 }
7240
7241 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
7242 found_type = btrfs_file_extent_type(leaf, fi);
7243 if (found_type != BTRFS_FILE_EXTENT_REG &&
7244 found_type != BTRFS_FILE_EXTENT_PREALLOC) {
7245
7246 goto out;
7247 }
7248
7249 if (!nocow && found_type == BTRFS_FILE_EXTENT_REG)
7250 goto out;
7251
7252 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
7253 if (extent_end <= offset)
7254 goto out;
7255
7256 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
7257 if (disk_bytenr == 0)
7258 goto out;
7259
7260 if (btrfs_file_extent_compression(leaf, fi) ||
7261 btrfs_file_extent_encryption(leaf, fi) ||
7262 btrfs_file_extent_other_encoding(leaf, fi))
7263 goto out;
7264
7265 backref_offset = btrfs_file_extent_offset(leaf, fi);
7266
7267 if (orig_start) {
7268 *orig_start = key.offset - backref_offset;
7269 *orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
7270 *ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
7271 }
7272
7273 if (btrfs_extent_readonly(root, disk_bytenr))
7274 goto out;
7275
7276 num_bytes = min(offset + *len, extent_end) - offset;
7277 if (!nocow && found_type == BTRFS_FILE_EXTENT_PREALLOC) {
7278 u64 range_end;
7279
7280 range_end = round_up(offset + num_bytes, root->sectorsize) - 1;
7281 ret = test_range_bit(io_tree, offset, range_end,
7282 EXTENT_DELALLOC, 0, NULL);
7283 if (ret) {
7284 ret = -EAGAIN;
7285 goto out;
7286 }
7287 }
7288
7289 btrfs_release_path(path);
7290
7291
7292
7293
7294
7295 trans = btrfs_join_transaction(root);
7296 if (IS_ERR(trans)) {
7297 ret = 0;
7298 goto out;
7299 }
7300
7301 ret = btrfs_cross_ref_exist(trans, root, btrfs_ino(inode),
7302 key.offset - backref_offset, disk_bytenr);
7303 btrfs_end_transaction(trans, root);
7304 if (ret) {
7305 ret = 0;
7306 goto out;
7307 }
7308
7309
7310
7311
7312
7313
7314
7315 disk_bytenr += backref_offset;
7316 disk_bytenr += offset - key.offset;
7317 if (csum_exist_in_range(root, disk_bytenr, num_bytes))
7318 goto out;
7319
7320
7321
7322
7323 *len = num_bytes;
7324 ret = 1;
7325out:
7326 btrfs_free_path(path);
7327 return ret;
7328}
7329
7330bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end)
7331{
7332 struct radix_tree_root *root = &inode->i_mapping->page_tree;
7333 int found = false;
7334 void **pagep = NULL;
7335 struct page *page = NULL;
7336 int start_idx;
7337 int end_idx;
7338
7339 start_idx = start >> PAGE_SHIFT;
7340
7341
7342
7343
7344 end_idx = end >> PAGE_SHIFT;
7345
7346 rcu_read_lock();
7347
7348
7349
7350
7351
7352
7353
7354 while (page == NULL &&
7355 radix_tree_gang_lookup_slot(root, &pagep, NULL, start_idx, 1)) {
7356 page = radix_tree_deref_slot(pagep);
7357 if (unlikely(!page))
7358 break;
7359
7360 if (radix_tree_exception(page)) {
7361 if (radix_tree_deref_retry(page)) {
7362 page = NULL;
7363 continue;
7364 }
7365
7366
7367
7368
7369
7370 page = NULL;
7371 break;
7372 }
7373
7374 if (!page_cache_get_speculative(page)) {
7375 page = NULL;
7376 continue;
7377 }
7378
7379
7380
7381
7382
7383
7384 if (unlikely(page != *pagep)) {
7385 put_page(page);
7386 page = NULL;
7387 }
7388 }
7389
7390 if (page) {
7391 if (page->index <= end_idx)
7392 found = true;
7393 put_page(page);
7394 }
7395
7396 rcu_read_unlock();
7397 return found;
7398}
7399
7400static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
7401 struct extent_state **cached_state, int writing)
7402{
7403 struct btrfs_ordered_extent *ordered;
7404 int ret = 0;
7405
7406 while (1) {
7407 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
7408 cached_state);
7409
7410
7411
7412
7413
7414 ordered = btrfs_lookup_ordered_range(inode, lockstart,
7415 lockend - lockstart + 1);
7416
7417
7418
7419
7420
7421
7422
7423
7424 if (!ordered &&
7425 (!writing ||
7426 !btrfs_page_exists_in_range(inode, lockstart, lockend)))
7427 break;
7428
7429 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
7430 cached_state, GFP_NOFS);
7431
7432 if (ordered) {
7433
7434
7435
7436
7437
7438
7439
7440
7441
7442
7443
7444
7445
7446
7447
7448 if (writing ||
7449 test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags))
7450 btrfs_start_ordered_extent(inode, ordered, 1);
7451 else
7452 ret = -ENOTBLK;
7453 btrfs_put_ordered_extent(ordered);
7454 } else {
7455
7456
7457
7458
7459
7460
7461
7462
7463
7464
7465
7466
7467
7468 ret = -ENOTBLK;
7469 }
7470
7471 if (ret)
7472 break;
7473
7474 cond_resched();
7475 }
7476
7477 return ret;
7478}
7479
7480static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
7481 u64 len, u64 orig_start,
7482 u64 block_start, u64 block_len,
7483 u64 orig_block_len, u64 ram_bytes,
7484 int type)
7485{
7486 struct extent_map_tree *em_tree;
7487 struct extent_map *em;
7488 struct btrfs_root *root = BTRFS_I(inode)->root;
7489 int ret;
7490
7491 em_tree = &BTRFS_I(inode)->extent_tree;
7492 em = alloc_extent_map();
7493 if (!em)
7494 return ERR_PTR(-ENOMEM);
7495
7496 em->start = start;
7497 em->orig_start = orig_start;
7498 em->mod_start = start;
7499 em->mod_len = len;
7500 em->len = len;
7501 em->block_len = block_len;
7502 em->block_start = block_start;
7503 em->bdev = root->fs_info->fs_devices->latest_bdev;
7504 em->orig_block_len = orig_block_len;
7505 em->ram_bytes = ram_bytes;
7506 em->generation = -1;
7507 set_bit(EXTENT_FLAG_PINNED, &em->flags);
7508 if (type == BTRFS_ORDERED_PREALLOC)
7509 set_bit(EXTENT_FLAG_FILLING, &em->flags);
7510
7511 do {
7512 btrfs_drop_extent_cache(inode, em->start,
7513 em->start + em->len - 1, 0);
7514 write_lock(&em_tree->lock);
7515 ret = add_extent_mapping(em_tree, em, 1);
7516 write_unlock(&em_tree->lock);
7517 } while (ret == -EEXIST);
7518
7519 if (ret) {
7520 free_extent_map(em);
7521 return ERR_PTR(ret);
7522 }
7523
7524 return em;
7525}
7526
7527static void adjust_dio_outstanding_extents(struct inode *inode,
7528 struct btrfs_dio_data *dio_data,
7529 const u64 len)
7530{
7531 unsigned num_extents;
7532
7533 num_extents = (unsigned) div64_u64(len + BTRFS_MAX_EXTENT_SIZE - 1,
7534 BTRFS_MAX_EXTENT_SIZE);
7535
7536
7537
7538
7539
7540 if (dio_data->outstanding_extents) {
7541 dio_data->outstanding_extents -= num_extents;
7542 } else {
7543 spin_lock(&BTRFS_I(inode)->lock);
7544 BTRFS_I(inode)->outstanding_extents += num_extents;
7545 spin_unlock(&BTRFS_I(inode)->lock);
7546 }
7547}
7548
7549static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
7550 struct buffer_head *bh_result, int create)
7551{
7552 struct extent_map *em;
7553 struct btrfs_root *root = BTRFS_I(inode)->root;
7554 struct extent_state *cached_state = NULL;
7555 struct btrfs_dio_data *dio_data = NULL;
7556 u64 start = iblock << inode->i_blkbits;
7557 u64 lockstart, lockend;
7558 u64 len = bh_result->b_size;
7559 int unlock_bits = EXTENT_LOCKED;
7560 int ret = 0;
7561
7562 if (create)
7563 unlock_bits |= EXTENT_DIRTY;
7564 else
7565 len = min_t(u64, len, root->sectorsize);
7566
7567 lockstart = start;
7568 lockend = start + len - 1;
7569
7570 if (current->journal_info) {
7571
7572
7573
7574
7575
7576 dio_data = current->journal_info;
7577 current->journal_info = NULL;
7578 }
7579
7580
7581
7582
7583
7584 if (lock_extent_direct(inode, lockstart, lockend, &cached_state,
7585 create)) {
7586 ret = -ENOTBLK;
7587 goto err;
7588 }
7589
7590 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
7591 if (IS_ERR(em)) {
7592 ret = PTR_ERR(em);
7593 goto unlock_err;
7594 }
7595
7596
7597
7598
7599
7600
7601
7602
7603
7604
7605
7606
7607
7608
7609
7610 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
7611 em->block_start == EXTENT_MAP_INLINE) {
7612 free_extent_map(em);
7613 ret = -ENOTBLK;
7614 goto unlock_err;
7615 }
7616
7617
7618 if (!create && (em->block_start == EXTENT_MAP_HOLE ||
7619 test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
7620 free_extent_map(em);
7621 goto unlock_err;
7622 }
7623
7624
7625
7626
7627
7628
7629
7630
7631
7632
7633 if (!create) {
7634 len = min(len, em->len - (start - em->start));
7635 lockstart = start + len;
7636 goto unlock;
7637 }
7638
7639 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
7640 ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
7641 em->block_start != EXTENT_MAP_HOLE)) {
7642 int type;
7643 u64 block_start, orig_start, orig_block_len, ram_bytes;
7644
7645 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
7646 type = BTRFS_ORDERED_PREALLOC;
7647 else
7648 type = BTRFS_ORDERED_NOCOW;
7649 len = min(len, em->len - (start - em->start));
7650 block_start = em->block_start + (start - em->start);
7651
7652 if (can_nocow_extent(inode, start, &len, &orig_start,
7653 &orig_block_len, &ram_bytes) == 1) {
7654 if (type == BTRFS_ORDERED_PREALLOC) {
7655 free_extent_map(em);
7656 em = create_pinned_em(inode, start, len,
7657 orig_start,
7658 block_start, len,
7659 orig_block_len,
7660 ram_bytes, type);
7661 if (IS_ERR(em)) {
7662 ret = PTR_ERR(em);
7663 goto unlock_err;
7664 }
7665 }
7666
7667 ret = btrfs_add_ordered_extent_dio(inode, start,
7668 block_start, len, len, type);
7669 if (ret) {
7670 free_extent_map(em);
7671 goto unlock_err;
7672 }
7673 goto unlock;
7674 }
7675 }
7676
7677
7678
7679
7680
7681 len = bh_result->b_size;
7682 free_extent_map(em);
7683 em = btrfs_new_extent_direct(inode, start, len);
7684 if (IS_ERR(em)) {
7685 ret = PTR_ERR(em);
7686 goto unlock_err;
7687 }
7688 len = min(len, em->len - (start - em->start));
7689unlock:
7690 bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
7691 inode->i_blkbits;
7692 bh_result->b_size = len;
7693 bh_result->b_bdev = em->bdev;
7694 set_buffer_mapped(bh_result);
7695 if (create) {
7696 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
7697 set_buffer_new(bh_result);
7698
7699
7700
7701
7702
7703 if (start + len > i_size_read(inode))
7704 i_size_write(inode, start + len);
7705
7706 adjust_dio_outstanding_extents(inode, dio_data, len);
7707 btrfs_free_reserved_data_space(inode, start, len);
7708 WARN_ON(dio_data->reserve < len);
7709 dio_data->reserve -= len;
7710 dio_data->unsubmitted_oe_range_end = start + len;
7711 current->journal_info = dio_data;
7712 }
7713
7714
7715
7716
7717
7718
7719 if (lockstart < lockend) {
7720 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
7721 lockend, unlock_bits, 1, 0,
7722 &cached_state, GFP_NOFS);
7723 } else {
7724 free_extent_state(cached_state);
7725 }
7726
7727 free_extent_map(em);
7728
7729 return 0;
7730
7731unlock_err:
7732 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
7733 unlock_bits, 1, 0, &cached_state, GFP_NOFS);
7734err:
7735 if (dio_data)
7736 current->journal_info = dio_data;
7737
7738
7739
7740
7741
7742 if (create && dio_data)
7743 adjust_dio_outstanding_extents(inode, dio_data, len);
7744
7745 return ret;
7746}
7747
7748static inline int submit_dio_repair_bio(struct inode *inode, struct bio *bio,
7749 int rw, int mirror_num)
7750{
7751 struct btrfs_root *root = BTRFS_I(inode)->root;
7752 int ret;
7753
7754 BUG_ON(rw & REQ_WRITE);
7755
7756 bio_get(bio);
7757
7758 ret = btrfs_bio_wq_end_io(root->fs_info, bio,
7759 BTRFS_WQ_ENDIO_DIO_REPAIR);
7760 if (ret)
7761 goto err;
7762
7763 ret = btrfs_map_bio(root, rw, bio, mirror_num, 0);
7764err:
7765 bio_put(bio);
7766 return ret;
7767}
7768
7769static int btrfs_check_dio_repairable(struct inode *inode,
7770 struct bio *failed_bio,
7771 struct io_failure_record *failrec,
7772 int failed_mirror)
7773{
7774 int num_copies;
7775
7776 num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info,
7777 failrec->logical, failrec->len);
7778 if (num_copies == 1) {
7779
7780
7781
7782
7783
7784 pr_debug("Check DIO Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n",
7785 num_copies, failrec->this_mirror, failed_mirror);
7786 return 0;
7787 }
7788
7789 failrec->failed_mirror = failed_mirror;
7790 failrec->this_mirror++;
7791 if (failrec->this_mirror == failed_mirror)
7792 failrec->this_mirror++;
7793
7794 if (failrec->this_mirror > num_copies) {
7795 pr_debug("Check DIO Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n",
7796 num_copies, failrec->this_mirror, failed_mirror);
7797 return 0;
7798 }
7799
7800 return 1;
7801}
7802
7803static int dio_read_error(struct inode *inode, struct bio *failed_bio,
7804 struct page *page, unsigned int pgoff,
7805 u64 start, u64 end, int failed_mirror,
7806 bio_end_io_t *repair_endio, void *repair_arg)
7807{
7808 struct io_failure_record *failrec;
7809 struct bio *bio;
7810 int isector;
7811 int read_mode;
7812 int ret;
7813
7814 BUG_ON(failed_bio->bi_rw & REQ_WRITE);
7815
7816 ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
7817 if (ret)
7818 return ret;
7819
7820 ret = btrfs_check_dio_repairable(inode, failed_bio, failrec,
7821 failed_mirror);
7822 if (!ret) {
7823 free_io_failure(inode, failrec);
7824 return -EIO;
7825 }
7826
7827 if ((failed_bio->bi_vcnt > 1)
7828 || (failed_bio->bi_io_vec->bv_len
7829 > BTRFS_I(inode)->root->sectorsize))
7830 read_mode = READ_SYNC | REQ_FAILFAST_DEV;
7831 else
7832 read_mode = READ_SYNC;
7833
7834 isector = start - btrfs_io_bio(failed_bio)->logical;
7835 isector >>= inode->i_sb->s_blocksize_bits;
7836 bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
7837 pgoff, isector, repair_endio, repair_arg);
7838 if (!bio) {
7839 free_io_failure(inode, failrec);
7840 return -EIO;
7841 }
7842
7843 btrfs_debug(BTRFS_I(inode)->root->fs_info,
7844 "Repair DIO Read Error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d\n",
7845 read_mode, failrec->this_mirror, failrec->in_validation);
7846
7847 ret = submit_dio_repair_bio(inode, bio, read_mode,
7848 failrec->this_mirror);
7849 if (ret) {
7850 free_io_failure(inode, failrec);
7851 bio_put(bio);
7852 }
7853
7854 return ret;
7855}
7856
/*
 * Context shared between a task waiting for a direct-IO repair read and the
 * repair bio's completion handler (passed through bio->bi_private).
 */
struct btrfs_retry_complete {
	/* Completed by the retry end_io handler when the repair bio finishes. */
	struct completion done;
	/* Inode the repair is performed for. */
	struct inode *inode;
	/* Start offset of the sector being repaired. */
	u64 start;
	/* Set to 1 by the end_io handler when the re-read data is good. */
	int uptodate;
};
7863
7864static void btrfs_retry_endio_nocsum(struct bio *bio)
7865{
7866 struct btrfs_retry_complete *done = bio->bi_private;
7867 struct inode *inode;
7868 struct bio_vec *bvec;
7869 int i;
7870
7871 if (bio->bi_error)
7872 goto end;
7873
7874 ASSERT(bio->bi_vcnt == 1);
7875 inode = bio->bi_io_vec->bv_page->mapping->host;
7876 ASSERT(bio->bi_io_vec->bv_len == BTRFS_I(inode)->root->sectorsize);
7877
7878 done->uptodate = 1;
7879 bio_for_each_segment_all(bvec, bio, i)
7880 clean_io_failure(done->inode, done->start, bvec->bv_page, 0);
7881end:
7882 complete(&done->done);
7883 bio_put(bio);
7884}
7885
7886static int __btrfs_correct_data_nocsum(struct inode *inode,
7887 struct btrfs_io_bio *io_bio)
7888{
7889 struct btrfs_fs_info *fs_info;
7890 struct bio_vec *bvec;
7891 struct btrfs_retry_complete done;
7892 u64 start;
7893 unsigned int pgoff;
7894 u32 sectorsize;
7895 int nr_sectors;
7896 int i;
7897 int ret;
7898
7899 fs_info = BTRFS_I(inode)->root->fs_info;
7900 sectorsize = BTRFS_I(inode)->root->sectorsize;
7901
7902 start = io_bio->logical;
7903 done.inode = inode;
7904
7905 bio_for_each_segment_all(bvec, &io_bio->bio, i) {
7906 nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len);
7907 pgoff = bvec->bv_offset;
7908
7909next_block_or_try_again:
7910 done.uptodate = 0;
7911 done.start = start;
7912 init_completion(&done.done);
7913
7914 ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page,
7915 pgoff, start, start + sectorsize - 1,
7916 io_bio->mirror_num,
7917 btrfs_retry_endio_nocsum, &done);
7918 if (ret)
7919 return ret;
7920
7921 wait_for_completion(&done.done);
7922
7923 if (!done.uptodate) {
7924
7925 goto next_block_or_try_again;
7926 }
7927
7928 start += sectorsize;
7929
7930 if (nr_sectors--) {
7931 pgoff += sectorsize;
7932 goto next_block_or_try_again;
7933 }
7934 }
7935
7936 return 0;
7937}
7938
7939static void btrfs_retry_endio(struct bio *bio)
7940{
7941 struct btrfs_retry_complete *done = bio->bi_private;
7942 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
7943 struct inode *inode;
7944 struct bio_vec *bvec;
7945 u64 start;
7946 int uptodate;
7947 int ret;
7948 int i;
7949
7950 if (bio->bi_error)
7951 goto end;
7952
7953 uptodate = 1;
7954
7955 start = done->start;
7956
7957 ASSERT(bio->bi_vcnt == 1);
7958 inode = bio->bi_io_vec->bv_page->mapping->host;
7959 ASSERT(bio->bi_io_vec->bv_len == BTRFS_I(inode)->root->sectorsize);
7960
7961 bio_for_each_segment_all(bvec, bio, i) {
7962 ret = __readpage_endio_check(done->inode, io_bio, i,
7963 bvec->bv_page, bvec->bv_offset,
7964 done->start, bvec->bv_len);
7965 if (!ret)
7966 clean_io_failure(done->inode, done->start,
7967 bvec->bv_page, bvec->bv_offset);
7968 else
7969 uptodate = 0;
7970 }
7971
7972 done->uptodate = uptodate;
7973end:
7974 complete(&done->done);
7975 bio_put(bio);
7976}
7977
/*
 * Verify every sector of a completed direct-IO read bio with
 * __readpage_endio_check() and, for each sector that fails, retry the read
 * through dio_read_error() until a retry completes up to date or
 * dio_read_error() itself fails.
 *
 * The incoming @err is discarded (it is reset to 0 below).  Returns 0, or
 * the last error returned by dio_read_error() for a sector that could not
 * be repaired.
 */
static int __btrfs_subio_endio_read(struct inode *inode,
				    struct btrfs_io_bio *io_bio, int err)
{
	struct btrfs_fs_info *fs_info;
	struct bio_vec *bvec;
	struct btrfs_retry_complete done;
	u64 start;
	u64 offset = 0;
	u32 sectorsize;
	int nr_sectors;
	unsigned int pgoff;
	int csum_pos;
	int i;
	int ret;

	fs_info = BTRFS_I(inode)->root->fs_info;
	sectorsize = BTRFS_I(inode)->root->sectorsize;

	err = 0;
	start = io_bio->logical;
	done.inode = inode;

	bio_for_each_segment_all(bvec, &io_bio->bio, i) {
		nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len);

		pgoff = bvec->bv_offset;
next_block:
		/* Sector index from the start of the bio, passed to the check. */
		csum_pos = BTRFS_BYTES_TO_BLKS(fs_info, offset);
		ret = __readpage_endio_check(inode, io_bio, csum_pos,
					     bvec->bv_page, pgoff, start,
					     sectorsize);
		if (likely(!ret))
			goto next;
try_again:
		done.uptodate = 0;
		done.start = start;
		init_completion(&done.done);

		ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page,
				     pgoff, start, start + sectorsize - 1,
				     io_bio->mirror_num,
				     btrfs_retry_endio, &done);
		if (ret) {
			/* Remember the failure but keep checking later sectors. */
			err = ret;
			goto next;
		}

		wait_for_completion(&done.done);

		if (!done.uptodate) {
			/* Another mirror may still be available, try again. */
			goto try_again;
		}
next:
		offset += sectorsize;
		start += sectorsize;

		ASSERT(nr_sectors);

		/* Continue with the next sector of the same bvec, if any. */
		if (--nr_sectors) {
			pgoff += sectorsize;
			goto next_block;
		}
	}

	return err;
}
8045
8046static int btrfs_subio_endio_read(struct inode *inode,
8047 struct btrfs_io_bio *io_bio, int err)
8048{
8049 bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
8050
8051 if (skip_csum) {
8052 if (unlikely(err))
8053 return __btrfs_correct_data_nocsum(inode, io_bio);
8054 else
8055 return 0;
8056 } else {
8057 return __btrfs_subio_endio_read(inode, io_bio, err);
8058 }
8059}
8060
8061static void btrfs_endio_direct_read(struct bio *bio)
8062{
8063 struct btrfs_dio_private *dip = bio->bi_private;
8064 struct inode *inode = dip->inode;
8065 struct bio *dio_bio;
8066 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
8067 int err = bio->bi_error;
8068
8069 if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED)
8070 err = btrfs_subio_endio_read(inode, io_bio, err);
8071
8072 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
8073 dip->logical_offset + dip->bytes - 1);
8074 dio_bio = dip->dio_bio;
8075
8076 kfree(dip);
8077
8078 dio_bio->bi_error = bio->bi_error;
8079 dio_end_io(dio_bio, bio->bi_error);
8080
8081 if (io_bio->end_io)
8082 io_bio->end_io(io_bio, err);
8083 bio_put(bio);
8084}
8085
8086static void btrfs_endio_direct_write_update_ordered(struct inode *inode,
8087 const u64 offset,
8088 const u64 bytes,
8089 const int uptodate)
8090{
8091 struct btrfs_root *root = BTRFS_I(inode)->root;
8092 struct btrfs_ordered_extent *ordered = NULL;
8093 u64 ordered_offset = offset;
8094 u64 ordered_bytes = bytes;
8095 int ret;
8096
8097again:
8098 ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
8099 &ordered_offset,
8100 ordered_bytes,
8101 uptodate);
8102 if (!ret)
8103 goto out_test;
8104
8105 btrfs_init_work(&ordered->work, btrfs_endio_write_helper,
8106 finish_ordered_fn, NULL, NULL);
8107 btrfs_queue_work(root->fs_info->endio_write_workers,
8108 &ordered->work);
8109out_test:
8110
8111
8112
8113
8114 if (ordered_offset < offset + bytes) {
8115 ordered_bytes = offset + bytes - ordered_offset;
8116 ordered = NULL;
8117 goto again;
8118 }
8119}
8120
8121static void btrfs_endio_direct_write(struct bio *bio)
8122{
8123 struct btrfs_dio_private *dip = bio->bi_private;
8124 struct bio *dio_bio = dip->dio_bio;
8125
8126 btrfs_endio_direct_write_update_ordered(dip->inode,
8127 dip->logical_offset,
8128 dip->bytes,
8129 !bio->bi_error);
8130
8131 kfree(dip);
8132
8133 dio_bio->bi_error = bio->bi_error;
8134 dio_end_io(dio_bio, bio->bi_error);
8135 bio_put(bio);
8136}
8137
8138static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
8139 struct bio *bio, int mirror_num,
8140 unsigned long bio_flags, u64 offset)
8141{
8142 int ret;
8143 struct btrfs_root *root = BTRFS_I(inode)->root;
8144 ret = btrfs_csum_one_bio(root, inode, bio, offset, 1);
8145 BUG_ON(ret);
8146 return 0;
8147}
8148
8149static void btrfs_end_dio_bio(struct bio *bio)
8150{
8151 struct btrfs_dio_private *dip = bio->bi_private;
8152 int err = bio->bi_error;
8153
8154 if (err)
8155 btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
8156 "direct IO failed ino %llu rw %lu sector %#Lx len %u err no %d",
8157 btrfs_ino(dip->inode), bio->bi_rw,
8158 (unsigned long long)bio->bi_iter.bi_sector,
8159 bio->bi_iter.bi_size, err);
8160
8161 if (dip->subio_endio)
8162 err = dip->subio_endio(dip->inode, btrfs_io_bio(bio), err);
8163
8164 if (err) {
8165 dip->errors = 1;
8166
8167
8168
8169
8170
8171 smp_mb__before_atomic();
8172 }
8173
8174
8175 if (!atomic_dec_and_test(&dip->pending_bios))
8176 goto out;
8177
8178 if (dip->errors) {
8179 bio_io_error(dip->orig_bio);
8180 } else {
8181 dip->dio_bio->bi_error = 0;
8182 bio_endio(dip->orig_bio);
8183 }
8184out:
8185 bio_put(bio);
8186}
8187
8188static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
8189 u64 first_sector, gfp_t gfp_flags)
8190{
8191 struct bio *bio;
8192 bio = btrfs_bio_alloc(bdev, first_sector, BIO_MAX_PAGES, gfp_flags);
8193 if (bio)
8194 bio_associate_current(bio);
8195 return bio;
8196}
8197
8198static inline int btrfs_lookup_and_bind_dio_csum(struct btrfs_root *root,
8199 struct inode *inode,
8200 struct btrfs_dio_private *dip,
8201 struct bio *bio,
8202 u64 file_offset)
8203{
8204 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
8205 struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio);
8206 int ret;
8207
8208
8209
8210
8211
8212
8213 if (dip->logical_offset == file_offset) {
8214 ret = btrfs_lookup_bio_sums_dio(root, inode, dip->orig_bio,
8215 file_offset);
8216 if (ret)
8217 return ret;
8218 }
8219
8220 if (bio == dip->orig_bio)
8221 return 0;
8222
8223 file_offset -= dip->logical_offset;
8224 file_offset >>= inode->i_sb->s_blocksize_bits;
8225 io_bio->csum = (u8 *)(((u32 *)orig_io_bio->csum) + file_offset);
8226
8227 return 0;
8228}
8229
/*
 * Submit one bio of a direct-IO request.
 *
 * Reads are routed through the data end_io workqueue.  Unless @skip_sum is
 * set, writes are checksummed (asynchronously through btrfs_wq_submit_bio()
 * or inline) and reads get their checksums looked up and bound.  The bio is
 * then mapped and sent with btrfs_map_bio(), except in the async write case
 * where btrfs_wq_submit_bio() takes over.
 *
 * Returns 0 on success or the error of the step that failed.
 */
static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
					 int rw, u64 file_offset, int skip_sum,
					 int async_submit)
{
	struct btrfs_dio_private *dip = bio->bi_private;
	int write = rw & REQ_WRITE;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret;

	/* Async submission is only used while the inode has no sync writers. */
	if (async_submit)
		async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);

	/* Hold a reference for the duration of this function (put at err:). */
	bio_get(bio);

	if (!write) {
		ret = btrfs_bio_wq_end_io(root->fs_info, bio,
				BTRFS_WQ_ENDIO_DATA);
		if (ret)
			goto err;
	}

	if (skip_sum)
		goto map;

	if (write && async_submit) {
		ret = btrfs_wq_submit_bio(root->fs_info,
				   inode, rw, bio, 0, 0,
				   file_offset,
				   __btrfs_submit_bio_start_direct_io,
				   __btrfs_submit_bio_done);
		/* Taken on success too: err: only drops our reference. */
		goto err;
	} else if (write) {
		/*
		 * Not submitting asynchronously, so compute the checksum of
		 * the bio right now.
		 */
		ret = btrfs_csum_one_bio(root, inode, bio, file_offset, 1);
		if (ret)
			goto err;
	} else {
		ret = btrfs_lookup_and_bind_dio_csum(root, inode, dip, bio,
						     file_offset);
		if (ret)
			goto err;
	}
map:
	ret = btrfs_map_bio(root, rw, bio, 0, async_submit);
err:
	bio_put(bio);
	return ret;
}
8281
/*
 * Submit the cloned direct-IO bio held in @dip.
 *
 * If btrfs_map_block() reports that the whole bio can be mapped in one go,
 * the clone itself is submitted.  Otherwise the clone's pages are
 * redistributed, one block at a time, over newly allocated bios that each
 * fit within a mapped length, and those are submitted instead;
 * dip->pending_bios counts them so that btrfs_end_dio_bio() can complete
 * the original bio after the last one.
 *
 * Returns -EIO / -ENOMEM when nothing was submitted yet.  Once split bios
 * are in flight, failures are recorded in dip->errors and reported through
 * the original bio's completion, and 0 is returned.
 */
static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
				    int skip_sum)
{
	struct inode *inode = dip->inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct bio *bio;
	struct bio *orig_bio = dip->orig_bio;
	struct bio_vec *bvec = orig_bio->bi_io_vec;
	u64 start_sector = orig_bio->bi_iter.bi_sector;
	u64 file_offset = dip->logical_offset;
	u64 submit_len = 0;
	u64 map_length;
	u32 blocksize = root->sectorsize;
	int async_submit = 0;
	int nr_sectors;
	int ret;
	int i;

	map_length = orig_bio->bi_iter.bi_size;
	ret = btrfs_map_block(root->fs_info, rw, start_sector << 9,
			      &map_length, NULL, 0);
	if (ret)
		return -EIO;

	/* The whole bio fits in the mapped length: submit it unsplit. */
	if (map_length >= orig_bio->bi_iter.bi_size) {
		bio = orig_bio;
		dip->flags |= BTRFS_DIO_ORIG_BIO_SUBMITTED;
		goto submit;
	}

	/* Split bios are submitted synchronously on RAID5/6 data profiles. */
	if (btrfs_get_alloc_profile(root, 1) & BTRFS_BLOCK_GROUP_RAID56_MASK)
		async_submit = 0;
	else
		async_submit = 1;

	bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
	if (!bio)
		return -ENOMEM;

	bio->bi_private = dip;
	bio->bi_end_io = btrfs_end_dio_bio;
	btrfs_io_bio(bio)->logical = file_offset;
	atomic_inc(&dip->pending_bios);

	while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
		nr_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info, bvec->bv_len);
		i = 0;
next_block:
		/*
		 * The current bio is full when the next block would exceed
		 * the mapped length or cannot be added to the bio.
		 */
		if (unlikely(map_length < submit_len + blocksize ||
		    bio_add_page(bio, bvec->bv_page, blocksize,
			    bvec->bv_offset + (i * blocksize)) < blocksize)) {
			/*
			 * Bump the count before submitting so that the end_io
			 * handler of the bio being submitted cannot drop
			 * pending_bios to zero (and complete the request)
			 * while more bios are still being set up here.
			 */
			atomic_inc(&dip->pending_bios);
			ret = __btrfs_submit_dio_bio(bio, inode, rw,
						     file_offset, skip_sum,
						     async_submit);
			if (ret) {
				bio_put(bio);
				atomic_dec(&dip->pending_bios);
				goto out_err;
			}

			start_sector += submit_len >> 9;
			file_offset += submit_len;

			submit_len = 0;

			bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev,
						  start_sector, GFP_NOFS);
			if (!bio)
				goto out_err;
			bio->bi_private = dip;
			bio->bi_end_io = btrfs_end_dio_bio;
			btrfs_io_bio(bio)->logical = file_offset;

			map_length = orig_bio->bi_iter.bi_size;
			ret = btrfs_map_block(root->fs_info, rw,
					      start_sector << 9,
					      &map_length, NULL, 0);
			if (ret) {
				bio_put(bio);
				goto out_err;
			}

			/* Retry the same block against the fresh bio. */
			goto next_block;
		} else {
			submit_len += blocksize;
			if (--nr_sectors) {
				i++;
				goto next_block;
			}
			bvec++;
		}
	}

submit:
	ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
				     async_submit);
	if (!ret)
		return 0;

	bio_put(bio);
out_err:
	dip->errors = 1;
	/*
	 * Order the store to dip->errors before the decrement of
	 * pending_bios below.
	 */
	smp_mb__before_atomic();
	if (atomic_dec_and_test(&dip->pending_bios))
		bio_io_error(dip->orig_bio);

	/* The error is delivered through the bio completion, not returned. */
	return 0;
}
8403
/*
 * Submit callback handed to __blockdev_direct_IO().
 *
 * Clones @dio_bio, allocates the private dio state describing the request,
 * installs the read or write completion handler on the clone and passes it
 * to btrfs_submit_direct_hook().  On failure the request is completed with
 * an error here.
 */
static void btrfs_submit_direct(int rw, struct bio *dio_bio,
				struct inode *inode, loff_t file_offset)
{
	struct btrfs_dio_private *dip = NULL;
	struct bio *io_bio = NULL;
	struct btrfs_io_bio *btrfs_bio;
	int skip_sum;
	int write = rw & REQ_WRITE;
	int ret = 0;

	skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;

	io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS);
	if (!io_bio) {
		ret = -ENOMEM;
		goto free_ordered;
	}

	dip = kzalloc(sizeof(*dip), GFP_NOFS);
	if (!dip) {
		ret = -ENOMEM;
		goto free_ordered;
	}

	dip->private = dio_bio->bi_private;
	dip->inode = inode;
	dip->logical_offset = file_offset;
	dip->bytes = dio_bio->bi_iter.bi_size;
	dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9;
	io_bio->bi_private = dip;
	dip->orig_bio = io_bio;
	dip->dio_bio = dio_bio;
	atomic_set(&dip->pending_bios, 0);
	btrfs_bio = btrfs_io_bio(io_bio);
	btrfs_bio->logical = file_offset;

	if (write) {
		io_bio->bi_end_io = btrfs_endio_direct_write;
	} else {
		io_bio->bi_end_io = btrfs_endio_direct_read;
		dip->subio_endio = btrfs_subio_endio_read;
	}

	/*
	 * Collapse the "unsubmitted ordered extent" range kept in
	 * current->journal_info to an empty range positioned at the end of
	 * this bio.  From here on the ordered extents for this bio are
	 * finished through the clone's end_io callback (see the cleanup
	 * below), so btrfs_direct_IO() must not finish them a second time.
	 */
	if (write) {
		struct btrfs_dio_data *dio_data = current->journal_info;

		dio_data->unsubmitted_oe_range_end = dip->logical_offset +
			dip->bytes;
		dio_data->unsubmitted_oe_range_start =
			dio_data->unsubmitted_oe_range_end;
	}

	ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
	if (!ret)
		return;

	if (btrfs_bio->end_io)
		btrfs_bio->end_io(btrfs_bio, ret);

free_ordered:
	/*
	 * Reached when the dip could not be submitted, or when cloning
	 * dio_bio or allocating the dip failed.  With both a clone and a dip
	 * in hand, ending the clone runs the regular end_io callback and its
	 * cleanup.  Otherwise that callback cannot be used (it needs both),
	 * so its work is done by hand in the else branch.
	 */
	if (io_bio && dip) {
		io_bio->bi_error = -EIO;
		bio_endio(io_bio);
		/*
		 * The end_io callback has freed dip, dropped the reference
		 * on io_bio and completed dio_bio through dio_end_io(), so
		 * none of them may be touched again.
		 */
		dip = NULL;
		io_bio = NULL;
	} else {
		if (write)
			btrfs_endio_direct_write_update_ordered(inode,
						file_offset,
						dio_bio->bi_iter.bi_size,
						0);
		else
			unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
			      file_offset + dio_bio->bi_iter.bi_size - 1);

		dio_bio->bi_error = -EIO;
		/*
		 * Complete dio_bio the same way the end_io callbacks do;
		 * presumably this also releases it, so no bio_put() follows
		 * -- confirm against dio_end_io().
		 */
		dio_end_io(dio_bio, ret);
	}
	if (io_bio)
		bio_put(io_bio);
	kfree(dip);
}
8510
8511static ssize_t check_direct_IO(struct btrfs_root *root, struct kiocb *iocb,
8512 const struct iov_iter *iter, loff_t offset)
8513{
8514 int seg;
8515 int i;
8516 unsigned blocksize_mask = root->sectorsize - 1;
8517 ssize_t retval = -EINVAL;
8518
8519 if (offset & blocksize_mask)
8520 goto out;
8521
8522 if (iov_iter_alignment(iter) & blocksize_mask)
8523 goto out;
8524
8525
8526 if (iov_iter_rw(iter) == WRITE)
8527 return 0;
8528
8529
8530
8531
8532
8533 for (seg = 0; seg < iter->nr_segs; seg++) {
8534 for (i = seg + 1; i < iter->nr_segs; i++) {
8535 if (iter->iov[seg].iov_base == iter->iov[i].iov_base)
8536 goto out;
8537 }
8538 }
8539 retval = 0;
8540out:
8541 return retval;
8542}
8543
/*
 * ->direct_IO implementation.
 *
 * Returns 0 without doing any IO when the request fails check_direct_IO().
 * Otherwise the result of __blockdev_direct_IO() is returned, or the error
 * from reserving space for a write.
 */
static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
			       loff_t offset)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_dio_data dio_data = { 0 };
	size_t count = 0;
	int flags = 0;
	bool wakeup = true;
	bool relock = false;
	ssize_t ret;

	if (check_direct_IO(BTRFS_I(inode)->root, iocb, iter, offset))
		return 0;

	inode_dio_begin(inode);
	smp_mb__after_atomic();

	/*
	 * When the inode has async extents, start writeback of the dirty
	 * pages in the range once more before doing direct IO.
	 * NOTE(review): presumably needed because compressed writes may
	 * leave pages dirty after the generic flush -- confirm.
	 */
	count = iov_iter_count(iter);
	if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
		     &BTRFS_I(inode)->runtime_flags))
		filemap_fdatawrite_range(inode->i_mapping, offset,
					 offset + count - 1);

	if (iov_iter_rw(iter) == WRITE) {
		/*
		 * A write entirely within i_size runs with the inode lock
		 * dropped (it is re-taken at out:).  A write reaching past
		 * i_size keeps the lock held.
		 */
		if (offset + count <= inode->i_size) {
			inode_unlock(inode);
			relock = true;
		}
		ret = btrfs_delalloc_reserve_space(inode, offset, count);
		if (ret)
			goto out;
		dio_data.outstanding_extents = div64_u64(count +
						BTRFS_MAX_EXTENT_SIZE - 1,
						BTRFS_MAX_EXTENT_SIZE);

		/*
		 * Record how much was reserved and hand the bookkeeping to
		 * the callbacks through current->journal_info, where
		 * btrfs_submit_direct() picks it up.
		 */
		dio_data.reserve = round_up(count, root->sectorsize);
		dio_data.unsubmitted_oe_range_start = (u64)offset;
		dio_data.unsubmitted_oe_range_end = (u64)offset;
		current->journal_info = &dio_data;
	} else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
				     &BTRFS_I(inode)->runtime_flags)) {
		/* Reads needing the lock use DIO_LOCKING; end dio accounting now. */
		inode_dio_end(inode);
		flags = DIO_LOCKING | DIO_SKIP_HOLES;
		wakeup = false;
	}

	ret = __blockdev_direct_IO(iocb, inode,
				   BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
				   iter, offset, btrfs_get_blocks_direct, NULL,
				   btrfs_submit_direct, flags);
	if (iov_iter_rw(iter) == WRITE) {
		current->journal_info = NULL;
		if (ret < 0 && ret != -EIOCBQUEUED) {
			if (dio_data.reserve)
				btrfs_delalloc_release_space(inode, offset,
							     dio_data.reserve);
			/*
			 * A non-empty unsubmitted range was never handed to
			 * a bio (btrfs_submit_direct() empties the range for
			 * every bio it takes over), so mark it finished with
			 * an error here.
			 */
			if (dio_data.unsubmitted_oe_range_start <
			    dio_data.unsubmitted_oe_range_end)
				btrfs_endio_direct_write_update_ordered(inode,
					dio_data.unsubmitted_oe_range_start,
					dio_data.unsubmitted_oe_range_end -
					dio_data.unsubmitted_oe_range_start,
					0);
		} else if (ret >= 0 && (size_t)ret < count)
			/* Short write: give back the unused reservation. */
			btrfs_delalloc_release_space(inode, offset,
						     count - (size_t)ret);
	}
out:
	if (wakeup)
		inode_dio_end(inode);
	if (relock)
		inode_lock(inode);

	return ret;
}
8643
8644#define BTRFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC)
8645
8646static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
8647 __u64 start, __u64 len)
8648{
8649 int ret;
8650
8651 ret = fiemap_check_flags(fieinfo, BTRFS_FIEMAP_FLAGS);
8652 if (ret)
8653 return ret;
8654
8655 return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap);
8656}
8657
8658int btrfs_readpage(struct file *file, struct page *page)
8659{
8660 struct extent_io_tree *tree;
8661 tree = &BTRFS_I(page->mapping->host)->io_tree;
8662 return extent_read_full_page(tree, page, btrfs_get_extent, 0);
8663}
8664
8665static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
8666{
8667 struct extent_io_tree *tree;
8668 struct inode *inode = page->mapping->host;
8669 int ret;
8670
8671 if (current->flags & PF_MEMALLOC) {
8672 redirty_page_for_writepage(wbc, page);
8673 unlock_page(page);
8674 return 0;
8675 }
8676
8677
8678
8679
8680
8681
8682 if (!igrab(inode)) {
8683 redirty_page_for_writepage(wbc, page);
8684 return AOP_WRITEPAGE_ACTIVATE;
8685 }
8686 tree = &BTRFS_I(page->mapping->host)->io_tree;
8687 ret = extent_write_full_page(tree, page, btrfs_get_extent, wbc);
8688 btrfs_add_delayed_iput(inode);
8689 return ret;
8690}
8691
8692static int btrfs_writepages(struct address_space *mapping,
8693 struct writeback_control *wbc)
8694{
8695 struct extent_io_tree *tree;
8696
8697 tree = &BTRFS_I(mapping->host)->io_tree;
8698 return extent_writepages(tree, mapping, btrfs_get_extent, wbc);
8699}
8700
8701static int
8702btrfs_readpages(struct file *file, struct address_space *mapping,
8703 struct list_head *pages, unsigned nr_pages)
8704{
8705 struct extent_io_tree *tree;
8706 tree = &BTRFS_I(mapping->host)->io_tree;
8707 return extent_readpages(tree, mapping, pages, nr_pages,
8708 btrfs_get_extent);
8709}
8710static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
8711{
8712 struct extent_io_tree *tree;
8713 struct extent_map_tree *map;
8714 int ret;
8715
8716 tree = &BTRFS_I(page->mapping->host)->io_tree;
8717 map = &BTRFS_I(page->mapping->host)->extent_tree;
8718 ret = try_release_extent_mapping(map, tree, page, gfp_flags);
8719 if (ret == 1) {
8720 ClearPagePrivate(page);
8721 set_page_private(page, 0);
8722 put_page(page);
8723 }
8724 return ret;
8725}
8726
8727static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
8728{
8729 if (PageWriteback(page) || PageDirty(page))
8730 return 0;
8731 return __btrfs_releasepage(page, gfp_flags & GFP_NOFS);
8732}
8733
/*
 * ->invalidatepage implementation.
 *
 * A partial invalidation (@offset != 0) only attempts to release the page.
 * For a whole page, any ordered extent overlapping the page is marked
 * truncated and has its pending bytes accounted, the extent state bits of
 * the page range are cleared (skipped while the inode is being freed), the
 * qgroup data reservation is released and the page private state is dropped.
 */
static void btrfs_invalidatepage(struct page *page, unsigned int offset,
				 unsigned int length)
{
	struct inode *inode = page->mapping->host;
	struct extent_io_tree *tree;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	u64 page_start = page_offset(page);
	u64 page_end = page_start + PAGE_SIZE - 1;
	u64 start;
	u64 end;
	int inode_evicting = inode->i_state & I_FREEING;

	/*
	 * Wait for any writeback on the page to finish before touching its
	 * Private2 bit and the ordered extent accounting below.
	 * NOTE(review): presumably the caller holds the page lock so no new
	 * writeback can start -- confirm.
	 */
	wait_on_page_writeback(page);

	tree = &BTRFS_I(inode)->io_tree;
	if (offset) {
		btrfs_releasepage(page, GFP_NOFS);
		return;
	}

	/* No extent locking is done while the inode is being evicted. */
	if (!inode_evicting)
		lock_extent_bits(tree, page_start, page_end, &cached_state);
again:
	start = page_start;
	ordered = btrfs_lookup_ordered_range(inode, start,
					page_end - start + 1);
	if (ordered) {
		end = min(page_end, ordered->file_offset + ordered->len - 1);
		/*
		 * Clear the delalloc/dirty state (and the lock) for the part
		 * of the page covered by this ordered extent.
		 */
		if (!inode_evicting)
			clear_extent_bit(tree, start, end,
					 EXTENT_DIRTY | EXTENT_DELALLOC |
					 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
					 EXTENT_DEFRAG, 1, 0, &cached_state,
					 GFP_NOFS);
		/*
		 * Only the path that clears the Private2 bit performs the
		 * ordered extent accounting for this page.
		 */
		if (TestClearPagePrivate2(page)) {
			struct btrfs_ordered_inode_tree *tree;
			u64 new_len;

			tree = &BTRFS_I(inode)->ordered_tree;

			/* Shrink the recorded truncated length if needed. */
			spin_lock_irq(&tree->lock);
			set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
			new_len = start - ordered->file_offset;
			if (new_len < ordered->truncated_len)
				ordered->truncated_len = new_len;
			spin_unlock_irq(&tree->lock);

			if (btrfs_dec_test_ordered_pending(inode, &ordered,
							   start,
							   end - start + 1, 1))
				btrfs_finish_ordered_io(ordered);
		}
		btrfs_put_ordered_extent(ordered);
		if (!inode_evicting) {
			cached_state = NULL;
			lock_extent_bits(tree, start, end,
					 &cached_state);
		}

		/*
		 * NOTE(review): the `again` label resets start to page_start,
		 * so the advance computed here is discarded.  Confirm this is
		 * intended when an ordered extent ends before page_end.
		 */
		start = end + 1;
		if (start < page_end)
			goto again;
	}

	/*
	 * Release the qgroup data reservation for the whole page, then clear
	 * the remaining extent state for the page range and try to release
	 * the page.
	 * NOTE(review): presumably the qgroup call is a no-op for ranges
	 * whose reservation was already released or consumed -- confirm.
	 */
	btrfs_qgroup_free_data(inode, page_start, PAGE_SIZE);
	if (!inode_evicting) {
		clear_extent_bit(tree, page_start, page_end,
				 EXTENT_LOCKED | EXTENT_DIRTY |
				 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
				 EXTENT_DEFRAG, 1, 1,
				 &cached_state, GFP_NOFS);

		__btrfs_releasepage(page, GFP_NOFS);
	}

	ClearPageChecked(page);
	if (PagePrivate(page)) {
		ClearPagePrivate(page);
		set_page_private(page, 0);
		put_page(page);
	}
}
8843
8844
8845
8846
8847
8848
8849
8850
8851
8852
8853
8854
8855
8856
8857
8858
8859int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
8860{
8861 struct page *page = vmf->page;
8862 struct inode *inode = file_inode(vma->vm_file);
8863 struct btrfs_root *root = BTRFS_I(inode)->root;
8864 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
8865 struct btrfs_ordered_extent *ordered;
8866 struct extent_state *cached_state = NULL;
8867 char *kaddr;
8868 unsigned long zero_start;
8869 loff_t size;
8870 int ret;
8871 int reserved = 0;
8872 u64 reserved_space;
8873 u64 page_start;
8874 u64 page_end;
8875 u64 end;
8876
8877 reserved_space = PAGE_SIZE;
8878
8879 sb_start_pagefault(inode->i_sb);
8880 page_start = page_offset(page);
8881 page_end = page_start + PAGE_SIZE - 1;
8882 end = page_end;
8883
8884
8885
8886
8887
8888
8889
8890
8891
8892 ret = btrfs_delalloc_reserve_space(inode, page_start,
8893 reserved_space);
8894 if (!ret) {
8895 ret = file_update_time(vma->vm_file);
8896 reserved = 1;
8897 }
8898 if (ret) {
8899 if (ret == -ENOMEM)
8900 ret = VM_FAULT_OOM;
8901 else
8902 ret = VM_FAULT_SIGBUS;
8903 if (reserved)
8904 goto out;
8905 goto out_noreserve;
8906 }
8907
8908 ret = VM_FAULT_NOPAGE;
8909again:
8910 lock_page(page);
8911 size = i_size_read(inode);
8912
8913 if ((page->mapping != inode->i_mapping) ||
8914 (page_start >= size)) {
8915
8916 goto out_unlock;
8917 }
8918 wait_on_page_writeback(page);
8919
8920 lock_extent_bits(io_tree, page_start, page_end, &cached_state);
8921 set_page_extent_mapped(page);
8922
8923
8924
8925
8926
8927 ordered = btrfs_lookup_ordered_range(inode, page_start, page_end);
8928 if (ordered) {
8929 unlock_extent_cached(io_tree, page_start, page_end,
8930 &cached_state, GFP_NOFS);
8931 unlock_page(page);
8932 btrfs_start_ordered_extent(inode, ordered, 1);
8933 btrfs_put_ordered_extent(ordered);
8934 goto again;
8935 }
8936
8937 if (page->index == ((size - 1) >> PAGE_SHIFT)) {
8938 reserved_space = round_up(size - page_start, root->sectorsize);
8939 if (reserved_space < PAGE_SIZE) {
8940 end = page_start + reserved_space - 1;
8941 spin_lock(&BTRFS_I(inode)->lock);
8942 BTRFS_I(inode)->outstanding_extents++;
8943 spin_unlock(&BTRFS_I(inode)->lock);
8944 btrfs_delalloc_release_space(inode, page_start,
8945 PAGE_SIZE - reserved_space);
8946 }
8947 }
8948
8949
8950
8951
8952
8953
8954
8955
8956 clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end,
8957 EXTENT_DIRTY | EXTENT_DELALLOC |
8958 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
8959 0, 0, &cached_state, GFP_NOFS);
8960
8961 ret = btrfs_set_extent_delalloc(inode, page_start, end,
8962 &cached_state);
8963 if (ret) {
8964 unlock_extent_cached(io_tree, page_start, page_end,
8965 &cached_state, GFP_NOFS);
8966 ret = VM_FAULT_SIGBUS;
8967 goto out_unlock;
8968 }
8969 ret = 0;
8970
8971
8972 if (page_start + PAGE_SIZE > size)
8973 zero_start = size & ~PAGE_MASK;
8974 else
8975 zero_start = PAGE_SIZE;
8976
8977 if (zero_start != PAGE_SIZE) {
8978 kaddr = kmap(page);
8979 memset(kaddr + zero_start, 0, PAGE_SIZE - zero_start);
8980 flush_dcache_page(page);
8981 kunmap(page);
8982 }
8983 ClearPageChecked(page);
8984 set_page_dirty(page);
8985 SetPageUptodate(page);
8986
8987 BTRFS_I(inode)->last_trans = root->fs_info->generation;
8988 BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
8989 BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit;
8990
8991 unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS);
8992
8993out_unlock:
8994 if (!ret) {
8995 sb_end_pagefault(inode->i_sb);
8996 return VM_FAULT_LOCKED;
8997 }
8998 unlock_page(page);
8999out:
9000 btrfs_delalloc_release_space(inode, page_start, reserved_space);
9001out_noreserve:
9002 sb_end_pagefault(inode->i_sb);
9003 return ret;
9004}
9005
/*
 * Drop the file extent items of @inode beyond its current i_size.
 *
 * The work is done with a temporary block reservation of min_size bytes
 * that is refilled from the transaction reservation.  Whenever
 * btrfs_truncate_inode_items() runs out of reserved space (-ENOSPC) or
 * asks to be restarted (-EAGAIN), the inode is updated, the transaction is
 * ended and a new one is started before continuing.
 *
 * Returns 0 on success or a negative errno.
 */
static int btrfs_truncate(struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_block_rsv *rsv;
	int ret = 0;
	int err = 0;
	struct btrfs_trans_handle *trans;
	u64 mask = root->sectorsize - 1;
	u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);

	/* Wait for ordered IO from the sector containing i_size onwards. */
	ret = btrfs_wait_ordered_range(inode, inode->i_size & (~mask),
				       (u64)-1);
	if (ret)
		return ret;

	/*
	 * Use a private, temporary reservation for the truncate itself.
	 * failfast is set on it; together with the -ENOSPC handling in the
	 * loop below this bounds how much one transaction handle does before
	 * the reservation is refilled.
	 * NOTE(review): exact failfast semantics live in the block_rsv code
	 * -- confirm.
	 */
	rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
	if (!rsv)
		return -ENOMEM;
	rsv->size = min_size;
	rsv->failfast = 1;

	/*
	 * Two units are reserved for the transaction.
	 * NOTE(review): presumably one for the truncate and one for the
	 * inode update -- confirm.
	 */
	trans = btrfs_start_transaction(root, 2);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		goto out;
	}

	/* Move min_size bytes from the transaction rsv into ours. */
	ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv,
				      min_size);
	BUG_ON(ret);

	/*
	 * Flag the inode as needing a full sync.
	 * NOTE(review): presumably so a later fsync logs the complete inode
	 * after its extents changed -- confirm.
	 */
	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
	trans->block_rsv = rsv;

	while (1) {
		ret = btrfs_truncate_inode_items(trans, root, inode,
						 inode->i_size,
						 BTRFS_EXTENT_DATA_KEY);
		/* Anything but "need more space / restart" ends the loop. */
		if (ret != -ENOSPC && ret != -EAGAIN) {
			err = ret;
			break;
		}

		trans->block_rsv = &root->fs_info->trans_block_rsv;
		ret = btrfs_update_inode(trans, root, inode);
		if (ret) {
			err = ret;
			break;
		}

		btrfs_end_transaction(trans, root);
		btrfs_btree_balance_dirty(root);

		trans = btrfs_start_transaction(root, 2);
		if (IS_ERR(trans)) {
			ret = err = PTR_ERR(trans);
			trans = NULL;
			break;
		}

		/* Refill the private reservation for the next round. */
		ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv,
					      rsv, min_size);
		BUG_ON(ret);
		trans->block_rsv = rsv;
	}

	/* On success, remove the orphan item of a still-linked inode. */
	if (ret == 0 && inode->i_nlink > 0) {
		trans->block_rsv = root->orphan_block_rsv;
		ret = btrfs_orphan_del(trans, inode);
		if (ret)
			err = ret;
	}

	if (trans) {
		trans->block_rsv = &root->fs_info->trans_block_rsv;
		ret = btrfs_update_inode(trans, root, inode);
		if (ret && !err)
			err = ret;

		ret = btrfs_end_transaction(trans, root);
		btrfs_btree_balance_dirty(root);
	}

out:
	btrfs_free_block_rsv(root, rsv);

	/* Report the first error seen. */
	if (ret && !err)
		err = ret;

	return err;
}
9145
9146
9147
9148
9149int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
9150 struct btrfs_root *new_root,
9151 struct btrfs_root *parent_root,
9152 u64 new_dirid)
9153{
9154 struct inode *inode;
9155 int err;
9156 u64 index = 0;
9157
9158 inode = btrfs_new_inode(trans, new_root, NULL, "..", 2,
9159 new_dirid, new_dirid,
9160 S_IFDIR | (~current_umask() & S_IRWXUGO),
9161 &index);
9162 if (IS_ERR(inode))
9163 return PTR_ERR(inode);
9164 inode->i_op = &btrfs_dir_inode_operations;
9165 inode->i_fop = &btrfs_dir_file_operations;
9166
9167 set_nlink(inode, 1);
9168 btrfs_i_size_write(inode, 0);
9169 unlock_new_inode(inode);
9170
9171 err = btrfs_subvol_inherit_props(trans, new_root, parent_root);
9172 if (err)
9173 btrfs_err(new_root->fs_info,
9174 "error inheriting subvolume %llu properties: %d",
9175 new_root->root_key.objectid, err);
9176
9177 err = btrfs_update_inode(trans, new_root, inode);
9178
9179 iput(inode);
9180 return err;
9181}
9182
9183struct inode *btrfs_alloc_inode(struct super_block *sb)
9184{
9185 struct btrfs_inode *ei;
9186 struct inode *inode;
9187
9188 ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
9189 if (!ei)
9190 return NULL;
9191
9192 ei->root = NULL;
9193 ei->generation = 0;
9194 ei->last_trans = 0;
9195 ei->last_sub_trans = 0;
9196 ei->logged_trans = 0;
9197 ei->delalloc_bytes = 0;
9198 ei->defrag_bytes = 0;
9199 ei->disk_i_size = 0;
9200 ei->flags = 0;
9201 ei->csum_bytes = 0;
9202 ei->index_cnt = (u64)-1;
9203 ei->dir_index = 0;
9204 ei->last_unlink_trans = 0;
9205 ei->last_log_commit = 0;
9206 ei->delayed_iput_count = 0;
9207
9208 spin_lock_init(&ei->lock);
9209 ei->outstanding_extents = 0;
9210 ei->reserved_extents = 0;
9211
9212 ei->runtime_flags = 0;
9213 ei->force_compress = BTRFS_COMPRESS_NONE;
9214
9215 ei->delayed_node = NULL;
9216
9217 ei->i_otime.tv_sec = 0;
9218 ei->i_otime.tv_nsec = 0;
9219
9220 inode = &ei->vfs_inode;
9221 extent_map_tree_init(&ei->extent_tree);
9222 extent_io_tree_init(&ei->io_tree, &inode->i_data);
9223 extent_io_tree_init(&ei->io_failure_tree, &inode->i_data);
9224 ei->io_tree.track_uptodate = 1;
9225 ei->io_failure_tree.track_uptodate = 1;
9226 atomic_set(&ei->sync_writers, 0);
9227 mutex_init(&ei->log_mutex);
9228 mutex_init(&ei->delalloc_mutex);
9229 btrfs_ordered_inode_tree_init(&ei->ordered_tree);
9230 INIT_LIST_HEAD(&ei->delalloc_inodes);
9231 INIT_LIST_HEAD(&ei->delayed_iput);
9232 RB_CLEAR_NODE(&ei->rb_node);
9233
9234 return inode;
9235}
9236
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/*
 * Sanity-test teardown: drop every cached extent mapping of @inode and hand
 * the containing btrfs_inode straight back to the slab cache.
 */
void btrfs_test_destroy_inode(struct inode *inode)
{
	struct btrfs_inode *binode = BTRFS_I(inode);

	btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
	kmem_cache_free(btrfs_inode_cachep, binode);
}
#endif
9244
9245static void btrfs_i_callback(struct rcu_head *head)
9246{
9247 struct inode *inode = container_of(head, struct inode, i_rcu);
9248 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
9249}
9250
9251void btrfs_destroy_inode(struct inode *inode)
9252{
9253 struct btrfs_ordered_extent *ordered;
9254 struct btrfs_root *root = BTRFS_I(inode)->root;
9255
9256 WARN_ON(!hlist_empty(&inode->i_dentry));
9257 WARN_ON(inode->i_data.nrpages);
9258 WARN_ON(BTRFS_I(inode)->outstanding_extents);
9259 WARN_ON(BTRFS_I(inode)->reserved_extents);
9260 WARN_ON(BTRFS_I(inode)->delalloc_bytes);
9261 WARN_ON(BTRFS_I(inode)->csum_bytes);
9262 WARN_ON(BTRFS_I(inode)->defrag_bytes);
9263
9264
9265
9266
9267
9268
9269 if (!root)
9270 goto free;
9271
9272 if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
9273 &BTRFS_I(inode)->runtime_flags)) {
9274 btrfs_info(root->fs_info, "inode %llu still on the orphan list",
9275 btrfs_ino(inode));
9276 atomic_dec(&root->orphan_inodes);
9277 }
9278
9279 while (1) {
9280 ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
9281 if (!ordered)
9282 break;
9283 else {
9284 btrfs_err(root->fs_info, "found ordered extent %llu %llu on inode cleanup",
9285 ordered->file_offset, ordered->len);
9286 btrfs_remove_ordered_extent(inode, ordered);
9287 btrfs_put_ordered_extent(ordered);
9288 btrfs_put_ordered_extent(ordered);
9289 }
9290 }
9291 btrfs_qgroup_check_reserved_leak(inode);
9292 inode_tree_del(inode);
9293 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
9294free:
9295 call_rcu(&inode->i_rcu, btrfs_i_callback);
9296}
9297
9298int btrfs_drop_inode(struct inode *inode)
9299{
9300 struct btrfs_root *root = BTRFS_I(inode)->root;
9301
9302 if (root == NULL)
9303 return 1;
9304
9305
9306 if (btrfs_root_refs(&root->root_item) == 0)
9307 return 1;
9308 else
9309 return generic_drop_inode(inode);
9310}
9311
9312static void init_once(void *foo)
9313{
9314 struct btrfs_inode *ei = (struct btrfs_inode *) foo;
9315
9316 inode_init_once(&ei->vfs_inode);
9317}
9318
9319void btrfs_destroy_cachep(void)
9320{
9321
9322
9323
9324
9325 rcu_barrier();
9326 kmem_cache_destroy(btrfs_inode_cachep);
9327 kmem_cache_destroy(btrfs_trans_handle_cachep);
9328 kmem_cache_destroy(btrfs_transaction_cachep);
9329 kmem_cache_destroy(btrfs_path_cachep);
9330 kmem_cache_destroy(btrfs_free_space_cachep);
9331}
9332
9333int btrfs_init_cachep(void)
9334{
9335 btrfs_inode_cachep = kmem_cache_create("btrfs_inode",
9336 sizeof(struct btrfs_inode), 0,
9337 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT,
9338 init_once);
9339 if (!btrfs_inode_cachep)
9340 goto fail;
9341
9342 btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle",
9343 sizeof(struct btrfs_trans_handle), 0,
9344 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
9345 if (!btrfs_trans_handle_cachep)
9346 goto fail;
9347
9348 btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction",
9349 sizeof(struct btrfs_transaction), 0,
9350 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
9351 if (!btrfs_transaction_cachep)
9352 goto fail;
9353
9354 btrfs_path_cachep = kmem_cache_create("btrfs_path",
9355 sizeof(struct btrfs_path), 0,
9356 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
9357 if (!btrfs_path_cachep)
9358 goto fail;
9359
9360 btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space",
9361 sizeof(struct btrfs_free_space), 0,
9362 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
9363 if (!btrfs_free_space_cachep)
9364 goto fail;
9365
9366 return 0;
9367fail:
9368 btrfs_destroy_cachep();
9369 return -ENOMEM;
9370}
9371
9372static int btrfs_getattr(struct vfsmount *mnt,
9373 struct dentry *dentry, struct kstat *stat)
9374{
9375 u64 delalloc_bytes;
9376 struct inode *inode = d_inode(dentry);
9377 u32 blocksize = inode->i_sb->s_blocksize;
9378
9379 generic_fillattr(inode, stat);
9380 stat->dev = BTRFS_I(inode)->root->anon_dev;
9381
9382 spin_lock(&BTRFS_I(inode)->lock);
9383 delalloc_bytes = BTRFS_I(inode)->delalloc_bytes;
9384 spin_unlock(&BTRFS_I(inode)->lock);
9385 stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) +
9386 ALIGN(delalloc_bytes, blocksize)) >> 9;
9387 return 0;
9388}
9389
9390static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
9391 struct inode *new_dir, struct dentry *new_dentry)
9392{
9393 struct btrfs_trans_handle *trans;
9394 struct btrfs_root *root = BTRFS_I(old_dir)->root;
9395 struct btrfs_root *dest = BTRFS_I(new_dir)->root;
9396 struct inode *new_inode = d_inode(new_dentry);
9397 struct inode *old_inode = d_inode(old_dentry);
9398 u64 index = 0;
9399 u64 root_objectid;
9400 int ret;
9401 u64 old_ino = btrfs_ino(old_inode);
9402
9403 if (btrfs_ino(new_dir) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
9404 return -EPERM;
9405
9406
9407 if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
9408 return -EXDEV;
9409
9410 if (old_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID ||
9411 (new_inode && btrfs_ino(new_inode) == BTRFS_FIRST_FREE_OBJECTID))
9412 return -ENOTEMPTY;
9413
9414 if (S_ISDIR(old_inode->i_mode) && new_inode &&
9415 new_inode->i_size > BTRFS_EMPTY_DIR_SIZE)
9416 return -ENOTEMPTY;
9417
9418
9419
9420 ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino,
9421 new_dentry->d_name.name,
9422 new_dentry->d_name.len);
9423
9424 if (ret) {
9425 if (ret == -EEXIST) {
9426
9427
9428 if (WARN_ON(!new_inode)) {
9429 return ret;
9430 }
9431 } else {
9432
9433 return ret;
9434 }
9435 }
9436 ret = 0;
9437
9438
9439
9440
9441
9442 if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size)
9443 filemap_flush(old_inode->i_mapping);
9444
9445
9446 if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
9447 down_read(&root->fs_info->subvol_sem);
9448
9449
9450
9451
9452
9453
9454
9455
9456 trans = btrfs_start_transaction(root, 11);
9457 if (IS_ERR(trans)) {
9458 ret = PTR_ERR(trans);
9459 goto out_notrans;
9460 }
9461
9462 if (dest != root)
9463 btrfs_record_root_in_trans(trans, dest);
9464
9465 ret = btrfs_set_inode_index(new_dir, &index);
9466 if (ret)
9467 goto out_fail;
9468
9469 BTRFS_I(old_inode)->dir_index = 0ULL;
9470 if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
9471
9472 btrfs_set_log_full_commit(root->fs_info, trans);
9473 } else {
9474 ret = btrfs_insert_inode_ref(trans, dest,
9475 new_dentry->d_name.name,
9476 new_dentry->d_name.len,
9477 old_ino,
9478 btrfs_ino(new_dir), index);
9479 if (ret)
9480 goto out_fail;
9481
9482
9483
9484
9485
9486
9487
9488 btrfs_pin_log_trans(root);
9489 }
9490
9491 inode_inc_iversion(old_dir);
9492 inode_inc_iversion(new_dir);
9493 inode_inc_iversion(old_inode);
9494 old_dir->i_ctime = old_dir->i_mtime =
9495 new_dir->i_ctime = new_dir->i_mtime =
9496 old_inode->i_ctime = current_fs_time(old_dir->i_sb);
9497
9498 if (old_dentry->d_parent != new_dentry->d_parent)
9499 btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
9500
9501 if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
9502 root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
9503 ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid,
9504 old_dentry->d_name.name,
9505 old_dentry->d_name.len);
9506 } else {
9507 ret = __btrfs_unlink_inode(trans, root, old_dir,
9508 d_inode(old_dentry),
9509 old_dentry->d_name.name,
9510 old_dentry->d_name.len);
9511 if (!ret)
9512 ret = btrfs_update_inode(trans, root, old_inode);
9513 }
9514 if (ret) {
9515 btrfs_abort_transaction(trans, root, ret);
9516 goto out_fail;
9517 }
9518
9519 if (new_inode) {
9520 inode_inc_iversion(new_inode);
9521 new_inode->i_ctime = current_fs_time(new_inode->i_sb);
9522 if (unlikely(btrfs_ino(new_inode) ==
9523 BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
9524 root_objectid = BTRFS_I(new_inode)->location.objectid;
9525 ret = btrfs_unlink_subvol(trans, dest, new_dir,
9526 root_objectid,
9527 new_dentry->d_name.name,
9528 new_dentry->d_name.len);
9529 BUG_ON(new_inode->i_nlink == 0);
9530 } else {
9531 ret = btrfs_unlink_inode(trans, dest, new_dir,
9532 d_inode(new_dentry),
9533 new_dentry->d_name.name,
9534 new_dentry->d_name.len);
9535 }
9536 if (!ret && new_inode->i_nlink == 0)
9537 ret = btrfs_orphan_add(trans, d_inode(new_dentry));
9538 if (ret) {
9539 btrfs_abort_transaction(trans, root, ret);
9540 goto out_fail;
9541 }
9542 }
9543
9544 ret = btrfs_add_link(trans, new_dir, old_inode,
9545 new_dentry->d_name.name,
9546 new_dentry->d_name.len, 0, index);
9547 if (ret) {
9548 btrfs_abort_transaction(trans, root, ret);
9549 goto out_fail;
9550 }
9551
9552 if (old_inode->i_nlink == 1)
9553 BTRFS_I(old_inode)->dir_index = index;
9554
9555 if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
9556 struct dentry *parent = new_dentry->d_parent;
9557 btrfs_log_new_name(trans, old_inode, old_dir, parent);
9558 btrfs_end_log_trans(root);
9559 }
9560out_fail:
9561 btrfs_end_transaction(trans, root);
9562out_notrans:
9563 if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
9564 up_read(&root->fs_info->subvol_sem);
9565
9566 return ret;
9567}
9568
9569static int btrfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
9570 struct inode *new_dir, struct dentry *new_dentry,
9571 unsigned int flags)
9572{
9573 if (flags & ~RENAME_NOREPLACE)
9574 return -EINVAL;
9575
9576 return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry);
9577}
9578
9579static void btrfs_run_delalloc_work(struct btrfs_work *work)
9580{
9581 struct btrfs_delalloc_work *delalloc_work;
9582 struct inode *inode;
9583
9584 delalloc_work = container_of(work, struct btrfs_delalloc_work,
9585 work);
9586 inode = delalloc_work->inode;
9587 filemap_flush(inode->i_mapping);
9588 if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
9589 &BTRFS_I(inode)->runtime_flags))
9590 filemap_flush(inode->i_mapping);
9591
9592 if (delalloc_work->delay_iput)
9593 btrfs_add_delayed_iput(inode);
9594 else
9595 iput(inode);
9596 complete(&delalloc_work->completion);
9597}
9598
9599struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
9600 int delay_iput)
9601{
9602 struct btrfs_delalloc_work *work;
9603
9604 work = kmalloc(sizeof(*work), GFP_NOFS);
9605 if (!work)
9606 return NULL;
9607
9608 init_completion(&work->completion);
9609 INIT_LIST_HEAD(&work->list);
9610 work->inode = inode;
9611 work->delay_iput = delay_iput;
9612 WARN_ON_ONCE(!inode);
9613 btrfs_init_work(&work->work, btrfs_flush_delalloc_helper,
9614 btrfs_run_delalloc_work, NULL, NULL);
9615
9616 return work;
9617}
9618
9619void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work)
9620{
9621 wait_for_completion(&work->completion);
9622 kfree(work);
9623}
9624
9625
9626
9627
9628
9629static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput,
9630 int nr)
9631{
9632 struct btrfs_inode *binode;
9633 struct inode *inode;
9634 struct btrfs_delalloc_work *work, *next;
9635 struct list_head works;
9636 struct list_head splice;
9637 int ret = 0;
9638
9639 INIT_LIST_HEAD(&works);
9640 INIT_LIST_HEAD(&splice);
9641
9642 mutex_lock(&root->delalloc_mutex);
9643 spin_lock(&root->delalloc_lock);
9644 list_splice_init(&root->delalloc_inodes, &splice);
9645 while (!list_empty(&splice)) {
9646 binode = list_entry(splice.next, struct btrfs_inode,
9647 delalloc_inodes);
9648
9649 list_move_tail(&binode->delalloc_inodes,
9650 &root->delalloc_inodes);
9651 inode = igrab(&binode->vfs_inode);
9652 if (!inode) {
9653 cond_resched_lock(&root->delalloc_lock);
9654 continue;
9655 }
9656 spin_unlock(&root->delalloc_lock);
9657
9658 work = btrfs_alloc_delalloc_work(inode, delay_iput);
9659 if (!work) {
9660 if (delay_iput)
9661 btrfs_add_delayed_iput(inode);
9662 else
9663 iput(inode);
9664 ret = -ENOMEM;
9665 goto out;
9666 }
9667 list_add_tail(&work->list, &works);
9668 btrfs_queue_work(root->fs_info->flush_workers,
9669 &work->work);
9670 ret++;
9671 if (nr != -1 && ret >= nr)
9672 goto out;
9673 cond_resched();
9674 spin_lock(&root->delalloc_lock);
9675 }
9676 spin_unlock(&root->delalloc_lock);
9677
9678out:
9679 list_for_each_entry_safe(work, next, &works, list) {
9680 list_del_init(&work->list);
9681 btrfs_wait_and_free_delalloc_work(work);
9682 }
9683
9684 if (!list_empty_careful(&splice)) {
9685 spin_lock(&root->delalloc_lock);
9686 list_splice_tail(&splice, &root->delalloc_inodes);
9687 spin_unlock(&root->delalloc_lock);
9688 }
9689 mutex_unlock(&root->delalloc_mutex);
9690 return ret;
9691}
9692
9693int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
9694{
9695 int ret;
9696
9697 if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
9698 return -EROFS;
9699
9700 ret = __start_delalloc_inodes(root, delay_iput, -1);
9701 if (ret > 0)
9702 ret = 0;
9703
9704
9705
9706
9707
9708 atomic_inc(&root->fs_info->async_submit_draining);
9709 while (atomic_read(&root->fs_info->nr_async_submits) ||
9710 atomic_read(&root->fs_info->async_delalloc_pages)) {
9711 wait_event(root->fs_info->async_submit_wait,
9712 (atomic_read(&root->fs_info->nr_async_submits) == 0 &&
9713 atomic_read(&root->fs_info->async_delalloc_pages) == 0));
9714 }
9715 atomic_dec(&root->fs_info->async_submit_draining);
9716 return ret;
9717}
9718
9719int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput,
9720 int nr)
9721{
9722 struct btrfs_root *root;
9723 struct list_head splice;
9724 int ret;
9725
9726 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
9727 return -EROFS;
9728
9729 INIT_LIST_HEAD(&splice);
9730
9731 mutex_lock(&fs_info->delalloc_root_mutex);
9732 spin_lock(&fs_info->delalloc_root_lock);
9733 list_splice_init(&fs_info->delalloc_roots, &splice);
9734 while (!list_empty(&splice) && nr) {
9735 root = list_first_entry(&splice, struct btrfs_root,
9736 delalloc_root);
9737 root = btrfs_grab_fs_root(root);
9738 BUG_ON(!root);
9739 list_move_tail(&root->delalloc_root,
9740 &fs_info->delalloc_roots);
9741 spin_unlock(&fs_info->delalloc_root_lock);
9742
9743 ret = __start_delalloc_inodes(root, delay_iput, nr);
9744 btrfs_put_fs_root(root);
9745 if (ret < 0)
9746 goto out;
9747
9748 if (nr != -1) {
9749 nr -= ret;
9750 WARN_ON(nr < 0);
9751 }
9752 spin_lock(&fs_info->delalloc_root_lock);
9753 }
9754 spin_unlock(&fs_info->delalloc_root_lock);
9755
9756 ret = 0;
9757 atomic_inc(&fs_info->async_submit_draining);
9758 while (atomic_read(&fs_info->nr_async_submits) ||
9759 atomic_read(&fs_info->async_delalloc_pages)) {
9760 wait_event(fs_info->async_submit_wait,
9761 (atomic_read(&fs_info->nr_async_submits) == 0 &&
9762 atomic_read(&fs_info->async_delalloc_pages) == 0));
9763 }
9764 atomic_dec(&fs_info->async_submit_draining);
9765out:
9766 if (!list_empty_careful(&splice)) {
9767 spin_lock(&fs_info->delalloc_root_lock);
9768 list_splice_tail(&splice, &fs_info->delalloc_roots);
9769 spin_unlock(&fs_info->delalloc_root_lock);
9770 }
9771 mutex_unlock(&fs_info->delalloc_root_mutex);
9772 return ret;
9773}
9774
9775static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
9776 const char *symname)
9777{
9778 struct btrfs_trans_handle *trans;
9779 struct btrfs_root *root = BTRFS_I(dir)->root;
9780 struct btrfs_path *path;
9781 struct btrfs_key key;
9782 struct inode *inode = NULL;
9783 int err;
9784 int drop_inode = 0;
9785 u64 objectid;
9786 u64 index = 0;
9787 int name_len;
9788 int datasize;
9789 unsigned long ptr;
9790 struct btrfs_file_extent_item *ei;
9791 struct extent_buffer *leaf;
9792
9793 name_len = strlen(symname);
9794 if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
9795 return -ENAMETOOLONG;
9796
9797
9798
9799
9800
9801
9802
9803
9804 trans = btrfs_start_transaction(root, 7);
9805 if (IS_ERR(trans))
9806 return PTR_ERR(trans);
9807
9808 err = btrfs_find_free_ino(root, &objectid);
9809 if (err)
9810 goto out_unlock;
9811
9812 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
9813 dentry->d_name.len, btrfs_ino(dir), objectid,
9814 S_IFLNK|S_IRWXUGO, &index);
9815 if (IS_ERR(inode)) {
9816 err = PTR_ERR(inode);
9817 goto out_unlock;
9818 }
9819
9820
9821
9822
9823
9824
9825
9826 inode->i_fop = &btrfs_file_operations;
9827 inode->i_op = &btrfs_file_inode_operations;
9828 inode->i_mapping->a_ops = &btrfs_aops;
9829 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
9830
9831 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
9832 if (err)
9833 goto out_unlock_inode;
9834
9835 path = btrfs_alloc_path();
9836 if (!path) {
9837 err = -ENOMEM;
9838 goto out_unlock_inode;
9839 }
9840 key.objectid = btrfs_ino(inode);
9841 key.offset = 0;
9842 key.type = BTRFS_EXTENT_DATA_KEY;
9843 datasize = btrfs_file_extent_calc_inline_size(name_len);
9844 err = btrfs_insert_empty_item(trans, root, path, &key,
9845 datasize);
9846 if (err) {
9847 btrfs_free_path(path);
9848 goto out_unlock_inode;
9849 }
9850 leaf = path->nodes[0];
9851 ei = btrfs_item_ptr(leaf, path->slots[0],
9852 struct btrfs_file_extent_item);
9853 btrfs_set_file_extent_generation(leaf, ei, trans->transid);
9854 btrfs_set_file_extent_type(leaf, ei,
9855 BTRFS_FILE_EXTENT_INLINE);
9856 btrfs_set_file_extent_encryption(leaf, ei, 0);
9857 btrfs_set_file_extent_compression(leaf, ei, 0);
9858 btrfs_set_file_extent_other_encoding(leaf, ei, 0);
9859 btrfs_set_file_extent_ram_bytes(leaf, ei, name_len);
9860
9861 ptr = btrfs_file_extent_inline_start(ei);
9862 write_extent_buffer(leaf, symname, ptr, name_len);
9863 btrfs_mark_buffer_dirty(leaf);
9864 btrfs_free_path(path);
9865
9866 inode->i_op = &btrfs_symlink_inode_operations;
9867 inode_nohighmem(inode);
9868 inode->i_mapping->a_ops = &btrfs_symlink_aops;
9869 inode_set_bytes(inode, name_len);
9870 btrfs_i_size_write(inode, name_len);
9871 err = btrfs_update_inode(trans, root, inode);
9872
9873
9874
9875
9876
9877 if (!err)
9878 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
9879 if (err) {
9880 drop_inode = 1;
9881 goto out_unlock_inode;
9882 }
9883
9884 unlock_new_inode(inode);
9885 d_instantiate(dentry, inode);
9886
9887out_unlock:
9888 btrfs_end_transaction(trans, root);
9889 if (drop_inode) {
9890 inode_dec_link_count(inode);
9891 iput(inode);
9892 }
9893 btrfs_btree_balance_dirty(root);
9894 return err;
9895
9896out_unlock_inode:
9897 drop_inode = 1;
9898 unlock_new_inode(inode);
9899 goto out_unlock;
9900}
9901
9902static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
9903 u64 start, u64 num_bytes, u64 min_size,
9904 loff_t actual_len, u64 *alloc_hint,
9905 struct btrfs_trans_handle *trans)
9906{
9907 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
9908 struct extent_map *em;
9909 struct btrfs_root *root = BTRFS_I(inode)->root;
9910 struct btrfs_key ins;
9911 u64 cur_offset = start;
9912 u64 i_size;
9913 u64 cur_bytes;
9914 u64 last_alloc = (u64)-1;
9915 int ret = 0;
9916 bool own_trans = true;
9917
9918 if (trans)
9919 own_trans = false;
9920 while (num_bytes > 0) {
9921 if (own_trans) {
9922 trans = btrfs_start_transaction(root, 3);
9923 if (IS_ERR(trans)) {
9924 ret = PTR_ERR(trans);
9925 break;
9926 }
9927 }
9928
9929 cur_bytes = min_t(u64, num_bytes, SZ_256M);
9930 cur_bytes = max(cur_bytes, min_size);
9931
9932
9933
9934
9935
9936
9937 cur_bytes = min(cur_bytes, last_alloc);
9938 ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0,
9939 *alloc_hint, &ins, 1, 0);
9940 if (ret) {
9941 if (own_trans)
9942 btrfs_end_transaction(trans, root);
9943 break;
9944 }
9945
9946 last_alloc = ins.offset;
9947 ret = insert_reserved_file_extent(trans, inode,
9948 cur_offset, ins.objectid,
9949 ins.offset, ins.offset,
9950 ins.offset, 0, 0, 0,
9951 BTRFS_FILE_EXTENT_PREALLOC);
9952 if (ret) {
9953 btrfs_free_reserved_extent(root, ins.objectid,
9954 ins.offset, 0);
9955 btrfs_abort_transaction(trans, root, ret);
9956 if (own_trans)
9957 btrfs_end_transaction(trans, root);
9958 break;
9959 }
9960
9961 btrfs_drop_extent_cache(inode, cur_offset,
9962 cur_offset + ins.offset -1, 0);
9963
9964 em = alloc_extent_map();
9965 if (!em) {
9966 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
9967 &BTRFS_I(inode)->runtime_flags);
9968 goto next;
9969 }
9970
9971 em->start = cur_offset;
9972 em->orig_start = cur_offset;
9973 em->len = ins.offset;
9974 em->block_start = ins.objectid;
9975 em->block_len = ins.offset;
9976 em->orig_block_len = ins.offset;
9977 em->ram_bytes = ins.offset;
9978 em->bdev = root->fs_info->fs_devices->latest_bdev;
9979 set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
9980 em->generation = trans->transid;
9981
9982 while (1) {
9983 write_lock(&em_tree->lock);
9984 ret = add_extent_mapping(em_tree, em, 1);
9985 write_unlock(&em_tree->lock);
9986 if (ret != -EEXIST)
9987 break;
9988 btrfs_drop_extent_cache(inode, cur_offset,
9989 cur_offset + ins.offset - 1,
9990 0);
9991 }
9992 free_extent_map(em);
9993next:
9994 num_bytes -= ins.offset;
9995 cur_offset += ins.offset;
9996 *alloc_hint = ins.objectid + ins.offset;
9997
9998 inode_inc_iversion(inode);
9999 inode->i_ctime = current_fs_time(inode->i_sb);
10000 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
10001 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
10002 (actual_len > inode->i_size) &&
10003 (cur_offset > inode->i_size)) {
10004 if (cur_offset > actual_len)
10005 i_size = actual_len;
10006 else
10007 i_size = cur_offset;
10008 i_size_write(inode, i_size);
10009 btrfs_ordered_update_i_size(inode, i_size, NULL);
10010 }
10011
10012 ret = btrfs_update_inode(trans, root, inode);
10013
10014 if (ret) {
10015 btrfs_abort_transaction(trans, root, ret);
10016 if (own_trans)
10017 btrfs_end_transaction(trans, root);
10018 break;
10019 }
10020
10021 if (own_trans)
10022 btrfs_end_transaction(trans, root);
10023 }
10024 return ret;
10025}
10026
10027int btrfs_prealloc_file_range(struct inode *inode, int mode,
10028 u64 start, u64 num_bytes, u64 min_size,
10029 loff_t actual_len, u64 *alloc_hint)
10030{
10031 return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
10032 min_size, actual_len, alloc_hint,
10033 NULL);
10034}
10035
10036int btrfs_prealloc_file_range_trans(struct inode *inode,
10037 struct btrfs_trans_handle *trans, int mode,
10038 u64 start, u64 num_bytes, u64 min_size,
10039 loff_t actual_len, u64 *alloc_hint)
10040{
10041 return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
10042 min_size, actual_len, alloc_hint, trans);
10043}
10044
/*
 * ->set_page_dirty: delegate to the buffer-less generic helper and return
 * its result unchanged.
 */
static int btrfs_set_page_dirty(struct page *page)
{
	int ret = __set_page_dirty_nobuffers(page);

	return ret;
}
10049
10050static int btrfs_permission(struct inode *inode, int mask)
10051{
10052 struct btrfs_root *root = BTRFS_I(inode)->root;
10053 umode_t mode = inode->i_mode;
10054
10055 if (mask & MAY_WRITE &&
10056 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
10057 if (btrfs_root_readonly(root))
10058 return -EROFS;
10059 if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY)
10060 return -EACCES;
10061 }
10062 return generic_permission(inode, mask);
10063}
10064
10065static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
10066{
10067 struct btrfs_trans_handle *trans;
10068 struct btrfs_root *root = BTRFS_I(dir)->root;
10069 struct inode *inode = NULL;
10070 u64 objectid;
10071 u64 index;
10072 int ret = 0;
10073
10074
10075
10076
10077 trans = btrfs_start_transaction(root, 5);
10078 if (IS_ERR(trans))
10079 return PTR_ERR(trans);
10080
10081 ret = btrfs_find_free_ino(root, &objectid);
10082 if (ret)
10083 goto out;
10084
10085 inode = btrfs_new_inode(trans, root, dir, NULL, 0,
10086 btrfs_ino(dir), objectid, mode, &index);
10087 if (IS_ERR(inode)) {
10088 ret = PTR_ERR(inode);
10089 inode = NULL;
10090 goto out;
10091 }
10092
10093 inode->i_fop = &btrfs_file_operations;
10094 inode->i_op = &btrfs_file_inode_operations;
10095
10096 inode->i_mapping->a_ops = &btrfs_aops;
10097 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
10098
10099 ret = btrfs_init_inode_security(trans, inode, dir, NULL);
10100 if (ret)
10101 goto out_inode;
10102
10103 ret = btrfs_update_inode(trans, root, inode);
10104 if (ret)
10105 goto out_inode;
10106 ret = btrfs_orphan_add(trans, inode);
10107 if (ret)
10108 goto out_inode;
10109
10110
10111
10112
10113
10114
10115
10116
10117 set_nlink(inode, 1);
10118 unlock_new_inode(inode);
10119 d_tmpfile(dentry, inode);
10120 mark_inode_dirty(inode);
10121
10122out:
10123 btrfs_end_transaction(trans, root);
10124 if (ret)
10125 iput(inode);
10126 btrfs_balance_delayed_items(root);
10127 btrfs_btree_balance_dirty(root);
10128 return ret;
10129
10130out_inode:
10131 unlock_new_inode(inode);
10132 goto out;
10133
10134}
10135
10136
10137int btrfs_inode_check_errors(struct inode *inode)
10138{
10139 int ret = 0;
10140
10141 if (test_bit(AS_ENOSPC, &inode->i_mapping->flags) &&
10142 test_and_clear_bit(AS_ENOSPC, &inode->i_mapping->flags))
10143 ret = -ENOSPC;
10144 if (test_bit(AS_EIO, &inode->i_mapping->flags) &&
10145 test_and_clear_bit(AS_EIO, &inode->i_mapping->flags))
10146 ret = -EIO;
10147
10148 return ret;
10149}
10150
10151static const struct inode_operations btrfs_dir_inode_operations = {
10152 .getattr = btrfs_getattr,
10153 .lookup = btrfs_lookup,
10154 .create = btrfs_create,
10155 .unlink = btrfs_unlink,
10156 .link = btrfs_link,
10157 .mkdir = btrfs_mkdir,
10158 .rmdir = btrfs_rmdir,
10159 .rename2 = btrfs_rename2,
10160 .symlink = btrfs_symlink,
10161 .setattr = btrfs_setattr,
10162 .mknod = btrfs_mknod,
10163 .setxattr = btrfs_setxattr,
10164 .getxattr = generic_getxattr,
10165 .listxattr = btrfs_listxattr,
10166 .removexattr = btrfs_removexattr,
10167 .permission = btrfs_permission,
10168 .get_acl = btrfs_get_acl,
10169 .set_acl = btrfs_set_acl,
10170 .update_time = btrfs_update_time,
10171 .tmpfile = btrfs_tmpfile,
10172};
10173static const struct inode_operations btrfs_dir_ro_inode_operations = {
10174 .lookup = btrfs_lookup,
10175 .permission = btrfs_permission,
10176 .get_acl = btrfs_get_acl,
10177 .set_acl = btrfs_set_acl,
10178 .update_time = btrfs_update_time,
10179};
10180
10181static const struct file_operations btrfs_dir_file_operations = {
10182 .llseek = generic_file_llseek,
10183 .read = generic_read_dir,
10184 .iterate = btrfs_real_readdir,
10185 .unlocked_ioctl = btrfs_ioctl,
10186#ifdef CONFIG_COMPAT
10187 .compat_ioctl = btrfs_ioctl,
10188#endif
10189 .release = btrfs_release_file,
10190 .fsync = btrfs_sync_file,
10191};
10192
10193static const struct extent_io_ops btrfs_extent_io_ops = {
10194 .fill_delalloc = run_delalloc_range,
10195 .submit_bio_hook = btrfs_submit_bio_hook,
10196 .merge_bio_hook = btrfs_merge_bio_hook,
10197 .readpage_end_io_hook = btrfs_readpage_end_io_hook,
10198 .writepage_end_io_hook = btrfs_writepage_end_io_hook,
10199 .writepage_start_hook = btrfs_writepage_start_hook,
10200 .set_bit_hook = btrfs_set_bit_hook,
10201 .clear_bit_hook = btrfs_clear_bit_hook,
10202 .merge_extent_hook = btrfs_merge_extent_hook,
10203 .split_extent_hook = btrfs_split_extent_hook,
10204};
10205
10206
10207
10208
10209
10210
10211
10212
10213
10214
10215
10216
10217
10218static const struct address_space_operations btrfs_aops = {
10219 .readpage = btrfs_readpage,
10220 .writepage = btrfs_writepage,
10221 .writepages = btrfs_writepages,
10222 .readpages = btrfs_readpages,
10223 .direct_IO = btrfs_direct_IO,
10224 .invalidatepage = btrfs_invalidatepage,
10225 .releasepage = btrfs_releasepage,
10226 .set_page_dirty = btrfs_set_page_dirty,
10227 .error_remove_page = generic_error_remove_page,
10228};
10229
10230static const struct address_space_operations btrfs_symlink_aops = {
10231 .readpage = btrfs_readpage,
10232 .writepage = btrfs_writepage,
10233 .invalidatepage = btrfs_invalidatepage,
10234 .releasepage = btrfs_releasepage,
10235};
10236
10237static const struct inode_operations btrfs_file_inode_operations = {
10238 .getattr = btrfs_getattr,
10239 .setattr = btrfs_setattr,
10240 .setxattr = btrfs_setxattr,
10241 .getxattr = generic_getxattr,
10242 .listxattr = btrfs_listxattr,
10243 .removexattr = btrfs_removexattr,
10244 .permission = btrfs_permission,
10245 .fiemap = btrfs_fiemap,
10246 .get_acl = btrfs_get_acl,
10247 .set_acl = btrfs_set_acl,
10248 .update_time = btrfs_update_time,
10249};
10250static const struct inode_operations btrfs_special_inode_operations = {
10251 .getattr = btrfs_getattr,
10252 .setattr = btrfs_setattr,
10253 .permission = btrfs_permission,
10254 .setxattr = btrfs_setxattr,
10255 .getxattr = generic_getxattr,
10256 .listxattr = btrfs_listxattr,
10257 .removexattr = btrfs_removexattr,
10258 .get_acl = btrfs_get_acl,
10259 .set_acl = btrfs_set_acl,
10260 .update_time = btrfs_update_time,
10261};
10262static const struct inode_operations btrfs_symlink_inode_operations = {
10263 .readlink = generic_readlink,
10264 .get_link = page_get_link,
10265 .getattr = btrfs_getattr,
10266 .setattr = btrfs_setattr,
10267 .permission = btrfs_permission,
10268 .setxattr = btrfs_setxattr,
10269 .getxattr = generic_getxattr,
10270 .listxattr = btrfs_listxattr,
10271 .removexattr = btrfs_removexattr,
10272 .update_time = btrfs_update_time,
10273};
10274
10275const struct dentry_operations btrfs_dentry_operations = {
10276 .d_delete = btrfs_dentry_delete,
10277 .d_release = btrfs_dentry_release,
10278};
10279