1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19#include <linux/kernel.h>
20#include <linux/bio.h>
21#include <linux/buffer_head.h>
22#include <linux/file.h>
23#include <linux/fs.h>
24#include <linux/pagemap.h>
25#include <linux/highmem.h>
26#include <linux/time.h>
27#include <linux/init.h>
28#include <linux/string.h>
29#include <linux/backing-dev.h>
30#include <linux/mpage.h>
31#include <linux/swap.h>
32#include <linux/writeback.h>
33#include <linux/statfs.h>
34#include <linux/compat.h>
35#include <linux/aio.h>
36#include <linux/bit_spinlock.h>
37#include <linux/xattr.h>
38#include <linux/posix_acl.h>
39#include <linux/falloc.h>
40#include <linux/slab.h>
41#include <linux/ratelimit.h>
42#include <linux/mount.h>
43#include <linux/btrfs.h>
44#include <linux/blkdev.h>
45#include <linux/posix_acl_xattr.h>
46#include "ctree.h"
47#include "disk-io.h"
48#include "transaction.h"
49#include "btrfs_inode.h"
50#include "print-tree.h"
51#include "ordered-data.h"
52#include "xattr.h"
53#include "tree-log.h"
54#include "volumes.h"
55#include "compression.h"
56#include "locking.h"
57#include "free-space-cache.h"
58#include "inode-map.h"
59#include "backref.h"
60#include "hash.h"
61#include "props.h"
62
/*
 * Argument bundle pairing an item key with the root it refers to.
 * NOTE(review): presumably handed to the iget-style inode lookup helpers
 * defined further down in this file -- confirm against the users.
 */
63struct btrfs_iget_args {
64 struct btrfs_key *location;
65 struct btrfs_root *root;
66};
67
/*
 * Forward declarations of the operation tables used in this file; their
 * definitions are not part of this section.
 */
68static const struct inode_operations btrfs_dir_inode_operations;
69static const struct inode_operations btrfs_symlink_inode_operations;
70static const struct inode_operations btrfs_dir_ro_inode_operations;
71static const struct inode_operations btrfs_special_inode_operations;
72static const struct inode_operations btrfs_file_inode_operations;
73static const struct address_space_operations btrfs_aops;
74static const struct address_space_operations btrfs_symlink_aops;
75static const struct file_operations btrfs_dir_file_operations;
76static struct extent_io_ops btrfs_extent_io_ops;
77
/*
 * Slab cache pointers.  The two static ones are private to this file, the
 * remaining four have external linkage.
 */
78static struct kmem_cache *btrfs_inode_cachep;
79static struct kmem_cache *btrfs_delalloc_work_cachep;
80struct kmem_cache *btrfs_trans_handle_cachep;
81struct kmem_cache *btrfs_transaction_cachep;
82struct kmem_cache *btrfs_path_cachep;
83struct kmem_cache *btrfs_free_space_cachep;
84
/*
 * Lookup table from the file-type bits of an inode mode to a BTRFS_FT_*
 * code.  The index is the S_IF* value shifted right by S_SHIFT; slots that
 * are not listed stay zero-initialised.
 */
85#define S_SHIFT 12
86static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
87 [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
88 [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
89 [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
90 [S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
91 [S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
92 [S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
93 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
94};
95
/*
 * Prototypes for static functions that are referenced before their
 * definition.  Of these, only cow_file_range() is defined in this section.
 */
96static int btrfs_setsize(struct inode *inode, struct iattr *attr);
97static int btrfs_truncate(struct inode *inode);
98static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
99static noinline int cow_file_range(struct inode *inode,
100 struct page *locked_page,
101 u64 start, u64 end, int *page_started,
102 unsigned long *nr_written, int unlock);
103static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
104 u64 len, u64 orig_start,
105 u64 block_start, u64 block_len,
106 u64 orig_block_len, u64 ram_bytes,
107 int type);
108
109static int btrfs_dirty_inode(struct inode *inode);
110
/*
 * Initialise the security attributes of @inode inside @trans: the ACLs
 * first and, only when that succeeded, the security xattrs.
 *
 * Returns 0 on success, otherwise the error of the step that failed.
 */
static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
				     struct inode *inode, struct inode *dir,
				     const struct qstr *qstr)
{
	int ret = btrfs_init_acl(trans, inode, dir);

	if (ret)
		return ret;

	return btrfs_xattr_security_init(trans, inode, dir, qstr);
}
122
123
124
125
126
127
128static int insert_inline_extent(struct btrfs_trans_handle *trans,
129 struct btrfs_path *path, int extent_inserted,
130 struct btrfs_root *root, struct inode *inode,
131 u64 start, size_t size, size_t compressed_size,
132 int compress_type,
133 struct page **compressed_pages)
134{
135 struct extent_buffer *leaf;
136 struct page *page = NULL;
137 char *kaddr;
138 unsigned long ptr;
139 struct btrfs_file_extent_item *ei;
140 int err = 0;
141 int ret;
142 size_t cur_size = size;
143 unsigned long offset;
144
145 if (compressed_size && compressed_pages)
146 cur_size = compressed_size;
147
148 inode_add_bytes(inode, size);
149
150 if (!extent_inserted) {
151 struct btrfs_key key;
152 size_t datasize;
153
154 key.objectid = btrfs_ino(inode);
155 key.offset = start;
156 key.type = BTRFS_EXTENT_DATA_KEY;
157
158 datasize = btrfs_file_extent_calc_inline_size(cur_size);
159 path->leave_spinning = 1;
160 ret = btrfs_insert_empty_item(trans, root, path, &key,
161 datasize);
162 if (ret) {
163 err = ret;
164 goto fail;
165 }
166 }
167 leaf = path->nodes[0];
168 ei = btrfs_item_ptr(leaf, path->slots[0],
169 struct btrfs_file_extent_item);
170 btrfs_set_file_extent_generation(leaf, ei, trans->transid);
171 btrfs_set_file_extent_type(leaf, ei, BTRFS_FILE_EXTENT_INLINE);
172 btrfs_set_file_extent_encryption(leaf, ei, 0);
173 btrfs_set_file_extent_other_encoding(leaf, ei, 0);
174 btrfs_set_file_extent_ram_bytes(leaf, ei, size);
175 ptr = btrfs_file_extent_inline_start(ei);
176
177 if (compress_type != BTRFS_COMPRESS_NONE) {
178 struct page *cpage;
179 int i = 0;
180 while (compressed_size > 0) {
181 cpage = compressed_pages[i];
182 cur_size = min_t(unsigned long, compressed_size,
183 PAGE_CACHE_SIZE);
184
185 kaddr = kmap_atomic(cpage);
186 write_extent_buffer(leaf, kaddr, ptr, cur_size);
187 kunmap_atomic(kaddr);
188
189 i++;
190 ptr += cur_size;
191 compressed_size -= cur_size;
192 }
193 btrfs_set_file_extent_compression(leaf, ei,
194 compress_type);
195 } else {
196 page = find_get_page(inode->i_mapping,
197 start >> PAGE_CACHE_SHIFT);
198 btrfs_set_file_extent_compression(leaf, ei, 0);
199 kaddr = kmap_atomic(page);
200 offset = start & (PAGE_CACHE_SIZE - 1);
201 write_extent_buffer(leaf, kaddr + offset, ptr, size);
202 kunmap_atomic(kaddr);
203 page_cache_release(page);
204 }
205 btrfs_mark_buffer_dirty(leaf);
206 btrfs_release_path(path);
207
208
209
210
211
212
213
214
215
216
217 BTRFS_I(inode)->disk_i_size = inode->i_size;
218 ret = btrfs_update_inode(trans, root, inode);
219
220 return ret;
221fail:
222 return err;
223}
224
225
226
227
228
229
230
231static noinline int cow_file_range_inline(struct btrfs_root *root,
232 struct inode *inode, u64 start,
233 u64 end, size_t compressed_size,
234 int compress_type,
235 struct page **compressed_pages)
236{
237 struct btrfs_trans_handle *trans;
238 u64 isize = i_size_read(inode);
239 u64 actual_end = min(end + 1, isize);
240 u64 inline_len = actual_end - start;
241 u64 aligned_end = ALIGN(end, root->sectorsize);
242 u64 data_len = inline_len;
243 int ret;
244 struct btrfs_path *path;
245 int extent_inserted = 0;
246 u32 extent_item_size;
247
248 if (compressed_size)
249 data_len = compressed_size;
250
251 if (start > 0 ||
252 actual_end > PAGE_CACHE_SIZE ||
253 data_len > BTRFS_MAX_INLINE_DATA_SIZE(root) ||
254 (!compressed_size &&
255 (actual_end & (root->sectorsize - 1)) == 0) ||
256 end + 1 < isize ||
257 data_len > root->fs_info->max_inline) {
258 return 1;
259 }
260
261 path = btrfs_alloc_path();
262 if (!path)
263 return -ENOMEM;
264
265 trans = btrfs_join_transaction(root);
266 if (IS_ERR(trans)) {
267 btrfs_free_path(path);
268 return PTR_ERR(trans);
269 }
270 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
271
272 if (compressed_size && compressed_pages)
273 extent_item_size = btrfs_file_extent_calc_inline_size(
274 compressed_size);
275 else
276 extent_item_size = btrfs_file_extent_calc_inline_size(
277 inline_len);
278
279 ret = __btrfs_drop_extents(trans, root, inode, path,
280 start, aligned_end, NULL,
281 1, 1, extent_item_size, &extent_inserted);
282 if (ret) {
283 btrfs_abort_transaction(trans, root, ret);
284 goto out;
285 }
286
287 if (isize > actual_end)
288 inline_len = min_t(u64, isize, actual_end);
289 ret = insert_inline_extent(trans, path, extent_inserted,
290 root, inode, start,
291 inline_len, compressed_size,
292 compress_type, compressed_pages);
293 if (ret && ret != -ENOSPC) {
294 btrfs_abort_transaction(trans, root, ret);
295 goto out;
296 } else if (ret == -ENOSPC) {
297 ret = 1;
298 goto out;
299 }
300
301 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
302 btrfs_delalloc_release_metadata(inode, end + 1 - start);
303 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
304out:
305 btrfs_free_path(path);
306 btrfs_end_transaction(trans, root);
307 return ret;
308}
309
/*
 * One piece of a delalloc range queued by compress_file_range() for
 * submit_compressed_extents().
 */
310struct async_extent {
311 u64 start; /* file offset of the piece */
312 u64 ram_size; /* length of the file range: [start, start + ram_size - 1] */
313 u64 compressed_size; /* bytes to reserve on disk for the compressed data */
314 struct page **pages; /* compressed pages; NULL selects the uncompressed path */
315 unsigned long nr_pages; /* number of entries in pages */
316 int compress_type;
317 struct list_head list; /* link in async_cow->extents */
318};
319
/*
 * Work item describing one chunk of a delalloc range handed to the
 * delalloc workers by cow_file_range_async().
 */
320struct async_cow {
321 struct inode *inode; /* igrab()ed reference; set to NULL once dropped */
322 struct btrfs_root *root;
323 struct page *locked_page;
324 u64 start; /* first byte of the chunk */
325 u64 end; /* last byte of the chunk (inclusive) */
326 struct list_head extents; /* async_extent entries produced for the chunk */
327 struct btrfs_work work;
328};
329
330static noinline int add_async_extent(struct async_cow *cow,
331 u64 start, u64 ram_size,
332 u64 compressed_size,
333 struct page **pages,
334 unsigned long nr_pages,
335 int compress_type)
336{
337 struct async_extent *async_extent;
338
339 async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS);
340 BUG_ON(!async_extent);
341 async_extent->start = start;
342 async_extent->ram_size = ram_size;
343 async_extent->compressed_size = compressed_size;
344 async_extent->pages = pages;
345 async_extent->nr_pages = nr_pages;
346 async_extent->compress_type = compress_type;
347 list_add_tail(&async_extent->list, &cow->extents);
348 return 0;
349}
350
351static inline int inode_need_compress(struct inode *inode)
352{
353 struct btrfs_root *root = BTRFS_I(inode)->root;
354
355
356 if (btrfs_test_opt(root, FORCE_COMPRESS))
357 return 1;
358
359 if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS)
360 return 0;
361 if (btrfs_test_opt(root, COMPRESS) ||
362 BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS ||
363 BTRFS_I(inode)->force_compress)
364 return 1;
365 return 0;
366}
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
/*
 * Work through the delalloc range [start, end] of @inode and queue the
 * result on @async_cow for submit_compressed_extents().
 *
 * The range is handled in pieces of at most 128K of input.  Each piece is
 * either compressed and queued together with its compressed pages, or the
 * remainder of the range is queued as a single uncompressed extent (pages
 * == NULL).  When the range starts at offset 0 an inline extent is tried
 * first via cow_file_range_inline(); if that succeeds or fails hard the
 * range is finished here and nothing is queued.
 *
 * *num_added is incremented once for every extent that gets queued.
 * Returns the last status in ret; the visible caller, async_cow_start(),
 * ignores it.
 */
385static noinline int compress_file_range(struct inode *inode,
386 struct page *locked_page,
387 u64 start, u64 end,
388 struct async_cow *async_cow,
389 int *num_added)
390{
391 struct btrfs_root *root = BTRFS_I(inode)->root;
392 u64 num_bytes;
393 u64 blocksize = root->sectorsize;
394 u64 actual_end;
395 u64 isize = i_size_read(inode);
396 int ret = 0;
397 struct page **pages = NULL;
398 unsigned long nr_pages;
399 unsigned long nr_pages_ret = 0;
400 unsigned long total_compressed = 0;
401 unsigned long total_in = 0;
402 unsigned long max_compressed = 128 * 1024;
403 unsigned long max_uncompressed = 128 * 1024;
404 int i;
405 int will_compress;
406 int compress_type = root->fs_info->compress_type;
407 int redirty = 0;
408
409 /* a write below 16K that does not cover the whole on-disk file: queue a defrag */
410 if ((end - start + 1) < 16 * 1024 &&
411 (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
412 btrfs_add_inode_defrag(NULL, inode);
413
414 /* a range of at most one block that is not the whole on-disk file */
415 /* is sent down uncompressed without trying to compress it */
416
417
418 if ((end - start + 1) <= blocksize &&
419 (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
420 goto cleanup_and_bail_uncompressed;
421
422 actual_end = min_t(u64, isize, end + 1);
423again:
424 will_compress = 0;
425 nr_pages = (end >> PAGE_CACHE_SHIFT) - (start >> PAGE_CACHE_SHIFT) + 1;
426 nr_pages = min(nr_pages, (128 * 1024UL) / PAGE_CACHE_SIZE);
427
428 /* nothing left below i_size in this range: no point in compressing */
429
430
431
432
433
434
435
436
437
438 if (actual_end <= start)
439 goto cleanup_and_bail_uncompressed;
440
441 total_compressed = actual_end - start;
442
443 /* feed at most max_uncompressed bytes to the compressor per pass; */
444 /* num_bytes is the whole remaining range rounded up to blocksize */
445
446
447
448
449
450
451
452
453 total_compressed = min(total_compressed, max_uncompressed);
454 num_bytes = ALIGN(end - start + 1, blocksize);
455 num_bytes = max(blocksize, num_bytes);
456 total_in = 0;
457 ret = 0;
458
459 /* only compress when inode_need_compress() asks for it */
460
461
462
463
464 if (inode_need_compress(inode)) {
465 WARN_ON(pages);
466 pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
467 if (!pages) {
468 /* no memory for the page array: continue without compression */
469 goto cont;
470 }
471
472 if (BTRFS_I(inode)->force_compress)
473 compress_type = BTRFS_I(inode)->force_compress;
474
475 /* clear the dirty bits of the range before compressing; redirty */
476 /* records that they have to be set again if we end up on the */
477 /* uncompressed path below */
478
479
480
481
482
483
484 extent_range_clear_dirty_for_io(inode, start, end);
485 redirty = 1;
486 ret = btrfs_compress_pages(compress_type,
487 inode->i_mapping, start,
488 total_compressed, pages,
489 nr_pages, &nr_pages_ret,
490 &total_in,
491 &total_compressed,
492 max_compressed);
493
494 if (!ret) {
495 unsigned long offset = total_compressed &
496 (PAGE_CACHE_SIZE - 1);
497 struct page *page = pages[nr_pages_ret - 1];
498 char *kaddr;
499
500 /* zero the unused tail of the last compressed page */
501
502
503 if (offset) {
504 kaddr = kmap_atomic(page);
505 memset(kaddr + offset, 0,
506 PAGE_CACHE_SIZE - offset);
507 kunmap_atomic(kaddr);
508 }
509 will_compress = 1;
510 }
511 }
512cont:
513 if (start == 0) {
514 /* at file offset 0: try to store the data as an inline extent */
515 if (ret || total_in < (actual_end - start)) {
516 /* compression failed, was skipped or did not consume the */
517 /* whole range: try an uncompressed inline extent */
518
519 ret = cow_file_range_inline(root, inode, start, end,
520 0, 0, NULL);
521 } else {
522 /* try a compressed inline extent */
523 ret = cow_file_range_inline(root, inode, start, end,
524 total_compressed,
525 compress_type, pages);
526 }
527 if (ret <= 0) {
528 unsigned long clear_flags = EXTENT_DELALLOC |
529 EXTENT_DEFRAG;
530 clear_flags |= (ret < 0) ? EXTENT_DO_ACCOUNTING : 0;
531
532 /* 0: the inline extent was written, < 0: hard error.  Either way */
533 /* the range is finished here: clear the delalloc bits, unlock the */
534 /* pages and run them through set/end writeback */
535
536
537 extent_clear_unlock_delalloc(inode, start, end, NULL,
538 clear_flags, PAGE_UNLOCK |
539 PAGE_CLEAR_DIRTY |
540 PAGE_SET_WRITEBACK |
541 PAGE_END_WRITEBACK);
542 goto free_pages_out;
543 }
544 }
545
546 if (will_compress) {
547 /* round the compressed size up to a multiple of blocksize */
548
549
550
551
552 total_compressed = ALIGN(total_compressed, blocksize);
553
554 /* round the consumed input up to a full page; if the compressed */
555 /* result is not smaller than that, give up on compression */
556
557
558 total_in = ALIGN(total_in, PAGE_CACHE_SIZE);
559 if (total_compressed >= total_in) {
560 will_compress = 0;
561 } else {
562 num_bytes = total_in;
563 }
564 }
565 if (!will_compress && pages) {
566 /* compression is not used for this piece: release the pages that */
567 /* were filled in and the array itself */
568
569
570 for (i = 0; i < nr_pages_ret; i++) {
571 WARN_ON(pages[i]->mapping);
572 page_cache_release(pages[i]);
573 }
574 kfree(pages);
575 pages = NULL;
576 total_compressed = 0;
577 nr_pages_ret = 0;
578
579 /* unless compression is forced, flag the inode NOCOMPRESS */
580 if (!btrfs_test_opt(root, FORCE_COMPRESS) &&
581 !(BTRFS_I(inode)->force_compress)) {
582 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
583 }
584 }
585 if (will_compress) {
586 *num_added += 1;
587
588 /* queue the compressed piece; the pages array now belongs to the */
589 /* async_extent, which is why pages is reset before looping */
590
591
592 add_async_extent(async_cow, start, num_bytes,
593 total_compressed, pages, nr_pages_ret,
594 compress_type);
595
596 if (start + num_bytes < end) {
597 start += num_bytes;
598 pages = NULL;
599 cond_resched();
600 goto again;
601 }
602 } else {
603cleanup_and_bail_uncompressed:
604 /* uncompressed fallback: if locked_page lies inside the range, */
605 /* mark it dirty again before queueing the range */
606
607
608
609
610
611 if (page_offset(locked_page) >= start &&
612 page_offset(locked_page) <= end) {
613 __set_page_dirty_nobuffers(locked_page);
614 /* the page is not unlocked here */
615 }
616 if (redirty)
617 extent_range_redirty_for_io(inode, start, end);
618 add_async_extent(async_cow, start, end - start + 1,
619 0, NULL, 0, BTRFS_COMPRESS_NONE);
620 *num_added += 1;
621 }
622
623out:
624 return ret;
625
626free_pages_out:
627 for (i = 0; i < nr_pages_ret; i++) {
628 WARN_ON(pages[i]->mapping);
629 page_cache_release(pages[i]);
630 }
631 kfree(pages);
632
633 goto out;
634}
635
636
637
638
639
640
641
642static noinline int submit_compressed_extents(struct inode *inode,
643 struct async_cow *async_cow)
644{
645 struct async_extent *async_extent;
646 u64 alloc_hint = 0;
647 struct btrfs_key ins;
648 struct extent_map *em;
649 struct btrfs_root *root = BTRFS_I(inode)->root;
650 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
651 struct extent_io_tree *io_tree;
652 int ret = 0;
653
654 if (list_empty(&async_cow->extents))
655 return 0;
656
657again:
658 while (!list_empty(&async_cow->extents)) {
659 async_extent = list_entry(async_cow->extents.next,
660 struct async_extent, list);
661 list_del(&async_extent->list);
662
663 io_tree = &BTRFS_I(inode)->io_tree;
664
665retry:
666
667 if (!async_extent->pages) {
668 int page_started = 0;
669 unsigned long nr_written = 0;
670
671 lock_extent(io_tree, async_extent->start,
672 async_extent->start +
673 async_extent->ram_size - 1);
674
675
676 ret = cow_file_range(inode, async_cow->locked_page,
677 async_extent->start,
678 async_extent->start +
679 async_extent->ram_size - 1,
680 &page_started, &nr_written, 0);
681
682
683
684
685
686
687
688
689
690 if (!page_started && !ret)
691 extent_write_locked_range(io_tree,
692 inode, async_extent->start,
693 async_extent->start +
694 async_extent->ram_size - 1,
695 btrfs_get_extent,
696 WB_SYNC_ALL);
697 else if (ret)
698 unlock_page(async_cow->locked_page);
699 kfree(async_extent);
700 cond_resched();
701 continue;
702 }
703
704 lock_extent(io_tree, async_extent->start,
705 async_extent->start + async_extent->ram_size - 1);
706
707 ret = btrfs_reserve_extent(root,
708 async_extent->compressed_size,
709 async_extent->compressed_size,
710 0, alloc_hint, &ins, 1, 1);
711 if (ret) {
712 int i;
713
714 for (i = 0; i < async_extent->nr_pages; i++) {
715 WARN_ON(async_extent->pages[i]->mapping);
716 page_cache_release(async_extent->pages[i]);
717 }
718 kfree(async_extent->pages);
719 async_extent->nr_pages = 0;
720 async_extent->pages = NULL;
721
722 if (ret == -ENOSPC) {
723 unlock_extent(io_tree, async_extent->start,
724 async_extent->start +
725 async_extent->ram_size - 1);
726
727
728
729
730
731
732
733 extent_range_redirty_for_io(inode,
734 async_extent->start,
735 async_extent->start +
736 async_extent->ram_size - 1);
737
738 goto retry;
739 }
740 goto out_free;
741 }
742
743
744
745
746
747 btrfs_drop_extent_cache(inode, async_extent->start,
748 async_extent->start +
749 async_extent->ram_size - 1, 0);
750
751 em = alloc_extent_map();
752 if (!em) {
753 ret = -ENOMEM;
754 goto out_free_reserve;
755 }
756 em->start = async_extent->start;
757 em->len = async_extent->ram_size;
758 em->orig_start = em->start;
759 em->mod_start = em->start;
760 em->mod_len = em->len;
761
762 em->block_start = ins.objectid;
763 em->block_len = ins.offset;
764 em->orig_block_len = ins.offset;
765 em->ram_bytes = async_extent->ram_size;
766 em->bdev = root->fs_info->fs_devices->latest_bdev;
767 em->compress_type = async_extent->compress_type;
768 set_bit(EXTENT_FLAG_PINNED, &em->flags);
769 set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
770 em->generation = -1;
771
772 while (1) {
773 write_lock(&em_tree->lock);
774 ret = add_extent_mapping(em_tree, em, 1);
775 write_unlock(&em_tree->lock);
776 if (ret != -EEXIST) {
777 free_extent_map(em);
778 break;
779 }
780 btrfs_drop_extent_cache(inode, async_extent->start,
781 async_extent->start +
782 async_extent->ram_size - 1, 0);
783 }
784
785 if (ret)
786 goto out_free_reserve;
787
788 ret = btrfs_add_ordered_extent_compress(inode,
789 async_extent->start,
790 ins.objectid,
791 async_extent->ram_size,
792 ins.offset,
793 BTRFS_ORDERED_COMPRESSED,
794 async_extent->compress_type);
795 if (ret) {
796 btrfs_drop_extent_cache(inode, async_extent->start,
797 async_extent->start +
798 async_extent->ram_size - 1, 0);
799 goto out_free_reserve;
800 }
801
802
803
804
805 extent_clear_unlock_delalloc(inode, async_extent->start,
806 async_extent->start +
807 async_extent->ram_size - 1,
808 NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
809 PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
810 PAGE_SET_WRITEBACK);
811 ret = btrfs_submit_compressed_write(inode,
812 async_extent->start,
813 async_extent->ram_size,
814 ins.objectid,
815 ins.offset, async_extent->pages,
816 async_extent->nr_pages);
817 alloc_hint = ins.objectid + ins.offset;
818 kfree(async_extent);
819 if (ret)
820 goto out;
821 cond_resched();
822 }
823 ret = 0;
824out:
825 return ret;
826out_free_reserve:
827 btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
828out_free:
829 extent_clear_unlock_delalloc(inode, async_extent->start,
830 async_extent->start +
831 async_extent->ram_size - 1,
832 NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
833 EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
834 PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
835 PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
836 kfree(async_extent);
837 goto again;
838}
839
840static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
841 u64 num_bytes)
842{
843 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
844 struct extent_map *em;
845 u64 alloc_hint = 0;
846
847 read_lock(&em_tree->lock);
848 em = search_extent_mapping(em_tree, start, num_bytes);
849 if (em) {
850
851
852
853
854
855 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
856 free_extent_map(em);
857 em = search_extent_mapping(em_tree, 0, 0);
858 if (em && em->block_start < EXTENT_MAP_LAST_BYTE)
859 alloc_hint = em->block_start;
860 if (em)
861 free_extent_map(em);
862 } else {
863 alloc_hint = em->block_start;
864 free_extent_map(em);
865 }
866 }
867 read_unlock(&em_tree->lock);
868
869 return alloc_hint;
870}
871
872
873
874
875
876
877
878
879
880
881
882
883
884
/*
 * Allocate disk space for the delalloc range [start, end] of @inode
 * without compression.
 *
 * When the range starts at offset 0 an inline extent is tried first; on
 * success *page_started is set to 1, *nr_written is increased by the
 * number of pages in the range and nothing else is done.  Otherwise
 * extents are reserved in a loop; every reserved piece gets a pinned
 * extent map and an ordered extent before its EXTENT_LOCKED and
 * EXTENT_DELALLOC bits are cleared.
 *
 * @locked_page: passed through to extent_clear_unlock_delalloc()
 * @unlock:      non-zero to also unlock the pages of every finished piece
 *
 * Returns 0 on success or a negative errno.  On error the range from the
 * current start up to end is cleared and its pages are unlocked and run
 * through set/end writeback.
 */
885static noinline int cow_file_range(struct inode *inode,
886 struct page *locked_page,
887 u64 start, u64 end, int *page_started,
888 unsigned long *nr_written,
889 int unlock)
890{
891 struct btrfs_root *root = BTRFS_I(inode)->root;
892 u64 alloc_hint = 0;
893 u64 num_bytes;
894 unsigned long ram_size;
895 u64 disk_num_bytes;
896 u64 cur_alloc_size;
897 u64 blocksize = root->sectorsize;
898 struct btrfs_key ins;
899 struct extent_map *em;
900 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
901 int ret = 0;
902
903 if (btrfs_is_free_space_inode(inode)) {
904 WARN_ON_ONCE(1);
905 ret = -EINVAL;
906 goto out_unlock;
907 }
908
909 num_bytes = ALIGN(end - start + 1, blocksize);
910 num_bytes = max(blocksize, num_bytes);
911 disk_num_bytes = num_bytes;
912
913 /* a write below 64K that does not cover the whole on-disk file: queue a defrag */
914 if (num_bytes < 64 * 1024 &&
915 (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
916 btrfs_add_inode_defrag(NULL, inode);
917
918 if (start == 0) {
919 /* at file offset 0: try to store the data as an inline extent */
920 ret = cow_file_range_inline(root, inode, start, end, 0, 0,
921 NULL);
922 if (ret == 0) {
923 extent_clear_unlock_delalloc(inode, start, end, NULL,
924 EXTENT_LOCKED | EXTENT_DELALLOC |
925 EXTENT_DEFRAG, PAGE_UNLOCK |
926 PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
927 PAGE_END_WRITEBACK);
928
929 *nr_written = *nr_written +
930 (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
931 *page_started = 1;
932 goto out;
933 } else if (ret < 0) {
934 goto out_unlock;
935 }
936 }
937
938 BUG_ON(disk_num_bytes >
939 btrfs_super_total_bytes(root->fs_info->super_copy));
940
941 alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
942 btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
943
944 while (disk_num_bytes > 0) {
945 unsigned long op;
946
947 cur_alloc_size = disk_num_bytes;
948 ret = btrfs_reserve_extent(root, cur_alloc_size,
949 root->sectorsize, 0, alloc_hint,
950 &ins, 1, 1);
951 if (ret < 0)
952 goto out_unlock;
953
954 em = alloc_extent_map();
955 if (!em) {
956 ret = -ENOMEM;
957 goto out_reserve;
958 }
959 em->start = start;
960 em->orig_start = em->start;
961 ram_size = ins.offset;
962 em->len = ins.offset;
963 em->mod_start = em->start;
964 em->mod_len = em->len;
965
966 em->block_start = ins.objectid;
967 em->block_len = ins.offset;
968 em->orig_block_len = ins.offset;
969 em->ram_bytes = ram_size;
970 em->bdev = root->fs_info->fs_devices->latest_bdev;
971 set_bit(EXTENT_FLAG_PINNED, &em->flags);
972 em->generation = -1;
973
974 while (1) {
975 write_lock(&em_tree->lock);
976 ret = add_extent_mapping(em_tree, em, 1);
977 write_unlock(&em_tree->lock);
978 if (ret != -EEXIST) {
979 free_extent_map(em);
980 break;
981 }
982 btrfs_drop_extent_cache(inode, start,
983 start + ram_size - 1, 0);
984 }
985 if (ret)
986 goto out_reserve;
987
988 cur_alloc_size = ins.offset;
989 ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
990 ram_size, cur_alloc_size, 0);
991 if (ret)
992 goto out_drop_extent_cache;
993
994 if (root->root_key.objectid ==
995 BTRFS_DATA_RELOC_TREE_OBJECTID) {
996 ret = btrfs_reloc_clone_csums(inode, start,
997 cur_alloc_size);
998 if (ret)
999 goto out_drop_extent_cache;
1000 }
1001
1002 if (disk_num_bytes < cur_alloc_size)
1003 break;
1004
1005 /* no dirty bits are cleared and no writeback bits are set here; */
1006 /* pages are unlocked only when the caller asked for it (unlock), */
1007 /* and PAGE_SET_PRIVATE2 is always requested */
1008
1009
1010
1011
1012 op = unlock ? PAGE_UNLOCK : 0;
1013 op |= PAGE_SET_PRIVATE2;
1014
1015 extent_clear_unlock_delalloc(inode, start,
1016 start + ram_size - 1, locked_page,
1017 EXTENT_LOCKED | EXTENT_DELALLOC,
1018 op);
1019 disk_num_bytes -= cur_alloc_size;
1020 num_bytes -= cur_alloc_size;
1021 alloc_hint = ins.objectid + ins.offset;
1022 start += cur_alloc_size;
1023 }
1024out:
1025 return ret;
1026
/* error unwinding: each label undoes one more step, then falls through */
1027out_drop_extent_cache:
1028 btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0);
1029out_reserve:
1030 btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
1031out_unlock:
1032 extent_clear_unlock_delalloc(inode, start, end, locked_page,
1033 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
1034 EXTENT_DELALLOC | EXTENT_DEFRAG,
1035 PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
1036 PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
1037 goto out;
1038}
1039
1040
1041
1042
1043static noinline void async_cow_start(struct btrfs_work *work)
1044{
1045 struct async_cow *async_cow;
1046 int num_added = 0;
1047 async_cow = container_of(work, struct async_cow, work);
1048
1049 compress_file_range(async_cow->inode, async_cow->locked_page,
1050 async_cow->start, async_cow->end, async_cow,
1051 &num_added);
1052 if (num_added == 0) {
1053 btrfs_add_delayed_iput(async_cow->inode);
1054 async_cow->inode = NULL;
1055 }
1056}
1057
1058
1059
1060
1061static noinline void async_cow_submit(struct btrfs_work *work)
1062{
1063 struct async_cow *async_cow;
1064 struct btrfs_root *root;
1065 unsigned long nr_pages;
1066
1067 async_cow = container_of(work, struct async_cow, work);
1068
1069 root = async_cow->root;
1070 nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >>
1071 PAGE_CACHE_SHIFT;
1072
1073 if (atomic_sub_return(nr_pages, &root->fs_info->async_delalloc_pages) <
1074 5 * 1024 * 1024 &&
1075 waitqueue_active(&root->fs_info->async_submit_wait))
1076 wake_up(&root->fs_info->async_submit_wait);
1077
1078 if (async_cow->inode)
1079 submit_compressed_extents(async_cow->inode, async_cow);
1080}
1081
1082static noinline void async_cow_free(struct btrfs_work *work)
1083{
1084 struct async_cow *async_cow;
1085 async_cow = container_of(work, struct async_cow, work);
1086 if (async_cow->inode)
1087 btrfs_add_delayed_iput(async_cow->inode);
1088 kfree(async_cow);
1089}
1090
1091static int cow_file_range_async(struct inode *inode, struct page *locked_page,
1092 u64 start, u64 end, int *page_started,
1093 unsigned long *nr_written)
1094{
1095 struct async_cow *async_cow;
1096 struct btrfs_root *root = BTRFS_I(inode)->root;
1097 unsigned long nr_pages;
1098 u64 cur_end;
1099 int limit = 10 * 1024 * 1024;
1100
1101 clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
1102 1, 0, NULL, GFP_NOFS);
1103 while (start < end) {
1104 async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
1105 BUG_ON(!async_cow);
1106 async_cow->inode = igrab(inode);
1107 async_cow->root = root;
1108 async_cow->locked_page = locked_page;
1109 async_cow->start = start;
1110
1111 if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
1112 !btrfs_test_opt(root, FORCE_COMPRESS))
1113 cur_end = end;
1114 else
1115 cur_end = min(end, start + 512 * 1024 - 1);
1116
1117 async_cow->end = cur_end;
1118 INIT_LIST_HEAD(&async_cow->extents);
1119
1120 btrfs_init_work(&async_cow->work,
1121 btrfs_delalloc_helper,
1122 async_cow_start, async_cow_submit,
1123 async_cow_free);
1124
1125 nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >>
1126 PAGE_CACHE_SHIFT;
1127 atomic_add(nr_pages, &root->fs_info->async_delalloc_pages);
1128
1129 btrfs_queue_work(root->fs_info->delalloc_workers,
1130 &async_cow->work);
1131
1132 if (atomic_read(&root->fs_info->async_delalloc_pages) > limit) {
1133 wait_event(root->fs_info->async_submit_wait,
1134 (atomic_read(&root->fs_info->async_delalloc_pages) <
1135 limit));
1136 }
1137
1138 while (atomic_read(&root->fs_info->async_submit_draining) &&
1139 atomic_read(&root->fs_info->async_delalloc_pages)) {
1140 wait_event(root->fs_info->async_submit_wait,
1141 (atomic_read(&root->fs_info->async_delalloc_pages) ==
1142 0));
1143 }
1144
1145 *nr_written += nr_pages;
1146 start = cur_end + 1;
1147 }
1148 *page_started = 1;
1149 return 0;
1150}
1151
1152static noinline int csum_exist_in_range(struct btrfs_root *root,
1153 u64 bytenr, u64 num_bytes)
1154{
1155 int ret;
1156 struct btrfs_ordered_sum *sums;
1157 LIST_HEAD(list);
1158
1159 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, bytenr,
1160 bytenr + num_bytes - 1, &list, 0);
1161 if (ret == 0 && list_empty(&list))
1162 return 0;
1163
1164 while (!list_empty(&list)) {
1165 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
1166 list_del(&sums->list);
1167 kfree(sums);
1168 }
1169 return 1;
1170}
1171
1172
1173
1174
1175
1176
1177
1178
1179static noinline int run_delalloc_nocow(struct inode *inode,
1180 struct page *locked_page,
1181 u64 start, u64 end, int *page_started, int force,
1182 unsigned long *nr_written)
1183{
1184 struct btrfs_root *root = BTRFS_I(inode)->root;
1185 struct btrfs_trans_handle *trans;
1186 struct extent_buffer *leaf;
1187 struct btrfs_path *path;
1188 struct btrfs_file_extent_item *fi;
1189 struct btrfs_key found_key;
1190 u64 cow_start;
1191 u64 cur_offset;
1192 u64 extent_end;
1193 u64 extent_offset;
1194 u64 disk_bytenr;
1195 u64 num_bytes;
1196 u64 disk_num_bytes;
1197 u64 ram_bytes;
1198 int extent_type;
1199 int ret, err;
1200 int type;
1201 int nocow;
1202 int check_prev = 1;
1203 bool nolock;
1204 u64 ino = btrfs_ino(inode);
1205
1206 path = btrfs_alloc_path();
1207 if (!path) {
1208 extent_clear_unlock_delalloc(inode, start, end, locked_page,
1209 EXTENT_LOCKED | EXTENT_DELALLOC |
1210 EXTENT_DO_ACCOUNTING |
1211 EXTENT_DEFRAG, PAGE_UNLOCK |
1212 PAGE_CLEAR_DIRTY |
1213 PAGE_SET_WRITEBACK |
1214 PAGE_END_WRITEBACK);
1215 return -ENOMEM;
1216 }
1217
1218 nolock = btrfs_is_free_space_inode(inode);
1219
1220 if (nolock)
1221 trans = btrfs_join_transaction_nolock(root);
1222 else
1223 trans = btrfs_join_transaction(root);
1224
1225 if (IS_ERR(trans)) {
1226 extent_clear_unlock_delalloc(inode, start, end, locked_page,
1227 EXTENT_LOCKED | EXTENT_DELALLOC |
1228 EXTENT_DO_ACCOUNTING |
1229 EXTENT_DEFRAG, PAGE_UNLOCK |
1230 PAGE_CLEAR_DIRTY |
1231 PAGE_SET_WRITEBACK |
1232 PAGE_END_WRITEBACK);
1233 btrfs_free_path(path);
1234 return PTR_ERR(trans);
1235 }
1236
1237 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1238
1239 cow_start = (u64)-1;
1240 cur_offset = start;
1241 while (1) {
1242 ret = btrfs_lookup_file_extent(trans, root, path, ino,
1243 cur_offset, 0);
1244 if (ret < 0)
1245 goto error;
1246 if (ret > 0 && path->slots[0] > 0 && check_prev) {
1247 leaf = path->nodes[0];
1248 btrfs_item_key_to_cpu(leaf, &found_key,
1249 path->slots[0] - 1);
1250 if (found_key.objectid == ino &&
1251 found_key.type == BTRFS_EXTENT_DATA_KEY)
1252 path->slots[0]--;
1253 }
1254 check_prev = 0;
1255next_slot:
1256 leaf = path->nodes[0];
1257 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
1258 ret = btrfs_next_leaf(root, path);
1259 if (ret < 0)
1260 goto error;
1261 if (ret > 0)
1262 break;
1263 leaf = path->nodes[0];
1264 }
1265
1266 nocow = 0;
1267 disk_bytenr = 0;
1268 num_bytes = 0;
1269 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1270
1271 if (found_key.objectid > ino ||
1272 found_key.type > BTRFS_EXTENT_DATA_KEY ||
1273 found_key.offset > end)
1274 break;
1275
1276 if (found_key.offset > cur_offset) {
1277 extent_end = found_key.offset;
1278 extent_type = 0;
1279 goto out_check;
1280 }
1281
1282 fi = btrfs_item_ptr(leaf, path->slots[0],
1283 struct btrfs_file_extent_item);
1284 extent_type = btrfs_file_extent_type(leaf, fi);
1285
1286 ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
1287 if (extent_type == BTRFS_FILE_EXTENT_REG ||
1288 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1289 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1290 extent_offset = btrfs_file_extent_offset(leaf, fi);
1291 extent_end = found_key.offset +
1292 btrfs_file_extent_num_bytes(leaf, fi);
1293 disk_num_bytes =
1294 btrfs_file_extent_disk_num_bytes(leaf, fi);
1295 if (extent_end <= start) {
1296 path->slots[0]++;
1297 goto next_slot;
1298 }
1299 if (disk_bytenr == 0)
1300 goto out_check;
1301 if (btrfs_file_extent_compression(leaf, fi) ||
1302 btrfs_file_extent_encryption(leaf, fi) ||
1303 btrfs_file_extent_other_encoding(leaf, fi))
1304 goto out_check;
1305 if (extent_type == BTRFS_FILE_EXTENT_REG && !force)
1306 goto out_check;
1307 if (btrfs_extent_readonly(root, disk_bytenr))
1308 goto out_check;
1309 if (btrfs_cross_ref_exist(trans, root, ino,
1310 found_key.offset -
1311 extent_offset, disk_bytenr))
1312 goto out_check;
1313 disk_bytenr += extent_offset;
1314 disk_bytenr += cur_offset - found_key.offset;
1315 num_bytes = min(end + 1, extent_end) - cur_offset;
1316
1317
1318
1319
1320 if (!nolock) {
1321 err = btrfs_start_nocow_write(root);
1322 if (!err)
1323 goto out_check;
1324 }
1325
1326
1327
1328
1329
1330 if (csum_exist_in_range(root, disk_bytenr, num_bytes))
1331 goto out_check;
1332 nocow = 1;
1333 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1334 extent_end = found_key.offset +
1335 btrfs_file_extent_inline_len(leaf,
1336 path->slots[0], fi);
1337 extent_end = ALIGN(extent_end, root->sectorsize);
1338 } else {
1339 BUG_ON(1);
1340 }
1341out_check:
1342 if (extent_end <= start) {
1343 path->slots[0]++;
1344 if (!nolock && nocow)
1345 btrfs_end_nocow_write(root);
1346 goto next_slot;
1347 }
1348 if (!nocow) {
1349 if (cow_start == (u64)-1)
1350 cow_start = cur_offset;
1351 cur_offset = extent_end;
1352 if (cur_offset > end)
1353 break;
1354 path->slots[0]++;
1355 goto next_slot;
1356 }
1357
1358 btrfs_release_path(path);
1359 if (cow_start != (u64)-1) {
1360 ret = cow_file_range(inode, locked_page,
1361 cow_start, found_key.offset - 1,
1362 page_started, nr_written, 1);
1363 if (ret) {
1364 if (!nolock && nocow)
1365 btrfs_end_nocow_write(root);
1366 goto error;
1367 }
1368 cow_start = (u64)-1;
1369 }
1370
1371 if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
1372 struct extent_map *em;
1373 struct extent_map_tree *em_tree;
1374 em_tree = &BTRFS_I(inode)->extent_tree;
1375 em = alloc_extent_map();
1376 BUG_ON(!em);
1377 em->start = cur_offset;
1378 em->orig_start = found_key.offset - extent_offset;
1379 em->len = num_bytes;
1380 em->block_len = num_bytes;
1381 em->block_start = disk_bytenr;
1382 em->orig_block_len = disk_num_bytes;
1383 em->ram_bytes = ram_bytes;
1384 em->bdev = root->fs_info->fs_devices->latest_bdev;
1385 em->mod_start = em->start;
1386 em->mod_len = em->len;
1387 set_bit(EXTENT_FLAG_PINNED, &em->flags);
1388 set_bit(EXTENT_FLAG_FILLING, &em->flags);
1389 em->generation = -1;
1390 while (1) {
1391 write_lock(&em_tree->lock);
1392 ret = add_extent_mapping(em_tree, em, 1);
1393 write_unlock(&em_tree->lock);
1394 if (ret != -EEXIST) {
1395 free_extent_map(em);
1396 break;
1397 }
1398 btrfs_drop_extent_cache(inode, em->start,
1399 em->start + em->len - 1, 0);
1400 }
1401 type = BTRFS_ORDERED_PREALLOC;
1402 } else {
1403 type = BTRFS_ORDERED_NOCOW;
1404 }
1405
1406 ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr,
1407 num_bytes, num_bytes, type);
1408 BUG_ON(ret);
1409
1410 if (root->root_key.objectid ==
1411 BTRFS_DATA_RELOC_TREE_OBJECTID) {
1412 ret = btrfs_reloc_clone_csums(inode, cur_offset,
1413 num_bytes);
1414 if (ret) {
1415 if (!nolock && nocow)
1416 btrfs_end_nocow_write(root);
1417 goto error;
1418 }
1419 }
1420
1421 extent_clear_unlock_delalloc(inode, cur_offset,
1422 cur_offset + num_bytes - 1,
1423 locked_page, EXTENT_LOCKED |
1424 EXTENT_DELALLOC, PAGE_UNLOCK |
1425 PAGE_SET_PRIVATE2);
1426 if (!nolock && nocow)
1427 btrfs_end_nocow_write(root);
1428 cur_offset = extent_end;
1429 if (cur_offset > end)
1430 break;
1431 }
1432 btrfs_release_path(path);
1433
1434 if (cur_offset <= end && cow_start == (u64)-1) {
1435 cow_start = cur_offset;
1436 cur_offset = end;
1437 }
1438
1439 if (cow_start != (u64)-1) {
1440 ret = cow_file_range(inode, locked_page, cow_start, end,
1441 page_started, nr_written, 1);
1442 if (ret)
1443 goto error;
1444 }
1445
1446error:
1447 err = btrfs_end_transaction(trans, root);
1448 if (!ret)
1449 ret = err;
1450
1451 if (ret && cur_offset < end)
1452 extent_clear_unlock_delalloc(inode, cur_offset, end,
1453 locked_page, EXTENT_LOCKED |
1454 EXTENT_DELALLOC | EXTENT_DEFRAG |
1455 EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
1456 PAGE_CLEAR_DIRTY |
1457 PAGE_SET_WRITEBACK |
1458 PAGE_END_WRITEBACK);
1459 btrfs_free_path(path);
1460 return ret;
1461}
1462
1463static inline int need_force_cow(struct inode *inode, u64 start, u64 end)
1464{
1465
1466 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
1467 !(BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC))
1468 return 0;
1469
1470
1471
1472
1473
1474
1475 if (BTRFS_I(inode)->defrag_bytes &&
1476 test_range_bit(&BTRFS_I(inode)->io_tree, start, end,
1477 EXTENT_DEFRAG, 0, NULL))
1478 return 1;
1479
1480 return 0;
1481}
1482
1483
1484
1485
1486static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1487 u64 start, u64 end, int *page_started,
1488 unsigned long *nr_written)
1489{
1490 int ret;
1491 int force_cow = need_force_cow(inode, start, end);
1492
1493 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW && !force_cow) {
1494 ret = run_delalloc_nocow(inode, locked_page, start, end,
1495 page_started, 1, nr_written);
1496 } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) {
1497 ret = run_delalloc_nocow(inode, locked_page, start, end,
1498 page_started, 0, nr_written);
1499 } else if (!inode_need_compress(inode)) {
1500 ret = cow_file_range(inode, locked_page, start, end,
1501 page_started, nr_written, 1);
1502 } else {
1503 set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
1504 &BTRFS_I(inode)->runtime_flags);
1505 ret = cow_file_range_async(inode, locked_page, start, end,
1506 page_started, nr_written);
1507 }
1508 return ret;
1509}
1510
1511static void btrfs_split_extent_hook(struct inode *inode,
1512 struct extent_state *orig, u64 split)
1513{
1514
1515 if (!(orig->state & EXTENT_DELALLOC))
1516 return;
1517
1518 spin_lock(&BTRFS_I(inode)->lock);
1519 BTRFS_I(inode)->outstanding_extents++;
1520 spin_unlock(&BTRFS_I(inode)->lock);
1521}
1522
1523
1524
1525
1526
1527
1528
1529static void btrfs_merge_extent_hook(struct inode *inode,
1530 struct extent_state *new,
1531 struct extent_state *other)
1532{
1533
1534 if (!(other->state & EXTENT_DELALLOC))
1535 return;
1536
1537 spin_lock(&BTRFS_I(inode)->lock);
1538 BTRFS_I(inode)->outstanding_extents--;
1539 spin_unlock(&BTRFS_I(inode)->lock);
1540}
1541
1542static void btrfs_add_delalloc_inodes(struct btrfs_root *root,
1543 struct inode *inode)
1544{
1545 spin_lock(&root->delalloc_lock);
1546 if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1547 list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
1548 &root->delalloc_inodes);
1549 set_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1550 &BTRFS_I(inode)->runtime_flags);
1551 root->nr_delalloc_inodes++;
1552 if (root->nr_delalloc_inodes == 1) {
1553 spin_lock(&root->fs_info->delalloc_root_lock);
1554 BUG_ON(!list_empty(&root->delalloc_root));
1555 list_add_tail(&root->delalloc_root,
1556 &root->fs_info->delalloc_roots);
1557 spin_unlock(&root->fs_info->delalloc_root_lock);
1558 }
1559 }
1560 spin_unlock(&root->delalloc_lock);
1561}
1562
1563static void btrfs_del_delalloc_inode(struct btrfs_root *root,
1564 struct inode *inode)
1565{
1566 spin_lock(&root->delalloc_lock);
1567 if (!list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1568 list_del_init(&BTRFS_I(inode)->delalloc_inodes);
1569 clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1570 &BTRFS_I(inode)->runtime_flags);
1571 root->nr_delalloc_inodes--;
1572 if (!root->nr_delalloc_inodes) {
1573 spin_lock(&root->fs_info->delalloc_root_lock);
1574 BUG_ON(list_empty(&root->delalloc_root));
1575 list_del_init(&root->delalloc_root);
1576 spin_unlock(&root->fs_info->delalloc_root_lock);
1577 }
1578 }
1579 spin_unlock(&root->delalloc_lock);
1580}
1581
1582
1583
1584
1585
1586
1587static void btrfs_set_bit_hook(struct inode *inode,
1588 struct extent_state *state, unsigned long *bits)
1589{
1590
1591 if ((*bits & EXTENT_DEFRAG) && !(*bits & EXTENT_DELALLOC))
1592 WARN_ON(1);
1593
1594
1595
1596
1597
1598 if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1599 struct btrfs_root *root = BTRFS_I(inode)->root;
1600 u64 len = state->end + 1 - state->start;
1601 bool do_list = !btrfs_is_free_space_inode(inode);
1602
1603 if (*bits & EXTENT_FIRST_DELALLOC) {
1604 *bits &= ~EXTENT_FIRST_DELALLOC;
1605 } else {
1606 spin_lock(&BTRFS_I(inode)->lock);
1607 BTRFS_I(inode)->outstanding_extents++;
1608 spin_unlock(&BTRFS_I(inode)->lock);
1609 }
1610
1611 __percpu_counter_add(&root->fs_info->delalloc_bytes, len,
1612 root->fs_info->delalloc_batch);
1613 spin_lock(&BTRFS_I(inode)->lock);
1614 BTRFS_I(inode)->delalloc_bytes += len;
1615 if (*bits & EXTENT_DEFRAG)
1616 BTRFS_I(inode)->defrag_bytes += len;
1617 if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1618 &BTRFS_I(inode)->runtime_flags))
1619 btrfs_add_delalloc_inodes(root, inode);
1620 spin_unlock(&BTRFS_I(inode)->lock);
1621 }
1622}
1623
1624
1625
1626
1627static void btrfs_clear_bit_hook(struct inode *inode,
1628 struct extent_state *state,
1629 unsigned long *bits)
1630{
1631 u64 len = state->end + 1 - state->start;
1632
1633 spin_lock(&BTRFS_I(inode)->lock);
1634 if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG))
1635 BTRFS_I(inode)->defrag_bytes -= len;
1636 spin_unlock(&BTRFS_I(inode)->lock);
1637
1638
1639
1640
1641
1642
1643 if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1644 struct btrfs_root *root = BTRFS_I(inode)->root;
1645 bool do_list = !btrfs_is_free_space_inode(inode);
1646
1647 if (*bits & EXTENT_FIRST_DELALLOC) {
1648 *bits &= ~EXTENT_FIRST_DELALLOC;
1649 } else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
1650 spin_lock(&BTRFS_I(inode)->lock);
1651 BTRFS_I(inode)->outstanding_extents--;
1652 spin_unlock(&BTRFS_I(inode)->lock);
1653 }
1654
1655
1656
1657
1658
1659
1660 if (*bits & EXTENT_DO_ACCOUNTING &&
1661 root != root->fs_info->tree_root)
1662 btrfs_delalloc_release_metadata(inode, len);
1663
1664 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
1665 && do_list && !(state->state & EXTENT_NORESERVE))
1666 btrfs_free_reserved_data_space(inode, len);
1667
1668 __percpu_counter_add(&root->fs_info->delalloc_bytes, -len,
1669 root->fs_info->delalloc_batch);
1670 spin_lock(&BTRFS_I(inode)->lock);
1671 BTRFS_I(inode)->delalloc_bytes -= len;
1672 if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
1673 test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1674 &BTRFS_I(inode)->runtime_flags))
1675 btrfs_del_delalloc_inode(root, inode);
1676 spin_unlock(&BTRFS_I(inode)->lock);
1677 }
1678}
1679
1680
1681
1682
1683
1684int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
1685 size_t size, struct bio *bio,
1686 unsigned long bio_flags)
1687{
1688 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
1689 u64 logical = (u64)bio->bi_iter.bi_sector << 9;
1690 u64 length = 0;
1691 u64 map_length;
1692 int ret;
1693
1694 if (bio_flags & EXTENT_BIO_COMPRESSED)
1695 return 0;
1696
1697 length = bio->bi_iter.bi_size;
1698 map_length = length;
1699 ret = btrfs_map_block(root->fs_info, rw, logical,
1700 &map_length, NULL, 0);
1701
1702 BUG_ON(ret < 0);
1703 if (map_length < length + size)
1704 return 1;
1705 return 0;
1706}
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716static int __btrfs_submit_bio_start(struct inode *inode, int rw,
1717 struct bio *bio, int mirror_num,
1718 unsigned long bio_flags,
1719 u64 bio_offset)
1720{
1721 struct btrfs_root *root = BTRFS_I(inode)->root;
1722 int ret = 0;
1723
1724 ret = btrfs_csum_one_bio(root, inode, bio, 0, 0);
1725 BUG_ON(ret);
1726 return 0;
1727}
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
1738 int mirror_num, unsigned long bio_flags,
1739 u64 bio_offset)
1740{
1741 struct btrfs_root *root = BTRFS_I(inode)->root;
1742 int ret;
1743
1744 ret = btrfs_map_bio(root, rw, bio, mirror_num, 1);
1745 if (ret)
1746 bio_endio(bio, ret);
1747 return ret;
1748}
1749
1750
1751
1752
1753
/*
 * Submit hook for data bios of @inode.
 *
 * Reads: route completion through btrfs_bio_wq_end_io(), then either hand
 * compressed bios to btrfs_submit_compressed_read() or (unless the inode is
 * NODATASUM) look up the checksums before mapping the bio.
 *
 * Writes: unless the inode is NODATASUM, checksum the data - through
 * btrfs_wq_submit_bio() when no sync writers are registered on the inode,
 * inline otherwise - and then map the bio.
 *
 * Returns the result of the last helper called; when that is negative the
 * bio is completed with the error before returning.
 */
static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
			  int mirror_num, unsigned long bio_flags,
			  u64 bio_offset)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret = 0;
	int skip_sum;
	int metadata = 0;
	/* go through the submit workqueue only when nobody is writing synchronously */
	int async = !atomic_read(&BTRFS_I(inode)->sync_writers);

	skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;

	/*
	 * NOTE(review): free space inodes pass 2 to btrfs_bio_wq_end_io();
	 * the meaning of that value is defined by that helper - confirm there.
	 */
	if (btrfs_is_free_space_inode(inode))
		metadata = 2;

	if (!(rw & REQ_WRITE)) {
		/* read path */
		ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata);
		if (ret)
			goto out;

		if (bio_flags & EXTENT_BIO_COMPRESSED) {
			/* compressed reads are submitted by their own helper */
			ret = btrfs_submit_compressed_read(inode, bio,
							   mirror_num,
							   bio_flags);
			goto out;
		} else if (!skip_sum) {
			ret = btrfs_lookup_bio_sums(root, inode, bio, NULL);
			if (ret)
				goto out;
		}
		goto mapit;
	} else if (async && !skip_sum) {
		/*
		 * Writes to the data reloc tree are mapped without
		 * checksumming here; presumably their csums are provided
		 * elsewhere (cf. btrfs_reloc_clone_csums()) - TODO confirm.
		 */
		if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
			goto mapit;

		/* checksum in __btrfs_submit_bio_start, map in __btrfs_submit_bio_done */
		ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
				   inode, rw, bio, mirror_num,
				   bio_flags, bio_offset,
				   __btrfs_submit_bio_start,
				   __btrfs_submit_bio_done);
		goto out;
	} else if (!skip_sum) {
		/* synchronous writer present: checksum inline */
		ret = btrfs_csum_one_bio(root, inode, bio, 0, 0);
		if (ret)
			goto out;
	}

mapit:
	ret = btrfs_map_bio(root, rw, bio, mirror_num, 0);

out:
	/* only negative results end the bio here; positive values pass through */
	if (ret < 0)
		bio_endio(bio, ret);
	return ret;
}
1810
1811
1812
1813
1814
1815static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
1816 struct inode *inode, u64 file_offset,
1817 struct list_head *list)
1818{
1819 struct btrfs_ordered_sum *sum;
1820
1821 list_for_each_entry(sum, list, list) {
1822 trans->adding_csums = 1;
1823 btrfs_csum_file_blocks(trans,
1824 BTRFS_I(inode)->root->fs_info->csum_root, sum);
1825 trans->adding_csums = 0;
1826 }
1827 return 0;
1828}
1829
1830int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
1831 struct extent_state **cached_state)
1832{
1833 WARN_ON((end & (PAGE_CACHE_SIZE - 1)) == 0);
1834 return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
1835 cached_state, GFP_NOFS);
1836}
1837
1838
/*
 * Work item queued by btrfs_writepage_start_hook() and consumed (and freed)
 * by btrfs_writepage_fixup_worker().
 */
struct btrfs_writepage_fixup {
	/* page to fix up; a reference is taken before the work is queued */
	struct page *page;
	/* embedded work item; container_of() recovers the fixup from it */
	struct btrfs_work work;
};
1843
/*
 * Worker for a fixup queued by btrfs_writepage_start_hook().
 *
 * Under the page lock and the extent lock for the page's range it reserves
 * delalloc space for one page, marks the range delalloc and re-dirties the
 * page.  If an ordered extent covers the page start, it is started and
 * waited on and the whole procedure restarts.
 *
 * Always drops the page lock, releases the page reference taken when the
 * work was queued, and frees the fixup.
 */
static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
{
	struct btrfs_writepage_fixup *fixup;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	struct page *page;
	struct inode *inode;
	u64 page_start;
	u64 page_end;
	int ret;

	fixup = container_of(work, struct btrfs_writepage_fixup, work);
	page = fixup->page;
again:
	lock_page(page);
	/* page lost its mapping, is clean, or is no longer flagged: give up */
	if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
		ClearPageChecked(page);
		goto out_page;
	}

	inode = page->mapping->host;
	page_start = page_offset(page);
	page_end = page_offset(page) + PAGE_CACHE_SIZE - 1;

	lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 0,
			 &cached_state);

	/* Private2 already set: nothing to fix up (Checked is left untouched) */
	if (PagePrivate2(page))
		goto out;

	ordered = btrfs_lookup_ordered_extent(inode, page_start);
	if (ordered) {
		/* drop both locks before waiting, then retry from the top */
		unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start,
				     page_end, &cached_state, GFP_NOFS);
		unlock_page(page);
		btrfs_start_ordered_extent(inode, ordered, 1);
		btrfs_put_ordered_extent(ordered);
		goto again;
	}

	ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
	if (ret) {
		/* record the error on the mapping and end writeback of the page */
		mapping_set_error(page->mapping, ret);
		end_extent_writepage(page, ret, page_start, page_end);
		ClearPageChecked(page);
		goto out;
	 }

	btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state);
	ClearPageChecked(page);
	set_page_dirty(page);
out:
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
			     &cached_state, GFP_NOFS);
out_page:
	unlock_page(page);
	/* pairs with page_cache_get() in btrfs_writepage_start_hook() */
	page_cache_release(page);
	kfree(fixup);
}
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
1917{
1918 struct inode *inode = page->mapping->host;
1919 struct btrfs_writepage_fixup *fixup;
1920 struct btrfs_root *root = BTRFS_I(inode)->root;
1921
1922
1923 if (TestClearPagePrivate2(page))
1924 return 0;
1925
1926 if (PageChecked(page))
1927 return -EAGAIN;
1928
1929 fixup = kzalloc(sizeof(*fixup), GFP_NOFS);
1930 if (!fixup)
1931 return -EAGAIN;
1932
1933 SetPageChecked(page);
1934 page_cache_get(page);
1935 btrfs_init_work(&fixup->work, btrfs_fixup_helper,
1936 btrfs_writepage_fixup_worker, NULL, NULL);
1937 fixup->page = page;
1938 btrfs_queue_work(root->fs_info->fixup_workers, &fixup->work);
1939 return -EBUSY;
1940}
1941
/*
 * Write a file extent item for [file_pos, file_pos + num_bytes) of @inode
 * that points at the extent (disk_bytenr, disk_num_bytes), add num_bytes to
 * the inode's byte count, and then call btrfs_alloc_reserved_file_extent()
 * for that disk extent.
 *
 * Whatever previously covered the file range is dropped first.  The new item
 * always has extent offset 0; the remaining fields are taken from the
 * arguments.
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, or the error
 * from the failing helper.
 */
static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
				       struct inode *inode, u64 file_pos,
				       u64 disk_bytenr, u64 disk_num_bytes,
				       u64 num_bytes, u64 ram_bytes,
				       u8 compression, u8 encryption,
				       u16 other_encoding, int extent_type)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_file_extent_item *fi;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key ins;
	int extent_inserted = 0;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/*
	 * Drop the old extents in the range.  __btrfs_drop_extents() is given
	 * the size of the item we want and reports through extent_inserted
	 * whether it already left an item for us at path - presumably an
	 * empty slot keyed at file_pos; TODO confirm against its definition.
	 */
	ret = __btrfs_drop_extents(trans, root, inode, path, file_pos,
				   file_pos + num_bytes, NULL, 0,
				   1, sizeof(*fi), &extent_inserted);
	if (ret)
		goto out;

	if (!extent_inserted) {
		/* no slot was prepared for us: insert the empty item ourselves */
		ins.objectid = btrfs_ino(inode);
		ins.offset = file_pos;
		ins.type = BTRFS_EXTENT_DATA_KEY;

		path->leave_spinning = 1;
		ret = btrfs_insert_empty_item(trans, root, path, &ins,
					      sizeof(*fi));
		if (ret)
			goto out;
	}
	/* fill in the file extent item the path now points at */
	leaf = path->nodes[0];
	fi = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);
	btrfs_set_file_extent_generation(leaf, fi, trans->transid);
	btrfs_set_file_extent_type(leaf, fi, extent_type);
	btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr);
	btrfs_set_file_extent_disk_num_bytes(leaf, fi, disk_num_bytes);
	btrfs_set_file_extent_offset(leaf, fi, 0);
	btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
	btrfs_set_file_extent_ram_bytes(leaf, fi, ram_bytes);
	btrfs_set_file_extent_compression(leaf, fi, compression);
	btrfs_set_file_extent_encryption(leaf, fi, encryption);
	btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding);

	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	inode_add_bytes(inode, num_bytes);

	/* 'ins' is reused, now describing the on-disk extent itself */
	ins.objectid = disk_bytenr;
	ins.offset = disk_num_bytes;
	ins.type = BTRFS_EXTENT_ITEM_KEY;
	ret = btrfs_alloc_reserved_file_extent(trans, root,
					root->root_key.objectid,
					btrfs_ino(inode), file_pos, &ins);
out:
	btrfs_free_path(path);

	return ret;
}
2017
2018
/*
 * One recorded reference to an old extent, created by record_one_backref()
 * and consumed by relink_extent_backref().
 */
struct sa_defrag_extent_backref {
	/* links the backref into new_sa_defrag_extent::root */
	struct rb_node node;
	/* old extent this backref refers to */
	struct old_sa_defrag_extent *old;
	/* objectid of the root holding the referencing file extent item */
	u64 root_id;
	/* inode number of the referencing file */
	u64 inum;
	/* key offset of the referencing file extent item */
	u64 file_pos;
	/* 'offset' field of the referencing file extent item */
	u64 extent_offset;
	/* 'num_bytes' field of the referencing file extent item */
	u64 num_bytes;
	/* generation of the item when recorded; rechecked before relinking */
	u64 generation;
};
2029
/*
 * Description of an extent that is being replaced; kept on
 * new_sa_defrag_extent::head.
 */
struct old_sa_defrag_extent {
	/* links the entry into new_sa_defrag_extent::head */
	struct list_head list;
	/* the replacement extent this old extent belongs to */
	struct new_sa_defrag_extent *new;

	/*
	 * extent_offset + offset is the start, and len the size, of the
	 * window within the disk extent that backrefs are matched against
	 * (see record_one_backref() and relink_extent_backref()).
	 */
	u64 extent_offset;
	/* disk bytenr that referencing file extent items must match */
	u64 bytenr;
	u64 offset;
	u64 len;
	/* number of backrefs recorded for this extent */
	int count;
};
2040
/*
 * Description of the replacement extent plus the bookkeeping needed to
 * relink other references to it.
 */
struct new_sa_defrag_extent {
	/* rb tree of sa_defrag_extent_backref, ordered by backref_comp() */
	struct rb_root root;
	/* list of old_sa_defrag_extent entries */
	struct list_head head;
	/* scratch path, set by record_extent_backrefs() for record_one_backref() */
	struct btrfs_path *path;
	/* inode the new extent was written for */
	struct inode *inode;
	/* file offset of the new extent (ordered->file_offset) */
	u64 file_pos;
	/* length of the new extent (ordered->len) */
	u64 len;
	/* disk start of the new extent (ordered->start) */
	u64 bytenr;
	/* on-disk length of the new extent (ordered->disk_len) */
	u64 disk_len;
	/* compression type of the new extent (ordered->compress_type) */
	u8 compress_type;
};
2052
2053static int backref_comp(struct sa_defrag_extent_backref *b1,
2054 struct sa_defrag_extent_backref *b2)
2055{
2056 if (b1->root_id < b2->root_id)
2057 return -1;
2058 else if (b1->root_id > b2->root_id)
2059 return 1;
2060
2061 if (b1->inum < b2->inum)
2062 return -1;
2063 else if (b1->inum > b2->inum)
2064 return 1;
2065
2066 if (b1->file_pos < b2->file_pos)
2067 return -1;
2068 else if (b1->file_pos > b2->file_pos)
2069 return 1;
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083 return 0;
2084}
2085
2086static void backref_insert(struct rb_root *root,
2087 struct sa_defrag_extent_backref *backref)
2088{
2089 struct rb_node **p = &root->rb_node;
2090 struct rb_node *parent = NULL;
2091 struct sa_defrag_extent_backref *entry;
2092 int ret;
2093
2094 while (*p) {
2095 parent = *p;
2096 entry = rb_entry(parent, struct sa_defrag_extent_backref, node);
2097
2098 ret = backref_comp(backref, entry);
2099 if (ret < 0)
2100 p = &(*p)->rb_left;
2101 else
2102 p = &(*p)->rb_right;
2103 }
2104
2105 rb_link_node(&backref->node, parent, p);
2106 rb_insert_color(&backref->node, root);
2107}
2108
2109
2110
2111
/*
 * Callback for iterate_inodes_from_logical() (see record_extent_backrefs()).
 *
 * For the reference (@root_id, @inum, @offset) to the old extent passed in
 * @ctx, look up the matching file extent item and, if it overlaps the part
 * of the old extent being replaced, record it as a backref in the new
 * extent's rb tree and bump old->count.
 *
 * The reference held by the inode the new extent belongs to is skipped.
 *
 * Returns 0 when the reference was recorded or skipped, a negative errno
 * otherwise.
 */
static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
				       void *ctx)
{
	struct btrfs_file_extent_item *extent;
	struct btrfs_fs_info *fs_info;
	struct old_sa_defrag_extent *old = ctx;
	struct new_sa_defrag_extent *new = old->new;
	struct btrfs_path *path = new->path;
	struct btrfs_key key;
	struct btrfs_root *root;
	struct sa_defrag_extent_backref *backref;
	struct extent_buffer *leaf;
	struct inode *inode = new->inode;
	int slot;
	int ret;
	u64 extent_offset;
	u64 num_bytes;

	/* this is our own reference: nothing to relink */
	if (BTRFS_I(inode)->root->root_key.objectid == root_id &&
	    inum == btrfs_ino(inode))
		return 0;

	key.objectid = root_id;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	fs_info = BTRFS_I(inode)->root->fs_info;
	root = btrfs_read_fs_root_no_name(fs_info, &key);
	if (IS_ERR(root)) {
		/* a root that no longer exists is silently skipped */
		if (PTR_ERR(root) == -ENOENT)
			return 0;
		WARN_ON(1);
		pr_debug("inum=%llu, offset=%llu, root_id=%llu\n",
			 inum, offset, root_id);
		return PTR_ERR(root);
	}

	key.objectid = inum;
	key.type = BTRFS_EXTENT_DATA_KEY;
	/* offsets above (u64)-1 << 32 start the search at 0 instead */
	if (offset > (u64)-1 << 32)
		key.offset = 0;
	else
		key.offset = offset;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (WARN_ON(ret < 0))
		return ret;
	ret = 0;

	/* walk forward through the inode's items until a match is found */
	while (1) {
		cond_resched();

		leaf = path->nodes[0];
		slot = path->slots[0];

		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0) {
				goto out;
			} else if (ret > 0) {
				/* ran off the end of the tree: no match */
				ret = 0;
				goto out;
			}
			continue;
		}

		/* advance now; 'slot' still names the item examined below */
		path->slots[0]++;

		btrfs_item_key_to_cpu(leaf, &key, slot);

		if (key.objectid > inum)
			goto out;

		if (key.objectid < inum || key.type != BTRFS_EXTENT_DATA_KEY)
			continue;

		extent = btrfs_item_ptr(leaf, slot,
					struct btrfs_file_extent_item);

		if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr)
			continue;

		/*
		 * @offset is compared against the item's key offset, i.e. the
		 * file position of the item, not against the extent offset
		 * stored inside the item.
		 */
		if (key.offset != offset)
			continue;

		extent_offset = btrfs_file_extent_offset(leaf, extent);
		num_bytes = btrfs_file_extent_num_bytes(leaf, extent);

		/* skip items that do not overlap the replaced part of the old extent */
		if (extent_offset >= old->extent_offset + old->offset +
		    old->len || extent_offset + num_bytes <=
		    old->extent_offset + old->offset)
			continue;
		break;
	}

	backref = kmalloc(sizeof(*backref), GFP_NOFS);
	if (!backref) {
		/*
		 * NOTE(review): allocation failure is reported as -ENOENT,
		 * which record_extent_backrefs() treats as non-fatal - confirm
		 * this is intended rather than -ENOMEM.
		 */
		ret = -ENOENT;
		goto out;
	}

	backref->root_id = root_id;
	backref->inum = inum;
	backref->file_pos = offset;
	backref->num_bytes = num_bytes;
	backref->extent_offset = extent_offset;
	backref->generation = btrfs_file_extent_generation(leaf, extent);
	backref->old = old;
	backref_insert(&new->root, backref);
	old->count++;
out:
	btrfs_release_path(path);
	WARN_ON(ret);
	return ret;
}
2232
2233static noinline bool record_extent_backrefs(struct btrfs_path *path,
2234 struct new_sa_defrag_extent *new)
2235{
2236 struct btrfs_fs_info *fs_info = BTRFS_I(new->inode)->root->fs_info;
2237 struct old_sa_defrag_extent *old, *tmp;
2238 int ret;
2239
2240 new->path = path;
2241
2242 list_for_each_entry_safe(old, tmp, &new->head, list) {
2243 ret = iterate_inodes_from_logical(old->bytenr +
2244 old->extent_offset, fs_info,
2245 path, record_one_backref,
2246 old);
2247 if (ret < 0 && ret != -ENOENT)
2248 return false;
2249
2250
2251 if (!old->count) {
2252 list_del(&old->list);
2253 kfree(old);
2254 }
2255 }
2256
2257 if (list_empty(&new->head))
2258 return false;
2259
2260 return true;
2261}
2262
2263static int relink_is_mergable(struct extent_buffer *leaf,
2264 struct btrfs_file_extent_item *fi,
2265 struct new_sa_defrag_extent *new)
2266{
2267 if (btrfs_file_extent_disk_bytenr(leaf, fi) != new->bytenr)
2268 return 0;
2269
2270 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
2271 return 0;
2272
2273 if (btrfs_file_extent_compression(leaf, fi) != new->compress_type)
2274 return 0;
2275
2276 if (btrfs_file_extent_encryption(leaf, fi) ||
2277 btrfs_file_extent_other_encoding(leaf, fi))
2278 return 0;
2279
2280 return 1;
2281}
2282
2283
2284
2285
/*
 * Point one recorded reference (@backref) at the new extent.
 *
 * @prev is the backref relinked by the previous call, or NULL.  When it
 * belongs to the same root and inode and ends exactly where @backref
 * starts, the function first tries to extend the preceding file extent item
 * instead of inserting a new one.
 *
 * Returns 1 when the reference was relinked (merged or newly inserted),
 * 0 when it was skipped (root or inode gone, read-only root, ordered extent
 * pending, item missing or changed since it was recorded), and a negative
 * errno on failure.
 */
static noinline int relink_extent_backref(struct btrfs_path *path,
				 struct sa_defrag_extent_backref *prev,
				 struct sa_defrag_extent_backref *backref)
{
	struct btrfs_file_extent_item *extent;
	struct btrfs_file_extent_item *item;
	struct btrfs_ordered_extent *ordered;
	struct btrfs_trans_handle *trans;
	struct btrfs_fs_info *fs_info;
	struct btrfs_root *root;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct old_sa_defrag_extent *old = backref->old;
	struct new_sa_defrag_extent *new = old->new;
	struct inode *src_inode = new->inode;
	struct inode *inode;
	struct extent_state *cached = NULL;
	int ret = 0;
	u64 start;
	u64 len;
	u64 lock_start;
	u64 lock_end;
	bool merge = false;
	int index;

	/* contiguous with the previous backref of the same file: try merging */
	if (prev && prev->root_id == backref->root_id &&
	    prev->inum == backref->inum &&
	    prev->file_pos + prev->num_bytes == backref->file_pos)
		merge = true;

	/* step 1: look up the root, under the subvol SRCU read lock */
	key.objectid = backref->root_id;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	fs_info = BTRFS_I(src_inode)->root->fs_info;
	index = srcu_read_lock(&fs_info->subvol_srcu);

	root = btrfs_read_fs_root_no_name(fs_info, &key);
	if (IS_ERR(root)) {
		srcu_read_unlock(&fs_info->subvol_srcu, index);
		if (PTR_ERR(root) == -ENOENT)
			return 0;
		return PTR_ERR(root);
	}

	/* read-only roots are left untouched */
	if (btrfs_root_readonly(root)) {
		srcu_read_unlock(&fs_info->subvol_srcu, index);
		return 0;
	}

	/* step 2: get the inode; any lookup failure just skips this backref */
	key.objectid = backref->inum;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;

	inode = btrfs_iget(fs_info->sb, &key, root, NULL);
	if (IS_ERR(inode)) {
		srcu_read_unlock(&fs_info->subvol_srcu, index);
		return 0;
	}

	srcu_read_unlock(&fs_info->subvol_srcu, index);

	/* step 3: relink the backref, with the referenced file range locked */
	lock_start = backref->file_pos;
	lock_end = backref->file_pos + backref->num_bytes - 1;
	lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
			 0, &cached);

	/* skip this backref if an ordered extent is found (returns 0) */
	ordered = btrfs_lookup_first_ordered_extent(inode, lock_end);
	if (ordered) {
		btrfs_put_ordered_extent(ordered);
		goto out_unlock;
	}

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_unlock;
	}

	key.objectid = backref->inum;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = backref->file_pos;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0) {
		goto out_free_path;
	} else if (ret > 0) {
		/* the item is gone: skip */
		ret = 0;
		goto out_free_path;
	}

	extent = btrfs_item_ptr(path->nodes[0], path->slots[0],
				struct btrfs_file_extent_item);

	/* the item was rewritten since it was recorded: skip (ret is 0 here) */
	if (btrfs_file_extent_generation(path->nodes[0], extent) !=
	    backref->generation)
		goto out_free_path;

	btrfs_release_path(path);

	/*
	 * Clip the backref to the part of the old extent being replaced:
	 * 'start' is the file position and 'len' the length of the overlap.
	 */
	start = backref->file_pos;
	if (backref->extent_offset < old->extent_offset + old->offset)
		start += old->extent_offset + old->offset -
			 backref->extent_offset;

	len = min(backref->extent_offset + backref->num_bytes,
		  old->extent_offset + old->offset + old->len);
	len -= max(backref->extent_offset, old->extent_offset + old->offset);

	ret = btrfs_drop_extents(trans, root, inode, start,
				 start + len, 1);
	if (ret)
		goto out_free_path;
again:
	key.objectid = btrfs_ino(inode);
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = start;

	path->leave_spinning = 1;
	if (merge) {
		struct btrfs_file_extent_item *fi;
		u64 extent_len;
		struct btrfs_key found_key;

		ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
		if (ret < 0)
			goto out_free_path;

		/* step back to the item preceding 'start' */
		path->slots[0]--;
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		extent_len = btrfs_file_extent_num_bytes(leaf, fi);

		if (extent_len + found_key.offset == start &&
		    relink_is_mergable(leaf, fi, new)) {
			/* previous item ends at 'start' and is compatible: grow it */
			btrfs_set_file_extent_num_bytes(leaf, fi,
							extent_len + len);
			btrfs_mark_buffer_dirty(leaf);
			inode_add_bytes(inode, len);

			ret = 1;
			goto out_free_path;
		} else {
			/* cannot merge: retry once, inserting a fresh item */
			merge = false;
			btrfs_release_path(path);
			goto again;
		}
	}

	ret = btrfs_insert_empty_item(trans, root, path, &key,
					sizeof(*extent));
	if (ret) {
		btrfs_abort_transaction(trans, root, ret);
		goto out_free_path;
	}

	/* describe the new extent in the freshly inserted item */
	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_file_extent_item);
	btrfs_set_file_extent_disk_bytenr(leaf, item, new->bytenr);
	btrfs_set_file_extent_disk_num_bytes(leaf, item, new->disk_len);
	btrfs_set_file_extent_offset(leaf, item, start - new->file_pos);
	btrfs_set_file_extent_num_bytes(leaf, item, len);
	btrfs_set_file_extent_ram_bytes(leaf, item, new->len);
	btrfs_set_file_extent_generation(leaf, item, trans->transid);
	btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
	btrfs_set_file_extent_compression(leaf, item, new->compress_type);
	btrfs_set_file_extent_encryption(leaf, item, 0);
	btrfs_set_file_extent_other_encoding(leaf, item, 0);

	btrfs_mark_buffer_dirty(leaf);
	inode_add_bytes(inode, len);
	btrfs_release_path(path);

	/* the new extent gains a reference from (root_id, inum) */
	ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
			new->disk_len, 0,
			backref->root_id, backref->inum,
			new->file_pos, 0);
	if (ret) {
		btrfs_abort_transaction(trans, root, ret);
		goto out_free_path;
	}

	ret = 1;
out_free_path:
	btrfs_release_path(path);
	path->leave_spinning = 0;
	btrfs_end_transaction(trans, root);
out_unlock:
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
			     &cached, GFP_NOFS);
	iput(inode);
	return ret;
}
2486
2487static void free_sa_defrag_extent(struct new_sa_defrag_extent *new)
2488{
2489 struct old_sa_defrag_extent *old, *tmp;
2490
2491 if (!new)
2492 return;
2493
2494 list_for_each_entry_safe(old, tmp, &new->head, list) {
2495 list_del(&old->list);
2496 kfree(old);
2497 }
2498 kfree(new);
2499}
2500
2501static void relink_file_extents(struct new_sa_defrag_extent *new)
2502{
2503 struct btrfs_path *path;
2504 struct sa_defrag_extent_backref *backref;
2505 struct sa_defrag_extent_backref *prev = NULL;
2506 struct inode *inode;
2507 struct btrfs_root *root;
2508 struct rb_node *node;
2509 int ret;
2510
2511 inode = new->inode;
2512 root = BTRFS_I(inode)->root;
2513
2514 path = btrfs_alloc_path();
2515 if (!path)
2516 return;
2517
2518 if (!record_extent_backrefs(path, new)) {
2519 btrfs_free_path(path);
2520 goto out;
2521 }
2522 btrfs_release_path(path);
2523
2524 while (1) {
2525 node = rb_first(&new->root);
2526 if (!node)
2527 break;
2528 rb_erase(node, &new->root);
2529
2530 backref = rb_entry(node, struct sa_defrag_extent_backref, node);
2531
2532 ret = relink_extent_backref(path, prev, backref);
2533 WARN_ON(ret < 0);
2534
2535 kfree(prev);
2536
2537 if (ret == 1)
2538 prev = backref;
2539 else
2540 prev = NULL;
2541 cond_resched();
2542 }
2543 kfree(prev);
2544
2545 btrfs_free_path(path);
2546out:
2547 free_sa_defrag_extent(new);
2548
2549 atomic_dec(&root->fs_info->defrag_running);
2550 wake_up(&root->fs_info->transaction_wait);
2551}
2552
/*
 * Record the file extent items that currently back the range covered by
 * @ordered.
 *
 * A new_sa_defrag_extent is allocated and filled from @ordered (file offset,
 * length, disk start, disk length, compression type).  The EXTENT_DATA items
 * of @inode are then walked, starting at or just before
 * ordered->file_offset, and for every item with a non-zero disk_bytenr an
 * old_sa_defrag_extent describing the overlap with the ordered range is
 * appended to new->head.
 *
 * On success fs_info->defrag_running is incremented and the record is
 * returned; the caller owns it.  Returns NULL when an allocation or a tree
 * operation fails; in that case everything allocated here has already been
 * freed and defrag_running is left untouched.
 */
static struct new_sa_defrag_extent *
record_old_file_extents(struct inode *inode,
			struct btrfs_ordered_extent *ordered)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct old_sa_defrag_extent *old;
	struct new_sa_defrag_extent *new;
	int ret;

	new = kmalloc(sizeof(*new), GFP_NOFS);
	if (!new)
		return NULL;

	new->inode = inode;
	new->file_pos = ordered->file_offset;
	new->len = ordered->len;
	new->bytenr = ordered->start;
	new->disk_len = ordered->disk_len;
	new->compress_type = ordered->compress_type;
	new->root = RB_ROOT;
	INIT_LIST_HEAD(&new->head);

	path = btrfs_alloc_path();
	if (!path)
		goto out_kfree;

	key.objectid = btrfs_ino(inode);
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = new->file_pos;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out_free_path;
	/* no exact match: step back one slot so the walk starts one item earlier */
	if (ret > 0 && path->slots[0] > 0)
		path->slots[0]--;

	/* collect every old extent item overlapping the ordered range */
	while (1) {
		struct btrfs_file_extent_item *extent;
		struct extent_buffer *l;
		int slot;
		u64 num_bytes;
		u64 offset;
		u64 end;
		u64 disk_bytenr;
		u64 extent_offset;

		l = path->nodes[0];
		slot = path->slots[0];

		/* ran off the end of this leaf: move on to the next one */
		if (slot >= btrfs_header_nritems(l)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out_free_path;
			else if (ret > 0)
				break;
			continue;
		}

		btrfs_item_key_to_cpu(l, &key, slot);

		/* stop once we leave this inode's extent items or the range */
		if (key.objectid != btrfs_ino(inode))
			break;
		if (key.type != BTRFS_EXTENT_DATA_KEY)
			break;
		if (key.offset >= new->file_pos + new->len)
			break;

		extent = btrfs_item_ptr(l, slot, struct btrfs_file_extent_item);

		/*
		 * Skip items that end before the range starts.
		 * NOTE(review): an item ending exactly at new->file_pos is not
		 * skipped and produces an entry with len == 0 below; confirm
		 * that is intended.
		 */
		num_bytes = btrfs_file_extent_num_bytes(l, extent);
		if (key.offset + num_bytes < new->file_pos)
			goto next;

		/* items with a zero disk_bytenr are not recorded */
		disk_bytenr = btrfs_file_extent_disk_bytenr(l, extent);
		if (!disk_bytenr)
			goto next;

		extent_offset = btrfs_file_extent_offset(l, extent);

		old = kmalloc(sizeof(*old), GFP_NOFS);
		if (!old)
			goto out_free_path;

		/* clamp the item to the part that overlaps the ordered range */
		offset = max(new->file_pos, key.offset);
		end = min(new->file_pos + new->len, key.offset + num_bytes);

		old->bytenr = disk_bytenr;
		old->extent_offset = extent_offset;
		old->offset = offset - key.offset;
		old->len = end - offset;
		old->new = new;
		old->count = 0;
		list_add_tail(&old->list, &new->head);
next:
		path->slots[0]++;
		cond_resched();
	}

	btrfs_free_path(path);
	atomic_inc(&root->fs_info->defrag_running);

	return new;

out_free_path:
	btrfs_free_path(path);
out_kfree:
	/* also frees any old-extent entries already linked on new->head */
	free_sa_defrag_extent(new);
	return NULL;
}
2665
2666static void btrfs_release_delalloc_bytes(struct btrfs_root *root,
2667 u64 start, u64 len)
2668{
2669 struct btrfs_block_group_cache *cache;
2670
2671 cache = btrfs_lookup_block_group(root->fs_info, start);
2672 ASSERT(cache);
2673
2674 spin_lock(&cache->lock);
2675 cache->delalloc_bytes -= len;
2676 spin_unlock(&cache->lock);
2677
2678 btrfs_put_block_group(cache);
2679}
2680
2681
2682
2683
2684
/*
 * Finish an ordered extent: record its result in the inode's metadata and
 * tear the ordered extent down.
 *
 * Flow, as visible in this function:
 *  - BTRFS_ORDERED_IOERR set: fail with -EIO.
 *  - BTRFS_ORDERED_TRUNCATED set: only ordered_extent->truncated_len bytes
 *    are recorded; a truncated length of zero skips the metadata update.
 *  - BTRFS_ORDERED_NOCOW set: only i_size and the inode item are updated.
 *  - otherwise: the file range is locked, the extent is either marked
 *    written (BTRFS_ORDERED_PREALLOC) or inserted as a regular file extent,
 *    the pending checksums are added and the inode item is updated.
 *
 * On every path the delalloc metadata reservation is released (unless the
 * root is the tree root), the ordered extent is removed from the inode and
 * two references on it are dropped.
 *
 * Returns 0 on success or a negative errno.
 */
static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
{
	struct inode *inode = ordered_extent->inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_trans_handle *trans = NULL;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_state *cached_state = NULL;
	struct new_sa_defrag_extent *new = NULL;
	int compress_type = 0;
	int ret = 0;
	u64 logical_len = ordered_extent->len;
	bool nolock;
	bool truncated = false;

	/* free space inodes join the transaction via the _nolock variant */
	nolock = btrfs_is_free_space_inode(inode);

	if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) {
		ret = -EIO;
		goto out;
	}

	btrfs_free_io_failure_record(inode, ordered_extent->file_offset,
				     ordered_extent->file_offset +
				     ordered_extent->len - 1);

	if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
		truncated = true;
		logical_len = ordered_extent->truncated_len;
		/* nothing of the extent is left to record */
		if (!logical_len)
			goto out;
	}

	if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
		/* a NOCOW ordered extent must not carry entries on ->list */
		BUG_ON(!list_empty(&ordered_extent->list));
		btrfs_ordered_update_i_size(inode, 0, ordered_extent);
		if (nolock)
			trans = btrfs_join_transaction_nolock(root);
		else
			trans = btrfs_join_transaction(root);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			/* keep the cleanup at "out" from ending an error pointer */
			trans = NULL;
			goto out;
		}
		trans->block_rsv = &root->fs_info->delalloc_block_rsv;
		ret = btrfs_update_inode_fallback(trans, root, inode);
		if (ret)
			btrfs_abort_transaction(trans, root, ret);
		goto out;
	}

	lock_extent_bits(io_tree, ordered_extent->file_offset,
			 ordered_extent->file_offset + ordered_extent->len - 1,
			 0, &cached_state);

	ret = test_range_bit(io_tree, ordered_extent->file_offset,
			ordered_extent->file_offset + ordered_extent->len - 1,
			EXTENT_DEFRAG, 1, cached_state);
	if (ret) {
		u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
		/*
		 * The leading "0 &&" makes this condition always false, so
		 * record_old_file_extents() is never called from here and
		 * "new" stays NULL.
		 */
		if (0 && last_snapshot >= BTRFS_I(inode)->generation)
			new = record_old_file_extents(inode, ordered_extent);

		clear_extent_bit(io_tree, ordered_extent->file_offset,
			ordered_extent->file_offset + ordered_extent->len - 1,
			EXTENT_DEFRAG, 0, 0, &cached_state, GFP_NOFS);
	}

	if (nolock)
		trans = btrfs_join_transaction_nolock(root);
	else
		trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		trans = NULL;
		goto out_unlock;
	}

	trans->block_rsv = &root->fs_info->delalloc_block_rsv;

	if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
		compress_type = ordered_extent->compress_type;
	if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
		/* PREALLOC and COMPRESSED must not both be set */
		BUG_ON(compress_type);
		ret = btrfs_mark_extent_written(trans, inode,
						ordered_extent->file_offset,
						ordered_extent->file_offset +
						logical_len);
	} else {
		BUG_ON(root == root->fs_info->tree_root);
		ret = insert_reserved_file_extent(trans, inode,
						ordered_extent->file_offset,
						ordered_extent->start,
						ordered_extent->disk_len,
						logical_len, logical_len,
						compress_type, 0, 0,
						BTRFS_FILE_EXTENT_REG);
		if (!ret)
			btrfs_release_delalloc_bytes(root,
						     ordered_extent->start,
						     ordered_extent->disk_len);
	}
	/* runs on success and on failure, before ret is checked */
	unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
			   ordered_extent->file_offset, ordered_extent->len,
			   trans->transid);
	if (ret < 0) {
		btrfs_abort_transaction(trans, root, ret);
		goto out_unlock;
	}

	add_pending_csums(trans, inode, ordered_extent->file_offset,
			  &ordered_extent->list);

	btrfs_ordered_update_i_size(inode, 0, ordered_extent);
	ret = btrfs_update_inode_fallback(trans, root, inode);
	if (ret) {
		btrfs_abort_transaction(trans, root, ret);
		goto out_unlock;
	}
	ret = 0;
out_unlock:
	unlock_extent_cached(io_tree, ordered_extent->file_offset,
			     ordered_extent->file_offset +
			     ordered_extent->len - 1, &cached_state, GFP_NOFS);
out:
	if (root != root->fs_info->tree_root)
		btrfs_delalloc_release_metadata(inode, ordered_extent->len);
	if (trans)
		btrfs_end_transaction(trans, root);

	if (ret || truncated) {
		u64 start, end;

		/*
		 * On truncation only the tail beyond logical_len is cleaned
		 * up; otherwise (error) the whole ordered range is.
		 */
		if (truncated)
			start = ordered_extent->file_offset + logical_len;
		else
			start = ordered_extent->file_offset;
		end = ordered_extent->file_offset + ordered_extent->len - 1;
		clear_extent_uptodate(io_tree, start, end, NULL, GFP_NOFS);

		/* drop the cached extent mapping for that part as well */
		btrfs_drop_extent_cache(inode, start, end, 0);

		/*
		 * Hand the reserved extent back when we failed, or when the
		 * truncation left nothing recorded (logical_len == 0).  This
		 * is skipped for NOCOW and PREALLOC ordered extents.
		 */
		if ((ret || !logical_len) &&
		    !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
		    !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags))
			btrfs_free_reserved_extent(root, ordered_extent->start,
						   ordered_extent->disk_len, 1);
	}

	/*
	 * Remove the ordered extent from the inode only after all of the
	 * updates above are done.
	 */
	btrfs_remove_ordered_extent(inode, ordered_extent);

	/* snapshot-aware defrag record, if one was taken above */
	if (new) {
		if (ret) {
			/* undo the defrag_running count taken when recording */
			free_sa_defrag_extent(new);
			atomic_dec(&root->fs_info->defrag_running);
		} else {
			relink_file_extents(new);
		}
	}

	/*
	 * Two references are dropped here.
	 * NOTE(review): presumably one is ours and one belonged to the
	 * ordered tree; confirm against btrfs_remove_ordered_extent().
	 */
	btrfs_put_ordered_extent(ordered_extent);

	btrfs_put_ordered_extent(ordered_extent);

	return ret;
}
2867
2868static void finish_ordered_fn(struct btrfs_work *work)
2869{
2870 struct btrfs_ordered_extent *ordered_extent;
2871 ordered_extent = container_of(work, struct btrfs_ordered_extent, work);
2872 btrfs_finish_ordered_io(ordered_extent);
2873}
2874
2875static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
2876 struct extent_state *state, int uptodate)
2877{
2878 struct inode *inode = page->mapping->host;
2879 struct btrfs_root *root = BTRFS_I(inode)->root;
2880 struct btrfs_ordered_extent *ordered_extent = NULL;
2881 struct btrfs_workqueue *wq;
2882 btrfs_work_func_t func;
2883
2884 trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
2885
2886 ClearPagePrivate2(page);
2887 if (!btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
2888 end - start + 1, uptodate))
2889 return 0;
2890
2891 if (btrfs_is_free_space_inode(inode)) {
2892 wq = root->fs_info->endio_freespace_worker;
2893 func = btrfs_freespace_write_helper;
2894 } else {
2895 wq = root->fs_info->endio_write_workers;
2896 func = btrfs_endio_write_helper;
2897 }
2898
2899 btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL,
2900 NULL);
2901 btrfs_queue_work(wq, &ordered_extent->work);
2902
2903 return 0;
2904}
2905
2906static int __readpage_endio_check(struct inode *inode,
2907 struct btrfs_io_bio *io_bio,
2908 int icsum, struct page *page,
2909 int pgoff, u64 start, size_t len)
2910{
2911 char *kaddr;
2912 u32 csum_expected;
2913 u32 csum = ~(u32)0;
2914 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
2915 DEFAULT_RATELIMIT_BURST);
2916
2917 csum_expected = *(((u32 *)io_bio->csum) + icsum);
2918
2919 kaddr = kmap_atomic(page);
2920 csum = btrfs_csum_data(kaddr + pgoff, csum, len);
2921 btrfs_csum_final(csum, (char *)&csum);
2922 if (csum != csum_expected)
2923 goto zeroit;
2924
2925 kunmap_atomic(kaddr);
2926 return 0;
2927zeroit:
2928 if (__ratelimit(&_rs))
2929 btrfs_info(BTRFS_I(inode)->root->fs_info,
2930 "csum failed ino %llu off %llu csum %u expected csum %u",
2931 btrfs_ino(inode), start, csum, csum_expected);
2932 memset(kaddr + pgoff, 1, len);
2933 flush_dcache_page(page);
2934 kunmap_atomic(kaddr);
2935 if (csum_expected == 0)
2936 return 0;
2937 return -EIO;
2938}
2939
2940
2941
2942
2943
2944
2945static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
2946 u64 phy_offset, struct page *page,
2947 u64 start, u64 end, int mirror)
2948{
2949 size_t offset = start - page_offset(page);
2950 struct inode *inode = page->mapping->host;
2951 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2952 struct btrfs_root *root = BTRFS_I(inode)->root;
2953
2954 if (PageChecked(page)) {
2955 ClearPageChecked(page);
2956 return 0;
2957 }
2958
2959 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
2960 return 0;
2961
2962 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
2963 test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
2964 clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM,
2965 GFP_NOFS);
2966 return 0;
2967 }
2968
2969 phy_offset >>= inode->i_sb->s_blocksize_bits;
2970 return __readpage_endio_check(inode, io_bio, phy_offset, page, offset,
2971 start, (size_t)(end - start + 1));
2972}
2973
/*
 * One deferred iput() request: an inode whose reference drop was queued on
 * fs_info->delayed_iputs by btrfs_add_delayed_iput() and is carried out
 * later by btrfs_run_delayed_iputs().
 */
struct delayed_iput {
	struct list_head list;	/* link on fs_info->delayed_iputs */
	struct inode *inode;	/* inode to iput() when the list is run */
};
2978
2979
2980
2981void btrfs_add_delayed_iput(struct inode *inode)
2982{
2983 struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
2984 struct delayed_iput *delayed;
2985
2986 if (atomic_add_unless(&inode->i_count, -1, 1))
2987 return;
2988
2989 delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL);
2990 delayed->inode = inode;
2991
2992 spin_lock(&fs_info->delayed_iput_lock);
2993 list_add_tail(&delayed->list, &fs_info->delayed_iputs);
2994 spin_unlock(&fs_info->delayed_iput_lock);
2995}
2996
2997void btrfs_run_delayed_iputs(struct btrfs_root *root)
2998{
2999 LIST_HEAD(list);
3000 struct btrfs_fs_info *fs_info = root->fs_info;
3001 struct delayed_iput *delayed;
3002 int empty;
3003
3004 spin_lock(&fs_info->delayed_iput_lock);
3005 empty = list_empty(&fs_info->delayed_iputs);
3006 spin_unlock(&fs_info->delayed_iput_lock);
3007 if (empty)
3008 return;
3009
3010 spin_lock(&fs_info->delayed_iput_lock);
3011 list_splice_init(&fs_info->delayed_iputs, &list);
3012 spin_unlock(&fs_info->delayed_iput_lock);
3013
3014 while (!list_empty(&list)) {
3015 delayed = list_entry(list.next, struct delayed_iput, list);
3016 list_del(&delayed->list);
3017 iput(delayed->inode);
3018 kfree(delayed);
3019 }
3020}
3021
3022
3023
3024
3025
3026
3027void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
3028 struct btrfs_root *root)
3029{
3030 struct btrfs_block_rsv *block_rsv;
3031 int ret;
3032
3033 if (atomic_read(&root->orphan_inodes) ||
3034 root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE)
3035 return;
3036
3037 spin_lock(&root->orphan_lock);
3038 if (atomic_read(&root->orphan_inodes)) {
3039 spin_unlock(&root->orphan_lock);
3040 return;
3041 }
3042
3043 if (root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) {
3044 spin_unlock(&root->orphan_lock);
3045 return;
3046 }
3047
3048 block_rsv = root->orphan_block_rsv;
3049 root->orphan_block_rsv = NULL;
3050 spin_unlock(&root->orphan_lock);
3051
3052 if (test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state) &&
3053 btrfs_root_refs(&root->root_item) > 0) {
3054 ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root,
3055 root->root_key.objectid);
3056 if (ret)
3057 btrfs_abort_transaction(trans, root, ret);
3058 else
3059 clear_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
3060 &root->state);
3061 }
3062
3063 if (block_rsv) {
3064 WARN_ON(block_rsv->size > 0);
3065 btrfs_free_block_rsv(root, block_rsv);
3066 }
3067}
3068
3069
3070
3071
3072
3073
3074
3075
/*
 * Mark @inode as an orphan within transaction @trans.
 *
 * Ensures the root has an orphan block reservation, sets
 * BTRFS_INODE_HAS_ORPHAN_ITEM and BTRFS_INODE_ORPHAN_META_RESERVED on the
 * inode if they were not already set, reserves orphan metadata when the
 * reservation bit was newly set, and inserts the orphan item when the
 * orphan-item bit was newly set.
 *
 * Returns 0 on success (an -EEXIST from the item insertion counts as
 * success), -ENOMEM if the block reservation cannot be allocated, or the
 * error from btrfs_insert_orphan_item() after aborting the transaction.
 */
int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_block_rsv *block_rsv = NULL;
	int reserve = 0;
	int insert = 0;
	int ret;

	/* allocate outside the spinlock; whether it is needed is rechecked below */
	if (!root->orphan_block_rsv) {
		block_rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
		if (!block_rsv)
			return -ENOMEM;
	}

	spin_lock(&root->orphan_lock);
	if (!root->orphan_block_rsv) {
		root->orphan_block_rsv = block_rsv;
	} else if (block_rsv) {
		/* the root got a reservation in the meantime; ours is surplus */
		btrfs_free_block_rsv(root, block_rsv);
		block_rsv = NULL;
	}

	if (!test_and_set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
			      &BTRFS_I(inode)->runtime_flags)) {
#if 0
		/*
		 * Disabled code: would have requested insert == 2 (also
		 * insert an orphan item for the root) for the first orphan
		 * of a root.
		 */
		if (!xchg(&root->orphan_item_inserted, 1))
			insert = 2;
		else
			insert = 1;
#endif
		insert = 1;
		atomic_inc(&root->orphan_inodes);
	}

	if (!test_and_set_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
			      &BTRFS_I(inode)->runtime_flags))
		reserve = 1;
	spin_unlock(&root->orphan_lock);

	/* reserve orphan metadata; a failure here is treated as fatal */
	if (reserve) {
		ret = btrfs_orphan_reserve_metadata(trans, inode);
		BUG_ON(ret);
	}

	/* insert the orphan item for this inode */
	if (insert >= 1) {
		ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
		if (ret) {
			/* roll back the count and any reservation taken above */
			atomic_dec(&root->orphan_inodes);
			if (reserve) {
				clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
					  &BTRFS_I(inode)->runtime_flags);
				btrfs_orphan_release_metadata(inode);
			}
			/*
			 * -EEXIST is tolerated: BTRFS_INODE_HAS_ORPHAN_ITEM
			 * stays set and 0 is returned.
			 */
			if (ret != -EEXIST) {
				clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
					  &BTRFS_I(inode)->runtime_flags);
				btrfs_abort_transaction(trans, root, ret);
				return ret;
			}
		}
		ret = 0;
	}

	/*
	 * Insert an orphan item for the root itself.  With the "#if 0" block
	 * above compiled out, insert is never 2, so this branch is not taken.
	 */
	if (insert >= 2) {
		ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root,
					       root->root_key.objectid);
		if (ret && ret != -EEXIST) {
			btrfs_abort_transaction(trans, root, ret);
			return ret;
		}
	}
	return 0;
}
3157
3158
3159
3160
3161
3162static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
3163 struct inode *inode)
3164{
3165 struct btrfs_root *root = BTRFS_I(inode)->root;
3166 int delete_item = 0;
3167 int release_rsv = 0;
3168 int ret = 0;
3169
3170 spin_lock(&root->orphan_lock);
3171 if (test_and_clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
3172 &BTRFS_I(inode)->runtime_flags))
3173 delete_item = 1;
3174
3175 if (test_and_clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
3176 &BTRFS_I(inode)->runtime_flags))
3177 release_rsv = 1;
3178 spin_unlock(&root->orphan_lock);
3179
3180 if (delete_item) {
3181 atomic_dec(&root->orphan_inodes);
3182 if (trans)
3183 ret = btrfs_del_orphan_item(trans, root,
3184 btrfs_ino(inode));
3185 }
3186
3187 if (release_rsv)
3188 btrfs_orphan_release_metadata(inode);
3189
3190 return ret;
3191}
3192
3193
3194
3195
3196
/*
 * Process the orphan items stored in @root.
 *
 * Only the first caller for a given root does any work: the cmpxchg on
 * root->orphan_cleanup_state lets a single caller move the state from 0 to
 * ORPHAN_CLEANUP_STARTED; everyone else returns 0 immediately.
 *
 * Orphan items are looked up from the highest key offset downwards.  For
 * each item the inode it names is loaded; stale items (inode lookup returns
 * -ESTALE) are deleted, inodes that still have links are truncated, and for
 * all loaded inodes the final iput() is issued here.
 *
 * Returns 0 on success or a negative errno; on error a critical message is
 * logged.
 */
int btrfs_orphan_cleanup(struct btrfs_root *root)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key, found_key;
	struct btrfs_trans_handle *trans;
	struct inode *inode;
	u64 last_objectid = 0;
	int ret = 0, nr_unlink = 0, nr_truncate = 0;

	if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
		return 0;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}
	path->reada = -1;

	/* start from the largest possible orphan key */
	key.objectid = BTRFS_ORPHAN_OBJECTID;
	key.type = BTRFS_ORPHAN_ITEM_KEY;
	key.offset = (u64)-1;

	while (1) {
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			goto out;

		/*
		 * No exact match for the search key: step back to the
		 * previous item.  Being at slot 0 already means there is
		 * nothing to look at.
		 */
		if (ret > 0) {
			ret = 0;
			if (path->slots[0] == 0)
				break;
			path->slots[0]--;
		}

		/* read the key of the item we landed on */
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

		/* stop as soon as it is no longer an orphan item */
		if (found_key.objectid != BTRFS_ORPHAN_OBJECTID)
			break;
		if (found_key.type != BTRFS_ORPHAN_ITEM_KEY)
			break;

		/* the key has been copied out; drop the path before blocking work */
		btrfs_release_path(path);

		/*
		 * Seeing the same orphan twice in a row means the previous
		 * iteration failed to get rid of it; bail out instead of
		 * looping forever.
		 */
		if (found_key.offset == last_objectid) {
			btrfs_err(root->fs_info,
				"Error removing orphan entry, stopping orphan cleanup");
			ret = -EINVAL;
			goto out;
		}

		last_objectid = found_key.offset;

		/* the orphan item's key offset is the inode number to load */
		found_key.objectid = found_key.offset;
		found_key.type = BTRFS_INODE_ITEM_KEY;
		found_key.offset = 0;
		inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
		ret = PTR_ERR_OR_ZERO(inode);
		if (ret && ret != -ESTALE)
			goto out;

		if (ret == -ESTALE && root == root->fs_info->tree_root) {
			struct btrfs_root *dead_root;
			struct btrfs_fs_info *fs_info = root->fs_info;
			int is_dead_root = 0;

			/*
			 * In the tree root the orphan's objectid may name a
			 * root that is on fs_info->dead_roots rather than an
			 * inode.  Such an orphan item is left in place.
			 * NOTE(review): presumably the item is removed when
			 * the dead root itself is cleaned up; confirm.
			 */
			spin_lock(&fs_info->trans_lock);
			list_for_each_entry(dead_root, &fs_info->dead_roots,
					    root_list) {
				if (dead_root->root_key.objectid ==
				    found_key.objectid) {
					is_dead_root = 1;
					break;
				}
			}
			spin_unlock(&fs_info->trans_lock);
			if (is_dead_root) {
				/* search below this orphan next time round */
				key.offset = found_key.objectid - 1;
				continue;
			}
		}

		/*
		 * The inode could not be loaded (-ESTALE) but its orphan item
		 * still exists: delete the item.
		 */
		if (ret == -ESTALE) {
			trans = btrfs_start_transaction(root, 1);
			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
				goto out;
			}
			btrfs_debug(root->fs_info, "auto deleting %Lu",
				found_key.objectid);
			ret = btrfs_del_orphan_item(trans, root,
						    found_key.objectid);
			btrfs_end_transaction(trans, root);
			if (ret)
				goto out;
			continue;
		}

		/*
		 * Mirror the in-memory state btrfs_orphan_add() would have
		 * set up, so that btrfs_orphan_del() sees the orphan item
		 * when it later runs for this inode.
		 */
		set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
			&BTRFS_I(inode)->runtime_flags);
		atomic_inc(&root->orphan_inodes);

		/* an orphan that still has links is handled by truncating it */
		if (inode->i_nlink) {
			if (WARN_ON(!S_ISREG(inode->i_mode))) {
				iput(inode);
				continue;
			}
			nr_truncate++;

			/* transaction with one unit reserved for the orphan add */
			trans = btrfs_start_transaction(root, 1);
			if (IS_ERR(trans)) {
				iput(inode);
				ret = PTR_ERR(trans);
				goto out;
			}
			ret = btrfs_orphan_add(trans, inode);
			btrfs_end_transaction(trans, root);
			if (ret) {
				iput(inode);
				goto out;
			}

			ret = btrfs_truncate(inode);
			if (ret)
				btrfs_orphan_del(NULL, inode);
		} else {
			nr_unlink++;
		}

		/*
		 * Drop our reference.
		 * NOTE(review): for unlinked inodes the actual deletion
		 * presumably happens as part of this final iput(); confirm.
		 */
		iput(inode);
		if (ret)
			goto out;
	}

	/* done walking the orphan items */
	btrfs_release_path(path);

	root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE;

	if (root->orphan_block_rsv)
		btrfs_block_rsv_release(root, root->orphan_block_rsv,
					(u64)-1);

	/*
	 * Join and immediately end a transaction when there is an orphan
	 * reservation or a root orphan item outstanding.
	 * NOTE(review): presumably this ensures a commit runs so that
	 * btrfs_orphan_commit_root() can release them; confirm.
	 */
	if (root->orphan_block_rsv ||
	    test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state)) {
		trans = btrfs_join_transaction(root);
		if (!IS_ERR(trans))
			btrfs_end_transaction(trans, root);
	}

	if (nr_unlink)
		btrfs_debug(root->fs_info, "unlinked %d orphans", nr_unlink);
	if (nr_truncate)
		btrfs_debug(root->fs_info, "truncated %d orphans", nr_truncate);

out:
	if (ret)
		btrfs_crit(root->fs_info,
			"could not do orphan cleanup %d", ret);
	btrfs_free_path(path);
	return ret;
}
3396
3397
3398
3399
3400
3401
3402
3403static noinline int acls_after_inode_item(struct extent_buffer *leaf,
3404 int slot, u64 objectid,
3405 int *first_xattr_slot)
3406{
3407 u32 nritems = btrfs_header_nritems(leaf);
3408 struct btrfs_key found_key;
3409 static u64 xattr_access = 0;
3410 static u64 xattr_default = 0;
3411 int scanned = 0;
3412
3413 if (!xattr_access) {
3414 xattr_access = btrfs_name_hash(POSIX_ACL_XATTR_ACCESS,
3415 strlen(POSIX_ACL_XATTR_ACCESS));
3416 xattr_default = btrfs_name_hash(POSIX_ACL_XATTR_DEFAULT,
3417 strlen(POSIX_ACL_XATTR_DEFAULT));
3418 }
3419
3420 slot++;
3421 *first_xattr_slot = -1;
3422 while (slot < nritems) {
3423 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3424
3425
3426 if (found_key.objectid != objectid)
3427 return 0;
3428
3429
3430 if (found_key.type == BTRFS_XATTR_ITEM_KEY) {
3431 if (*first_xattr_slot == -1)
3432 *first_xattr_slot = slot;
3433 if (found_key.offset == xattr_access ||
3434 found_key.offset == xattr_default)
3435 return 1;
3436 }
3437
3438
3439
3440
3441
3442 if (found_key.type > BTRFS_XATTR_ITEM_KEY)
3443 return 0;
3444
3445 slot++;
3446 scanned++;
3447
3448
3449
3450
3451
3452
3453
3454 if (scanned >= 8)
3455 break;
3456 }
3457
3458
3459
3460
3461 if (*first_xattr_slot == -1)
3462 *first_xattr_slot = slot;
3463 return 1;
3464}
3465
3466
3467
3468
/*
 * Populate @inode from its on-disk inode item.
 *
 * btrfs_fill_inode() is tried first; when it succeeds the fields copied from
 * the inode item below are skipped.  The inode item is looked up in either
 * case because the items that follow it in the leaf are used to cache the
 * directory index and to decide whether ACLs can be ruled out.  Finally the
 * inode and address-space operations are chosen from the file type.
 *
 * If the path allocation or the inode lookup fails, the inode is marked bad
 * with make_bad_inode().
 */
static void btrfs_read_locked_inode(struct inode *inode)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_inode_item *inode_item;
	struct btrfs_timespec *tspec;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_key location;
	unsigned long ptr;
	int maybe_acls;
	u32 rdev;
	int ret;
	bool filled = false;
	int first_xattr_slot;

	ret = btrfs_fill_inode(inode, &rdev);
	if (!ret)
		filled = true;

	path = btrfs_alloc_path();
	if (!path)
		goto make_bad;

	memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));

	ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
	if (ret)
		goto make_bad;

	leaf = path->nodes[0];

	/* fields already populated by btrfs_fill_inode(): skip the copy */
	if (filled)
		goto cache_index;

	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	inode->i_mode = btrfs_inode_mode(leaf, inode_item);
	set_nlink(inode, btrfs_inode_nlink(leaf, inode_item));
	i_uid_write(inode, btrfs_inode_uid(leaf, inode_item));
	i_gid_write(inode, btrfs_inode_gid(leaf, inode_item));
	btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));

	tspec = btrfs_inode_atime(inode_item);
	inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec);
	inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);

	tspec = btrfs_inode_mtime(inode_item);
	inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec);
	inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);

	tspec = btrfs_inode_ctime(inode_item);
	inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec);
	inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);

	inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item));
	BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
	BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item);

	/*
	 * The inode was last changed in the filesystem's current generation:
	 * flag it as needing a full sync.
	 * NOTE(review): presumably because in-memory logging state for those
	 * changes is not available after a reload; confirm.
	 */
	if (BTRFS_I(inode)->last_trans == root->fs_info->generation)
		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
			&BTRFS_I(inode)->runtime_flags);

	inode->i_version = btrfs_inode_sequence(leaf, inode_item);
	inode->i_generation = BTRFS_I(inode)->generation;
	inode->i_rdev = 0;
	/* kept in a local; only used by init_special_inode() below */
	rdev = btrfs_inode_rdev(leaf, inode_item);

	BTRFS_I(inode)->index_cnt = (u64)-1;
	BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);

cache_index:
	/*
	 * Look at the item right after the inode item.  For an inode with
	 * exactly one link, if that item is this inode's ref or extref, cache
	 * its directory index in dir_index.
	 */
	path->slots[0]++;
	if (inode->i_nlink != 1 ||
	    path->slots[0] >= btrfs_header_nritems(leaf))
		goto cache_acl;

	btrfs_item_key_to_cpu(leaf, &location, path->slots[0]);
	if (location.objectid != btrfs_ino(inode))
		goto cache_acl;

	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
	if (location.type == BTRFS_INODE_REF_KEY) {
		struct btrfs_inode_ref *ref;

		ref = (struct btrfs_inode_ref *)ptr;
		BTRFS_I(inode)->dir_index = btrfs_inode_ref_index(leaf, ref);
	} else if (location.type == BTRFS_INODE_EXTREF_KEY) {
		struct btrfs_inode_extref *extref;

		extref = (struct btrfs_inode_extref *)ptr;
		BTRFS_I(inode)->dir_index = btrfs_inode_extref_index(leaf,
								     extref);
	}
cache_acl:
	/*
	 * Scan the following items to see whether ACLs can be ruled out, and
	 * load inode properties starting from the first xattr slot reported.
	 */
	maybe_acls = acls_after_inode_item(leaf, path->slots[0],
					   btrfs_ino(inode), &first_xattr_slot);
	if (first_xattr_slot != -1) {
		path->slots[0] = first_xattr_slot;
		ret = btrfs_load_inode_props(inode, path);
		/* a failure to load props is logged but not fatal */
		if (ret)
			btrfs_err(root->fs_info,
				  "error loading props for ino %llu (root %llu): %d",
				  btrfs_ino(inode),
				  root->root_key.objectid, ret);
	}
	btrfs_free_path(path);

	if (!maybe_acls)
		cache_no_acl(inode);

	/* pick the operation tables matching the file type */
	switch (inode->i_mode & S_IFMT) {
	case S_IFREG:
		inode->i_mapping->a_ops = &btrfs_aops;
		inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
		BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
		inode->i_fop = &btrfs_file_operations;
		inode->i_op = &btrfs_file_inode_operations;
		break;
	case S_IFDIR:
		inode->i_fop = &btrfs_dir_file_operations;
		/* directories in the tree root get the read-only operations */
		if (root == root->fs_info->tree_root)
			inode->i_op = &btrfs_dir_ro_inode_operations;
		else
			inode->i_op = &btrfs_dir_inode_operations;
		break;
	case S_IFLNK:
		inode->i_op = &btrfs_symlink_inode_operations;
		inode->i_mapping->a_ops = &btrfs_symlink_aops;
		inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
		break;
	default:
		inode->i_op = &btrfs_special_inode_operations;
		init_special_inode(inode, inode->i_mode, rdev);
		break;
	}

	btrfs_update_iflags(inode);
	return;

make_bad:
	/* path may be NULL here when the allocation itself failed */
	btrfs_free_path(path);
	make_bad_inode(inode);
}
3622
3623
3624
3625
/*
 * Copy the in-memory state of @inode into the on-disk inode item @item
 * located in @leaf.
 *
 * The size written is BTRFS_I(inode)->disk_i_size, the transid is taken from
 * @trans, and the block group field is always written as 0.  All stores go
 * through the btrfs_set_token_*() accessors sharing one map token.  The
 * caller is responsible for marking @leaf dirty.
 */
static void fill_inode_item(struct btrfs_trans_handle *trans,
			    struct extent_buffer *leaf,
			    struct btrfs_inode_item *item,
			    struct inode *inode)
{
	struct btrfs_map_token token;

	btrfs_init_map_token(&token);

	/* ownership, size, mode and link count */
	btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
	btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
	btrfs_set_token_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size,
				   &token);
	btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
	btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);

	/* access, modification and change timestamps */
	btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item),
				     inode->i_atime.tv_sec, &token);
	btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item),
				      inode->i_atime.tv_nsec, &token);

	btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item),
				     inode->i_mtime.tv_sec, &token);
	btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item),
				      inode->i_mtime.tv_nsec, &token);

	btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item),
				     inode->i_ctime.tv_sec, &token);
	btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item),
				      inode->i_ctime.tv_nsec, &token);

	/* byte count, generation/sequence numbers, device and flags */
	btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
				     &token);
	btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation,
					 &token);
	btrfs_set_token_inode_sequence(leaf, item, inode->i_version, &token);
	btrfs_set_token_inode_transid(leaf, item, trans->transid, &token);
	btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token);
	btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token);
	btrfs_set_token_inode_block_group(leaf, item, 0, &token);
}
3667
3668
3669
3670
3671static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
3672 struct btrfs_root *root, struct inode *inode)
3673{
3674 struct btrfs_inode_item *inode_item;
3675 struct btrfs_path *path;
3676 struct extent_buffer *leaf;
3677 int ret;
3678
3679 path = btrfs_alloc_path();
3680 if (!path)
3681 return -ENOMEM;
3682
3683 path->leave_spinning = 1;
3684 ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location,
3685 1);
3686 if (ret) {
3687 if (ret > 0)
3688 ret = -ENOENT;
3689 goto failed;
3690 }
3691
3692 leaf = path->nodes[0];
3693 inode_item = btrfs_item_ptr(leaf, path->slots[0],
3694 struct btrfs_inode_item);
3695
3696 fill_inode_item(trans, leaf, inode_item, inode);
3697 btrfs_mark_buffer_dirty(leaf);
3698 btrfs_set_inode_last_trans(trans, inode);
3699 ret = 0;
3700failed:
3701 btrfs_free_path(path);
3702 return ret;
3703}
3704
3705
3706
3707
3708noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
3709 struct btrfs_root *root, struct inode *inode)
3710{
3711 int ret;
3712
3713
3714
3715
3716
3717
3718
3719
3720 if (!btrfs_is_free_space_inode(inode)
3721 && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
3722 && !root->fs_info->log_root_recovering) {
3723 btrfs_update_root_times(trans, root);
3724
3725 ret = btrfs_delayed_update_inode(trans, root, inode);
3726 if (!ret)
3727 btrfs_set_inode_last_trans(trans, inode);
3728 return ret;
3729 }
3730
3731 return btrfs_update_inode_item(trans, root, inode);
3732}
3733
3734noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
3735 struct btrfs_root *root,
3736 struct inode *inode)
3737{
3738 int ret;
3739
3740 ret = btrfs_update_inode(trans, root, inode);
3741 if (ret == -ENOSPC)
3742 return btrfs_update_inode_item(trans, root, inode);
3743 return ret;
3744}
3745
3746
3747
3748
3749
3750
3751static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
3752 struct btrfs_root *root,
3753 struct inode *dir, struct inode *inode,
3754 const char *name, int name_len)
3755{
3756 struct btrfs_path *path;
3757 int ret = 0;
3758 struct extent_buffer *leaf;
3759 struct btrfs_dir_item *di;
3760 struct btrfs_key key;
3761 u64 index;
3762 u64 ino = btrfs_ino(inode);
3763 u64 dir_ino = btrfs_ino(dir);
3764
3765 path = btrfs_alloc_path();
3766 if (!path) {
3767 ret = -ENOMEM;
3768 goto out;
3769 }
3770
3771 path->leave_spinning = 1;
3772 di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
3773 name, name_len, -1);
3774 if (IS_ERR(di)) {
3775 ret = PTR_ERR(di);
3776 goto err;
3777 }
3778 if (!di) {
3779 ret = -ENOENT;
3780 goto err;
3781 }
3782 leaf = path->nodes[0];
3783 btrfs_dir_item_key_to_cpu(leaf, di, &key);
3784 ret = btrfs_delete_one_dir_name(trans, root, path, di);
3785 if (ret)
3786 goto err;
3787 btrfs_release_path(path);
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799 if (BTRFS_I(inode)->dir_index) {
3800 ret = btrfs_delayed_delete_inode_ref(inode);
3801 if (!ret) {
3802 index = BTRFS_I(inode)->dir_index;
3803 goto skip_backref;
3804 }
3805 }
3806
3807 ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
3808 dir_ino, &index);
3809 if (ret) {
3810 btrfs_info(root->fs_info,
3811 "failed to delete reference to %.*s, inode %llu parent %llu",
3812 name_len, name, ino, dir_ino);
3813 btrfs_abort_transaction(trans, root, ret);
3814 goto err;
3815 }
3816skip_backref:
3817 ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
3818 if (ret) {
3819 btrfs_abort_transaction(trans, root, ret);
3820 goto err;
3821 }
3822
3823 ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len,
3824 inode, dir_ino);
3825 if (ret != 0 && ret != -ENOENT) {
3826 btrfs_abort_transaction(trans, root, ret);
3827 goto err;
3828 }
3829
3830 ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
3831 dir, index);
3832 if (ret == -ENOENT)
3833 ret = 0;
3834 else if (ret)
3835 btrfs_abort_transaction(trans, root, ret);
3836err:
3837 btrfs_free_path(path);
3838 if (ret)
3839 goto out;
3840
3841 btrfs_i_size_write(dir, dir->i_size - name_len * 2);
3842 inode_inc_iversion(inode);
3843 inode_inc_iversion(dir);
3844 inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME;
3845 ret = btrfs_update_inode(trans, root, dir);
3846out:
3847 return ret;
3848}
3849
/*
 * Unlink @name from @dir and, if that succeeded, drop one link from @inode
 * and write @inode back.
 *
 * Returns 0 on success or a negative errno.
 */
int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root,
		       struct inode *dir, struct inode *inode,
		       const char *name, int name_len)
{
	int ret = __btrfs_unlink_inode(trans, root, dir, inode, name,
				       name_len);

	if (ret)
		return ret;

	drop_nlink(inode);
	return btrfs_update_inode(trans, root, inode);
}
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir)
3873{
3874 struct btrfs_trans_handle *trans;
3875 struct btrfs_root *root = BTRFS_I(dir)->root;
3876 int ret;
3877
3878
3879
3880
3881
3882
3883
3884
3885 trans = btrfs_start_transaction(root, 5);
3886 if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
3887 return trans;
3888
3889 if (PTR_ERR(trans) == -ENOSPC) {
3890 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5);
3891
3892 trans = btrfs_start_transaction(root, 0);
3893 if (IS_ERR(trans))
3894 return trans;
3895 ret = btrfs_cond_migrate_bytes(root->fs_info,
3896 &root->fs_info->trans_block_rsv,
3897 num_bytes, 5);
3898 if (ret) {
3899 btrfs_end_transaction(trans, root);
3900 return ERR_PTR(ret);
3901 }
3902 trans->block_rsv = &root->fs_info->trans_block_rsv;
3903 trans->bytes_reserved = num_bytes;
3904 }
3905 return trans;
3906}
3907
3908static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
3909{
3910 struct btrfs_root *root = BTRFS_I(dir)->root;
3911 struct btrfs_trans_handle *trans;
3912 struct inode *inode = dentry->d_inode;
3913 int ret;
3914
3915 trans = __unlink_start_trans(dir);
3916 if (IS_ERR(trans))
3917 return PTR_ERR(trans);
3918
3919 btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0);
3920
3921 ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
3922 dentry->d_name.name, dentry->d_name.len);
3923 if (ret)
3924 goto out;
3925
3926 if (inode->i_nlink == 0) {
3927 ret = btrfs_orphan_add(trans, inode);
3928 if (ret)
3929 goto out;
3930 }
3931
3932out:
3933 btrfs_end_transaction(trans, root);
3934 btrfs_btree_balance_dirty(root);
3935 return ret;
3936}
3937
3938int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
3939 struct btrfs_root *root,
3940 struct inode *dir, u64 objectid,
3941 const char *name, int name_len)
3942{
3943 struct btrfs_path *path;
3944 struct extent_buffer *leaf;
3945 struct btrfs_dir_item *di;
3946 struct btrfs_key key;
3947 u64 index;
3948 int ret;
3949 u64 dir_ino = btrfs_ino(dir);
3950
3951 path = btrfs_alloc_path();
3952 if (!path)
3953 return -ENOMEM;
3954
3955 di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
3956 name, name_len, -1);
3957 if (IS_ERR_OR_NULL(di)) {
3958 if (!di)
3959 ret = -ENOENT;
3960 else
3961 ret = PTR_ERR(di);
3962 goto out;
3963 }
3964
3965 leaf = path->nodes[0];
3966 btrfs_dir_item_key_to_cpu(leaf, di, &key);
3967 WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
3968 ret = btrfs_delete_one_dir_name(trans, root, path, di);
3969 if (ret) {
3970 btrfs_abort_transaction(trans, root, ret);
3971 goto out;
3972 }
3973 btrfs_release_path(path);
3974
3975 ret = btrfs_del_root_ref(trans, root->fs_info->tree_root,
3976 objectid, root->root_key.objectid,
3977 dir_ino, &index, name, name_len);
3978 if (ret < 0) {
3979 if (ret != -ENOENT) {
3980 btrfs_abort_transaction(trans, root, ret);
3981 goto out;
3982 }
3983 di = btrfs_search_dir_index_item(root, path, dir_ino,
3984 name, name_len);
3985 if (IS_ERR_OR_NULL(di)) {
3986 if (!di)
3987 ret = -ENOENT;
3988 else
3989 ret = PTR_ERR(di);
3990 btrfs_abort_transaction(trans, root, ret);
3991 goto out;
3992 }
3993
3994 leaf = path->nodes[0];
3995 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3996 btrfs_release_path(path);
3997 index = key.offset;
3998 }
3999 btrfs_release_path(path);
4000
4001 ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
4002 if (ret) {
4003 btrfs_abort_transaction(trans, root, ret);
4004 goto out;
4005 }
4006
4007 btrfs_i_size_write(dir, dir->i_size - name_len * 2);
4008 inode_inc_iversion(dir);
4009 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
4010 ret = btrfs_update_inode_fallback(trans, root, dir);
4011 if (ret)
4012 btrfs_abort_transaction(trans, root, ret);
4013out:
4014 btrfs_free_path(path);
4015 return ret;
4016}
4017
4018static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
4019{
4020 struct inode *inode = dentry->d_inode;
4021 int err = 0;
4022 struct btrfs_root *root = BTRFS_I(dir)->root;
4023 struct btrfs_trans_handle *trans;
4024
4025 if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
4026 return -ENOTEMPTY;
4027 if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID)
4028 return -EPERM;
4029
4030 trans = __unlink_start_trans(dir);
4031 if (IS_ERR(trans))
4032 return PTR_ERR(trans);
4033
4034 if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
4035 err = btrfs_unlink_subvol(trans, root, dir,
4036 BTRFS_I(inode)->location.objectid,
4037 dentry->d_name.name,
4038 dentry->d_name.len);
4039 goto out;
4040 }
4041
4042 err = btrfs_orphan_add(trans, inode);
4043 if (err)
4044 goto out;
4045
4046
4047 err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
4048 dentry->d_name.name, dentry->d_name.len);
4049 if (!err)
4050 btrfs_i_size_write(inode, 0);
4051out:
4052 btrfs_end_transaction(trans, root);
4053 btrfs_btree_balance_dirty(root);
4054
4055 return err;
4056}
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
4070 struct btrfs_root *root,
4071 struct inode *inode,
4072 u64 new_size, u32 min_type)
4073{
4074 struct btrfs_path *path;
4075 struct extent_buffer *leaf;
4076 struct btrfs_file_extent_item *fi;
4077 struct btrfs_key key;
4078 struct btrfs_key found_key;
4079 u64 extent_start = 0;
4080 u64 extent_num_bytes = 0;
4081 u64 extent_offset = 0;
4082 u64 item_end = 0;
4083 u64 last_size = (u64)-1;
4084 u32 found_type = (u8)-1;
4085 int found_extent;
4086 int del_item;
4087 int pending_del_nr = 0;
4088 int pending_del_slot = 0;
4089 int extent_type = -1;
4090 int ret;
4091 int err = 0;
4092 u64 ino = btrfs_ino(inode);
4093
4094 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
4095
4096 path = btrfs_alloc_path();
4097 if (!path)
4098 return -ENOMEM;
4099 path->reada = -1;
4100
4101
4102
4103
4104
4105
4106 if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
4107 root == root->fs_info->tree_root)
4108 btrfs_drop_extent_cache(inode, ALIGN(new_size,
4109 root->sectorsize), (u64)-1, 0);
4110
4111
4112
4113
4114
4115
4116
4117 if (min_type == 0 && root == BTRFS_I(inode)->root)
4118 btrfs_kill_delayed_inode_items(inode);
4119
4120 key.objectid = ino;
4121 key.offset = (u64)-1;
4122 key.type = (u8)-1;
4123
4124search_again:
4125 path->leave_spinning = 1;
4126 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
4127 if (ret < 0) {
4128 err = ret;
4129 goto out;
4130 }
4131
4132 if (ret > 0) {
4133
4134
4135
4136 if (path->slots[0] == 0)
4137 goto out;
4138 path->slots[0]--;
4139 }
4140
4141 while (1) {
4142 fi = NULL;
4143 leaf = path->nodes[0];
4144 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4145 found_type = found_key.type;
4146
4147 if (found_key.objectid != ino)
4148 break;
4149
4150 if (found_type < min_type)
4151 break;
4152
4153 item_end = found_key.offset;
4154 if (found_type == BTRFS_EXTENT_DATA_KEY) {
4155 fi = btrfs_item_ptr(leaf, path->slots[0],
4156 struct btrfs_file_extent_item);
4157 extent_type = btrfs_file_extent_type(leaf, fi);
4158 if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
4159 item_end +=
4160 btrfs_file_extent_num_bytes(leaf, fi);
4161 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4162 item_end += btrfs_file_extent_inline_len(leaf,
4163 path->slots[0], fi);
4164 }
4165 item_end--;
4166 }
4167 if (found_type > min_type) {
4168 del_item = 1;
4169 } else {
4170 if (item_end < new_size)
4171 break;
4172 if (found_key.offset >= new_size)
4173 del_item = 1;
4174 else
4175 del_item = 0;
4176 }
4177 found_extent = 0;
4178
4179 if (found_type != BTRFS_EXTENT_DATA_KEY)
4180 goto delete;
4181
4182 if (del_item)
4183 last_size = found_key.offset;
4184 else
4185 last_size = new_size;
4186
4187 if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
4188 u64 num_dec;
4189 extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
4190 if (!del_item) {
4191 u64 orig_num_bytes =
4192 btrfs_file_extent_num_bytes(leaf, fi);
4193 extent_num_bytes = ALIGN(new_size -
4194 found_key.offset,
4195 root->sectorsize);
4196 btrfs_set_file_extent_num_bytes(leaf, fi,
4197 extent_num_bytes);
4198 num_dec = (orig_num_bytes -
4199 extent_num_bytes);
4200 if (test_bit(BTRFS_ROOT_REF_COWS,
4201 &root->state) &&
4202 extent_start != 0)
4203 inode_sub_bytes(inode, num_dec);
4204 btrfs_mark_buffer_dirty(leaf);
4205 } else {
4206 extent_num_bytes =
4207 btrfs_file_extent_disk_num_bytes(leaf,
4208 fi);
4209 extent_offset = found_key.offset -
4210 btrfs_file_extent_offset(leaf, fi);
4211
4212
4213 num_dec = btrfs_file_extent_num_bytes(leaf, fi);
4214 if (extent_start != 0) {
4215 found_extent = 1;
4216 if (test_bit(BTRFS_ROOT_REF_COWS,
4217 &root->state))
4218 inode_sub_bytes(inode, num_dec);
4219 }
4220 }
4221 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
4222
4223
4224
4225
4226 if (!del_item &&
4227 btrfs_file_extent_compression(leaf, fi) == 0 &&
4228 btrfs_file_extent_encryption(leaf, fi) == 0 &&
4229 btrfs_file_extent_other_encoding(leaf, fi) == 0) {
4230 u32 size = new_size - found_key.offset;
4231
4232 if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
4233 inode_sub_bytes(inode, item_end + 1 -
4234 new_size);
4235
4236
4237
4238
4239
4240 btrfs_set_file_extent_ram_bytes(leaf, fi, size);
4241 size =
4242 btrfs_file_extent_calc_inline_size(size);
4243 btrfs_truncate_item(root, path, size, 1);
4244 } else if (test_bit(BTRFS_ROOT_REF_COWS,
4245 &root->state)) {
4246 inode_sub_bytes(inode, item_end + 1 -
4247 found_key.offset);
4248 }
4249 }
4250delete:
4251 if (del_item) {
4252 if (!pending_del_nr) {
4253
4254 pending_del_slot = path->slots[0];
4255 pending_del_nr = 1;
4256 } else if (pending_del_nr &&
4257 path->slots[0] + 1 == pending_del_slot) {
4258
4259 pending_del_nr++;
4260 pending_del_slot = path->slots[0];
4261 } else {
4262 BUG();
4263 }
4264 } else {
4265 break;
4266 }
4267 if (found_extent &&
4268 (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
4269 root == root->fs_info->tree_root)) {
4270 btrfs_set_path_blocking(path);
4271 ret = btrfs_free_extent(trans, root, extent_start,
4272 extent_num_bytes, 0,
4273 btrfs_header_owner(leaf),
4274 ino, extent_offset, 0);
4275 BUG_ON(ret);
4276 }
4277
4278 if (found_type == BTRFS_INODE_ITEM_KEY)
4279 break;
4280
4281 if (path->slots[0] == 0 ||
4282 path->slots[0] != pending_del_slot) {
4283 if (pending_del_nr) {
4284 ret = btrfs_del_items(trans, root, path,
4285 pending_del_slot,
4286 pending_del_nr);
4287 if (ret) {
4288 btrfs_abort_transaction(trans,
4289 root, ret);
4290 goto error;
4291 }
4292 pending_del_nr = 0;
4293 }
4294 btrfs_release_path(path);
4295 goto search_again;
4296 } else {
4297 path->slots[0]--;
4298 }
4299 }
4300out:
4301 if (pending_del_nr) {
4302 ret = btrfs_del_items(trans, root, path, pending_del_slot,
4303 pending_del_nr);
4304 if (ret)
4305 btrfs_abort_transaction(trans, root, ret);
4306 }
4307error:
4308 if (last_size != (u64)-1 &&
4309 root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
4310 btrfs_ordered_update_i_size(inode, last_size, NULL);
4311 btrfs_free_path(path);
4312 return err;
4313}
4314
4315
4316
4317
4318
4319
4320
4321
4322
4323
4324
4325
/*
 * Zero part of one page of @inode and mark that page dirty and delalloc.
 *
 * @inode: inode whose page cache is modified
 * @from:  file offset; selects the page and the offset inside that page
 * @len:   number of bytes to zero starting at the in-page offset; 0 means
 *         "up to the end of the page"
 * @front: when non-zero, zero the bytes in front of the in-page offset
 *         instead of the bytes starting at it
 *
 * Nothing is done when the in-page offset, and @len if it is non-zero, are
 * multiples of the sector size.
 *
 * Returns 0 on success, -ENOMEM when the page cannot be obtained, -EIO when
 * the page is still not uptodate after reading it, or the error returned
 * when reserving delalloc space or setting the delalloc bits.
 */
int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len,
			int front)
{
	struct address_space *mapping = inode->i_mapping;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	char *kaddr;
	u32 blocksize = root->sectorsize;
	pgoff_t index = from >> PAGE_CACHE_SHIFT;
	unsigned offset = from & (PAGE_CACHE_SIZE-1);
	struct page *page;
	gfp_t mask = btrfs_alloc_write_mask(mapping);
	int ret = 0;
	u64 page_start;
	u64 page_end;

	/* Sector aligned request: there is nothing to zero. */
	if ((offset & (blocksize - 1)) == 0 &&
	    (!len || ((len & (blocksize - 1)) == 0)))
		goto out;
	ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
	if (ret)
		goto out;

again:
	page = find_or_create_page(mapping, index, mask);
	if (!page) {
		btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
		ret = -ENOMEM;
		goto out;
	}

	page_start = page_offset(page);
	page_end = page_start + PAGE_CACHE_SIZE - 1;

	if (!PageUptodate(page)) {
		ret = btrfs_readpage(NULL, page);
		lock_page(page);
		/* The page no longer belongs to this mapping: start over. */
		if (page->mapping != mapping) {
			unlock_page(page);
			page_cache_release(page);
			goto again;
		}
		if (!PageUptodate(page)) {
			ret = -EIO;
			goto out_unlock;
		}
	}
	wait_on_page_writeback(page);

	lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state);
	set_page_extent_mapped(page);

	/*
	 * An ordered extent starts at this page: drop all locks, start it
	 * with the wait argument set, then retry from the page lookup.
	 */
	ordered = btrfs_lookup_ordered_extent(inode, page_start);
	if (ordered) {
		unlock_extent_cached(io_tree, page_start, page_end,
				     &cached_state, GFP_NOFS);
		unlock_page(page);
		page_cache_release(page);
		btrfs_start_ordered_extent(inode, ordered, 1);
		btrfs_put_ordered_extent(ordered);
		goto again;
	}

	/* Clear the old state bits of the page range before re-marking it. */
	clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
			  EXTENT_DIRTY | EXTENT_DELALLOC |
			  EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
			  0, 0, &cached_state, GFP_NOFS);

	ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
					&cached_state);
	if (ret) {
		unlock_extent_cached(io_tree, page_start, page_end,
				     &cached_state, GFP_NOFS);
		goto out_unlock;
	}

	if (offset != PAGE_CACHE_SIZE) {
		if (!len)
			len = PAGE_CACHE_SIZE - offset;
		kaddr = kmap(page);
		if (front)
			memset(kaddr, 0, offset);
		else
			memset(kaddr + offset, 0, len);
		flush_dcache_page(page);
		kunmap(page);
	}
	ClearPageChecked(page);
	set_page_dirty(page);
	unlock_extent_cached(io_tree, page_start, page_end, &cached_state,
			     GFP_NOFS);

out_unlock:
	/* On failure give back the space reserved at the top. */
	if (ret)
		btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
	unlock_page(page);
	page_cache_release(page);
out:
	return ret;
}
4428
4429static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode,
4430 u64 offset, u64 len)
4431{
4432 struct btrfs_trans_handle *trans;
4433 int ret;
4434
4435
4436
4437
4438
4439 if (btrfs_fs_incompat(root->fs_info, NO_HOLES)) {
4440 BTRFS_I(inode)->last_trans = root->fs_info->generation;
4441 BTRFS_I(inode)->last_sub_trans = root->log_transid;
4442 BTRFS_I(inode)->last_log_commit = root->last_log_commit;
4443 return 0;
4444 }
4445
4446
4447
4448
4449
4450
4451 trans = btrfs_start_transaction(root, 3);
4452 if (IS_ERR(trans))
4453 return PTR_ERR(trans);
4454
4455 ret = btrfs_drop_extents(trans, root, inode, offset, offset + len, 1);
4456 if (ret) {
4457 btrfs_abort_transaction(trans, root, ret);
4458 btrfs_end_transaction(trans, root);
4459 return ret;
4460 }
4461
4462 ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset,
4463 0, 0, len, 0, len, 0, 0, 0);
4464 if (ret)
4465 btrfs_abort_transaction(trans, root, ret);
4466 else
4467 btrfs_update_inode(trans, root, inode);
4468 btrfs_end_transaction(trans, root);
4469 return ret;
4470}
4471
4472
4473
4474
4475
4476
4477
/*
 * Prepare @inode for growing from @oldsize to @size.
 *
 * The tail of the page containing @oldsize is zeroed first.  Then, for the
 * sector aligned range between the two sizes, every extent map that is not
 * flagged EXTENT_FLAG_PREALLOC gets a hole inserted through
 * maybe_insert_hole() and a matching hole extent map added to the inode's
 * extent map tree.
 *
 * Returns 0 on success or a negative errno.
 */
int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_map *em = NULL;
	struct extent_state *cached_state = NULL;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	u64 hole_start = ALIGN(oldsize, root->sectorsize);
	u64 block_end = ALIGN(size, root->sectorsize);
	u64 last_byte;
	u64 cur_offset;
	u64 hole_size;
	int err = 0;

	/* Zero from @oldsize to the end of its page (len == 0, front == 0). */
	err = btrfs_truncate_page(inode, oldsize, 0, 0);
	if (err)
		return err;

	/* The new size does not go past the sector holding the old size. */
	if (size <= hole_start)
		return 0;

	/*
	 * Lock the range; while an ordered extent is found in it, unlock,
	 * start that ordered extent with the wait argument set, and retry.
	 */
	while (1) {
		struct btrfs_ordered_extent *ordered;

		lock_extent_bits(io_tree, hole_start, block_end - 1, 0,
				 &cached_state);
		ordered = btrfs_lookup_ordered_range(inode, hole_start,
						     block_end - hole_start);
		if (!ordered)
			break;
		unlock_extent_cached(io_tree, hole_start, block_end - 1,
				     &cached_state, GFP_NOFS);
		btrfs_start_ordered_extent(inode, ordered, 1);
		btrfs_put_ordered_extent(ordered);
	}

	cur_offset = hole_start;
	while (1) {
		em = btrfs_get_extent(inode, NULL, 0, cur_offset,
				block_end - cur_offset, 0);
		if (IS_ERR(em)) {
			err = PTR_ERR(em);
			em = NULL;
			break;
		}
		last_byte = min(extent_map_end(em), block_end);
		last_byte = ALIGN(last_byte , root->sectorsize);
		if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
			struct extent_map *hole_em;
			hole_size = last_byte - cur_offset;

			err = maybe_insert_hole(root, inode, cur_offset,
						hole_size);
			if (err)
				break;
			btrfs_drop_extent_cache(inode, cur_offset,
						cur_offset + hole_size - 1, 0);
			hole_em = alloc_extent_map();
			/*
			 * Without a hole extent map the inode is flagged
			 * BTRFS_INODE_NEEDS_FULL_SYNC and the walk continues.
			 */
			if (!hole_em) {
				set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
					&BTRFS_I(inode)->runtime_flags);
				goto next;
			}
			hole_em->start = cur_offset;
			hole_em->len = hole_size;
			hole_em->orig_start = cur_offset;

			hole_em->block_start = EXTENT_MAP_HOLE;
			hole_em->block_len = 0;
			hole_em->orig_block_len = 0;
			hole_em->ram_bytes = hole_size;
			hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
			hole_em->compress_type = BTRFS_COMPRESS_NONE;
			hole_em->generation = root->fs_info->generation;

			/*
			 * Retry the insertion for as long as it reports
			 * -EEXIST, dropping the cached range before each retry.
			 */
			while (1) {
				write_lock(&em_tree->lock);
				err = add_extent_mapping(em_tree, hole_em, 1);
				write_unlock(&em_tree->lock);
				if (err != -EEXIST)
					break;
				btrfs_drop_extent_cache(inode, cur_offset,
							cur_offset +
							hole_size - 1, 0);
			}
			free_extent_map(hole_em);
		}
next:
		free_extent_map(em);
		em = NULL;
		cur_offset = last_byte;
		if (cur_offset >= block_end)
			break;
	}
	/* em is non-NULL here only when maybe_insert_hole() failed. */
	free_extent_map(em);
	unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state,
			     GFP_NOFS);
	return err;
}
4582
4583static int btrfs_setsize(struct inode *inode, struct iattr *attr)
4584{
4585 struct btrfs_root *root = BTRFS_I(inode)->root;
4586 struct btrfs_trans_handle *trans;
4587 loff_t oldsize = i_size_read(inode);
4588 loff_t newsize = attr->ia_size;
4589 int mask = attr->ia_valid;
4590 int ret;
4591
4592
4593
4594
4595
4596
4597
4598 if (newsize != oldsize) {
4599 inode_inc_iversion(inode);
4600 if (!(mask & (ATTR_CTIME | ATTR_MTIME)))
4601 inode->i_ctime = inode->i_mtime =
4602 current_fs_time(inode->i_sb);
4603 }
4604
4605 if (newsize > oldsize) {
4606 truncate_pagecache(inode, newsize);
4607 ret = btrfs_cont_expand(inode, oldsize, newsize);
4608 if (ret)
4609 return ret;
4610
4611 trans = btrfs_start_transaction(root, 1);
4612 if (IS_ERR(trans))
4613 return PTR_ERR(trans);
4614
4615 i_size_write(inode, newsize);
4616 btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
4617 ret = btrfs_update_inode(trans, root, inode);
4618 btrfs_end_transaction(trans, root);
4619 } else {
4620
4621
4622
4623
4624
4625
4626 if (newsize == 0)
4627 set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
4628 &BTRFS_I(inode)->runtime_flags);
4629
4630
4631
4632
4633
4634 trans = btrfs_start_transaction(root, 2);
4635 if (IS_ERR(trans))
4636 return PTR_ERR(trans);
4637
4638
4639
4640
4641
4642
4643
4644
4645
4646
4647
4648 ret = btrfs_orphan_add(trans, inode);
4649 btrfs_end_transaction(trans, root);
4650 if (ret)
4651 return ret;
4652
4653
4654 truncate_setsize(inode, newsize);
4655
4656
4657 btrfs_inode_block_unlocked_dio(inode);
4658 inode_dio_wait(inode);
4659 btrfs_inode_resume_unlocked_dio(inode);
4660
4661 ret = btrfs_truncate(inode);
4662 if (ret && inode->i_nlink) {
4663 int err;
4664
4665
4666
4667
4668
4669
4670
4671 trans = btrfs_join_transaction(root);
4672 if (IS_ERR(trans)) {
4673 btrfs_orphan_del(NULL, inode);
4674 return ret;
4675 }
4676 i_size_write(inode, BTRFS_I(inode)->disk_i_size);
4677 err = btrfs_orphan_del(trans, inode);
4678 if (err)
4679 btrfs_abort_transaction(trans, root, err);
4680 btrfs_end_transaction(trans, root);
4681 }
4682 }
4683
4684 return ret;
4685}
4686
4687static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
4688{
4689 struct inode *inode = dentry->d_inode;
4690 struct btrfs_root *root = BTRFS_I(inode)->root;
4691 int err;
4692
4693 if (btrfs_root_readonly(root))
4694 return -EROFS;
4695
4696 err = inode_change_ok(inode, attr);
4697 if (err)
4698 return err;
4699
4700 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
4701 err = btrfs_setsize(inode, attr);
4702 if (err)
4703 return err;
4704 }
4705
4706 if (attr->ia_valid) {
4707 setattr_copy(inode, attr);
4708 inode_inc_iversion(inode);
4709 err = btrfs_dirty_inode(inode);
4710
4711 if (!err && attr->ia_valid & ATTR_MODE)
4712 err = posix_acl_chmod(inode, inode->i_mode);
4713 }
4714
4715 return err;
4716}
4717
4718
4719
4720
4721
4722
4723
4724
4725
4726
4727
4728
4729
4730static void evict_inode_truncate_pages(struct inode *inode)
4731{
4732 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
4733 struct extent_map_tree *map_tree = &BTRFS_I(inode)->extent_tree;
4734 struct rb_node *node;
4735
4736 ASSERT(inode->i_state & I_FREEING);
4737 truncate_inode_pages_final(&inode->i_data);
4738
4739 write_lock(&map_tree->lock);
4740 while (!RB_EMPTY_ROOT(&map_tree->map)) {
4741 struct extent_map *em;
4742
4743 node = rb_first(&map_tree->map);
4744 em = rb_entry(node, struct extent_map, rb_node);
4745 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
4746 clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
4747 remove_extent_mapping(map_tree, em);
4748 free_extent_map(em);
4749 if (need_resched()) {
4750 write_unlock(&map_tree->lock);
4751 cond_resched();
4752 write_lock(&map_tree->lock);
4753 }
4754 }
4755 write_unlock(&map_tree->lock);
4756
4757 spin_lock(&io_tree->lock);
4758 while (!RB_EMPTY_ROOT(&io_tree->state)) {
4759 struct extent_state *state;
4760 struct extent_state *cached_state = NULL;
4761
4762 node = rb_first(&io_tree->state);
4763 state = rb_entry(node, struct extent_state, rb_node);
4764 atomic_inc(&state->refs);
4765 spin_unlock(&io_tree->lock);
4766
4767 lock_extent_bits(io_tree, state->start, state->end,
4768 0, &cached_state);
4769 clear_extent_bit(io_tree, state->start, state->end,
4770 EXTENT_LOCKED | EXTENT_DIRTY |
4771 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
4772 EXTENT_DEFRAG, 1, 1,
4773 &cached_state, GFP_NOFS);
4774 free_extent_state(state);
4775
4776 cond_resched();
4777 spin_lock(&io_tree->lock);
4778 }
4779 spin_unlock(&io_tree->lock);
4780}
4781
/*
 * Evict @inode from memory and, when it has no links left, delete its
 * items from the tree.
 *
 * Cached pages, extent maps and extent state are always torn down, and the
 * delayed node is removed and the inode cleared on every path.  The items
 * are deleted only when none of the early exits below applies.
 */
void btrfs_evict_inode(struct inode *inode)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_block_rsv *rsv, *global_rsv;
	u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
	int ret;

	trace_btrfs_inode_evict(inode);

	evict_inode_truncate_pages(inode);

	/*
	 * Still linked, and either a free space inode or an inode of a
	 * referenced root other than the root tree: nothing to delete.
	 */
	if (inode->i_nlink &&
	    ((btrfs_root_refs(&root->root_item) != 0 &&
	      root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID) ||
	     btrfs_is_free_space_inode(inode)))
		goto no_delete;

	if (is_bad_inode(inode)) {
		btrfs_orphan_del(NULL, inode);
		goto no_delete;
	}

	btrfs_wait_ordered_range(inode, 0, (u64)-1);

	btrfs_free_io_failure_record(inode, 0, (u64)-1);

	/* Nothing is deleted while log recovery is running. */
	if (root->fs_info->log_root_recovering) {
		BUG_ON(test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
				 &BTRFS_I(inode)->runtime_flags));
		goto no_delete;
	}

	if (inode->i_nlink > 0) {
		BUG_ON(btrfs_root_refs(&root->root_item) != 0 &&
		       root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID);
		goto no_delete;
	}

	ret = btrfs_commit_inode_delayed_inode(inode);
	if (ret) {
		btrfs_orphan_del(NULL, inode);
		goto no_delete;
	}

	/* Temporary block reserve used by the truncation below. */
	rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
	if (!rsv) {
		btrfs_orphan_del(NULL, inode);
		goto no_delete;
	}
	rsv->size = min_size;
	rsv->failfast = 1;
	global_rsv = &root->fs_info->global_block_rsv;

	btrfs_i_size_write(inode, 0);

	/*
	 * Refill the reserve and truncate all items of the inode; repeat
	 * for as long as the truncation reports -ENOSPC.
	 */
	while (1) {
		ret = btrfs_block_rsv_refill(root, rsv, min_size,
					     BTRFS_RESERVE_FLUSH_LIMIT);

		/* The refill failed: try to take the space from the global reserve. */
		if (ret)
			ret = btrfs_block_rsv_migrate(global_rsv, rsv, min_size);

		if (ret) {
			btrfs_warn(root->fs_info,
				"Could not get space for a delete, will truncate on mount %d",
				ret);
			btrfs_orphan_del(NULL, inode);
			btrfs_free_block_rsv(root, rsv);
			goto no_delete;
		}

		trans = btrfs_join_transaction(root);
		if (IS_ERR(trans)) {
			btrfs_orphan_del(NULL, inode);
			btrfs_free_block_rsv(root, rsv);
			goto no_delete;
		}

		trans->block_rsv = rsv;

		ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
		if (ret != -ENOSPC)
			break;

		/* Restore the default reserve before ending this transaction. */
		trans->block_rsv = &root->fs_info->trans_block_rsv;
		btrfs_end_transaction(trans, root);
		trans = NULL;
		btrfs_btree_balance_dirty(root);
	}

	btrfs_free_block_rsv(root, rsv);

	/*
	 * On success the orphan item is deleted within the transaction using
	 * the root's orphan block reserve; otherwise btrfs_orphan_del() is
	 * called without a transaction handle.  Its return value is not
	 * checked in either case.
	 */
	if (ret == 0) {
		trans->block_rsv = root->orphan_block_rsv;
		btrfs_orphan_del(trans, inode);
	} else {
		btrfs_orphan_del(NULL, inode);
	}

	trans->block_rsv = &root->fs_info->trans_block_rsv;
	/* The inode number is returned for every root except the tree root and reloc trees. */
	if (!(root == root->fs_info->tree_root ||
	      root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID))
		btrfs_return_ino(root, btrfs_ino(inode));

	btrfs_end_transaction(trans, root);
	btrfs_btree_balance_dirty(root);
no_delete:
	btrfs_remove_delayed_node(inode);
	clear_inode(inode);
	return;
}
4909
4910
4911
4912
4913
4914static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
4915 struct btrfs_key *location)
4916{
4917 const char *name = dentry->d_name.name;
4918 int namelen = dentry->d_name.len;
4919 struct btrfs_dir_item *di;
4920 struct btrfs_path *path;
4921 struct btrfs_root *root = BTRFS_I(dir)->root;
4922 int ret = 0;
4923
4924 path = btrfs_alloc_path();
4925 if (!path)
4926 return -ENOMEM;
4927
4928 di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name,
4929 namelen, 0);
4930 if (IS_ERR(di))
4931 ret = PTR_ERR(di);
4932
4933 if (IS_ERR_OR_NULL(di))
4934 goto out_err;
4935
4936 btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
4937out:
4938 btrfs_free_path(path);
4939 return ret;
4940out_err:
4941 location->objectid = 0;
4942 goto out;
4943}
4944
4945
4946
4947
4948
4949
4950static int fixup_tree_root_location(struct btrfs_root *root,
4951 struct inode *dir,
4952 struct dentry *dentry,
4953 struct btrfs_key *location,
4954 struct btrfs_root **sub_root)
4955{
4956 struct btrfs_path *path;
4957 struct btrfs_root *new_root;
4958 struct btrfs_root_ref *ref;
4959 struct extent_buffer *leaf;
4960 int ret;
4961 int err = 0;
4962
4963 path = btrfs_alloc_path();
4964 if (!path) {
4965 err = -ENOMEM;
4966 goto out;
4967 }
4968
4969 err = -ENOENT;
4970 ret = btrfs_find_item(root->fs_info->tree_root, path,
4971 BTRFS_I(dir)->root->root_key.objectid,
4972 location->objectid, BTRFS_ROOT_REF_KEY, NULL);
4973 if (ret) {
4974 if (ret < 0)
4975 err = ret;
4976 goto out;
4977 }
4978
4979 leaf = path->nodes[0];
4980 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
4981 if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(dir) ||
4982 btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len)
4983 goto out;
4984
4985 ret = memcmp_extent_buffer(leaf, dentry->d_name.name,
4986 (unsigned long)(ref + 1),
4987 dentry->d_name.len);
4988 if (ret)
4989 goto out;
4990
4991 btrfs_release_path(path);
4992
4993 new_root = btrfs_read_fs_root_no_name(root->fs_info, location);
4994 if (IS_ERR(new_root)) {
4995 err = PTR_ERR(new_root);
4996 goto out;
4997 }
4998
4999 *sub_root = new_root;
5000 location->objectid = btrfs_root_dirid(&new_root->root_item);
5001 location->type = BTRFS_INODE_ITEM_KEY;
5002 location->offset = 0;
5003 err = 0;
5004out:
5005 btrfs_free_path(path);
5006 return err;
5007}
5008
5009static void inode_tree_add(struct inode *inode)
5010{
5011 struct btrfs_root *root = BTRFS_I(inode)->root;
5012 struct btrfs_inode *entry;
5013 struct rb_node **p;
5014 struct rb_node *parent;
5015 struct rb_node *new = &BTRFS_I(inode)->rb_node;
5016 u64 ino = btrfs_ino(inode);
5017
5018 if (inode_unhashed(inode))
5019 return;
5020 parent = NULL;
5021 spin_lock(&root->inode_lock);
5022 p = &root->inode_tree.rb_node;
5023 while (*p) {
5024 parent = *p;
5025 entry = rb_entry(parent, struct btrfs_inode, rb_node);
5026
5027 if (ino < btrfs_ino(&entry->vfs_inode))
5028 p = &parent->rb_left;
5029 else if (ino > btrfs_ino(&entry->vfs_inode))
5030 p = &parent->rb_right;
5031 else {
5032 WARN_ON(!(entry->vfs_inode.i_state &
5033 (I_WILL_FREE | I_FREEING)));
5034 rb_replace_node(parent, new, &root->inode_tree);
5035 RB_CLEAR_NODE(parent);
5036 spin_unlock(&root->inode_lock);
5037 return;
5038 }
5039 }
5040 rb_link_node(new, parent, p);
5041 rb_insert_color(new, &root->inode_tree);
5042 spin_unlock(&root->inode_lock);
5043}
5044
5045static void inode_tree_del(struct inode *inode)
5046{
5047 struct btrfs_root *root = BTRFS_I(inode)->root;
5048 int empty = 0;
5049
5050 spin_lock(&root->inode_lock);
5051 if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) {
5052 rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree);
5053 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
5054 empty = RB_EMPTY_ROOT(&root->inode_tree);
5055 }
5056 spin_unlock(&root->inode_lock);
5057
5058 if (empty && btrfs_root_refs(&root->root_item) == 0) {
5059 synchronize_srcu(&root->fs_info->subvol_srcu);
5060 spin_lock(&root->inode_lock);
5061 empty = RB_EMPTY_ROOT(&root->inode_tree);
5062 spin_unlock(&root->inode_lock);
5063 if (empty)
5064 btrfs_add_dead_root(root);
5065 }
5066}
5067
5068void btrfs_invalidate_inodes(struct btrfs_root *root)
5069{
5070 struct rb_node *node;
5071 struct rb_node *prev;
5072 struct btrfs_inode *entry;
5073 struct inode *inode;
5074 u64 objectid = 0;
5075
5076 if (!test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
5077 WARN_ON(btrfs_root_refs(&root->root_item) != 0);
5078
5079 spin_lock(&root->inode_lock);
5080again:
5081 node = root->inode_tree.rb_node;
5082 prev = NULL;
5083 while (node) {
5084 prev = node;
5085 entry = rb_entry(node, struct btrfs_inode, rb_node);
5086
5087 if (objectid < btrfs_ino(&entry->vfs_inode))
5088 node = node->rb_left;
5089 else if (objectid > btrfs_ino(&entry->vfs_inode))
5090 node = node->rb_right;
5091 else
5092 break;
5093 }
5094 if (!node) {
5095 while (prev) {
5096 entry = rb_entry(prev, struct btrfs_inode, rb_node);
5097 if (objectid <= btrfs_ino(&entry->vfs_inode)) {
5098 node = prev;
5099 break;
5100 }
5101 prev = rb_next(prev);
5102 }
5103 }
5104 while (node) {
5105 entry = rb_entry(node, struct btrfs_inode, rb_node);
5106 objectid = btrfs_ino(&entry->vfs_inode) + 1;
5107 inode = igrab(&entry->vfs_inode);
5108 if (inode) {
5109 spin_unlock(&root->inode_lock);
5110 if (atomic_read(&inode->i_count) > 1)
5111 d_prune_aliases(inode);
5112
5113
5114
5115
5116
5117 iput(inode);
5118 cond_resched();
5119 spin_lock(&root->inode_lock);
5120 goto again;
5121 }
5122
5123 if (cond_resched_lock(&root->inode_lock))
5124 goto again;
5125
5126 node = rb_next(node);
5127 }
5128 spin_unlock(&root->inode_lock);
5129}
5130
5131static int btrfs_init_locked_inode(struct inode *inode, void *p)
5132{
5133 struct btrfs_iget_args *args = p;
5134 inode->i_ino = args->location->objectid;
5135 memcpy(&BTRFS_I(inode)->location, args->location,
5136 sizeof(*args->location));
5137 BTRFS_I(inode)->root = args->root;
5138 return 0;
5139}
5140
5141static int btrfs_find_actor(struct inode *inode, void *opaque)
5142{
5143 struct btrfs_iget_args *args = opaque;
5144 return args->location->objectid == BTRFS_I(inode)->location.objectid &&
5145 args->root == BTRFS_I(inode)->root;
5146}
5147
5148static struct inode *btrfs_iget_locked(struct super_block *s,
5149 struct btrfs_key *location,
5150 struct btrfs_root *root)
5151{
5152 struct inode *inode;
5153 struct btrfs_iget_args args;
5154 unsigned long hashval = btrfs_inode_hash(location->objectid, root);
5155
5156 args.location = location;
5157 args.root = root;
5158
5159 inode = iget5_locked(s, hashval, btrfs_find_actor,
5160 btrfs_init_locked_inode,
5161 (void *)&args);
5162 return inode;
5163}
5164
5165
5166
5167
5168struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
5169 struct btrfs_root *root, int *new)
5170{
5171 struct inode *inode;
5172
5173 inode = btrfs_iget_locked(s, location, root);
5174 if (!inode)
5175 return ERR_PTR(-ENOMEM);
5176
5177 if (inode->i_state & I_NEW) {
5178 btrfs_read_locked_inode(inode);
5179 if (!is_bad_inode(inode)) {
5180 inode_tree_add(inode);
5181 unlock_new_inode(inode);
5182 if (new)
5183 *new = 1;
5184 } else {
5185 unlock_new_inode(inode);
5186 iput(inode);
5187 inode = ERR_PTR(-ESTALE);
5188 }
5189 }
5190
5191 return inode;
5192}
5193
5194static struct inode *new_simple_dir(struct super_block *s,
5195 struct btrfs_key *key,
5196 struct btrfs_root *root)
5197{
5198 struct inode *inode = new_inode(s);
5199
5200 if (!inode)
5201 return ERR_PTR(-ENOMEM);
5202
5203 BTRFS_I(inode)->root = root;
5204 memcpy(&BTRFS_I(inode)->location, key, sizeof(*key));
5205 set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags);
5206
5207 inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID;
5208 inode->i_op = &btrfs_dir_ro_inode_operations;
5209 inode->i_fop = &simple_dir_operations;
5210 inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
5211 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
5212
5213 return inode;
5214}
5215
5216struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
5217{
5218 struct inode *inode;
5219 struct btrfs_root *root = BTRFS_I(dir)->root;
5220 struct btrfs_root *sub_root = root;
5221 struct btrfs_key location;
5222 int index;
5223 int ret = 0;
5224
5225 if (dentry->d_name.len > BTRFS_NAME_LEN)
5226 return ERR_PTR(-ENAMETOOLONG);
5227
5228 ret = btrfs_inode_by_name(dir, dentry, &location);
5229 if (ret < 0)
5230 return ERR_PTR(ret);
5231
5232 if (location.objectid == 0)
5233 return ERR_PTR(-ENOENT);
5234
5235 if (location.type == BTRFS_INODE_ITEM_KEY) {
5236 inode = btrfs_iget(dir->i_sb, &location, root, NULL);
5237 return inode;
5238 }
5239
5240 BUG_ON(location.type != BTRFS_ROOT_ITEM_KEY);
5241
5242 index = srcu_read_lock(&root->fs_info->subvol_srcu);
5243 ret = fixup_tree_root_location(root, dir, dentry,
5244 &location, &sub_root);
5245 if (ret < 0) {
5246 if (ret != -ENOENT)
5247 inode = ERR_PTR(ret);
5248 else
5249 inode = new_simple_dir(dir->i_sb, &location, sub_root);
5250 } else {
5251 inode = btrfs_iget(dir->i_sb, &location, sub_root, NULL);
5252 }
5253 srcu_read_unlock(&root->fs_info->subvol_srcu, index);
5254
5255 if (!IS_ERR(inode) && root != sub_root) {
5256 down_read(&root->fs_info->cleanup_work_sem);
5257 if (!(inode->i_sb->s_flags & MS_RDONLY))
5258 ret = btrfs_orphan_cleanup(sub_root);
5259 up_read(&root->fs_info->cleanup_work_sem);
5260 if (ret) {
5261 iput(inode);
5262 inode = ERR_PTR(ret);
5263 }
5264 }
5265
5266 return inode;
5267}
5268
5269static int btrfs_dentry_delete(const struct dentry *dentry)
5270{
5271 struct btrfs_root *root;
5272 struct inode *inode = dentry->d_inode;
5273
5274 if (!inode && !IS_ROOT(dentry))
5275 inode = dentry->d_parent->d_inode;
5276
5277 if (inode) {
5278 root = BTRFS_I(inode)->root;
5279 if (btrfs_root_refs(&root->root_item) == 0)
5280 return 1;
5281
5282 if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
5283 return 1;
5284 }
5285 return 0;
5286}
5287
5288static void btrfs_dentry_release(struct dentry *dentry)
5289{
5290 kfree(dentry->d_fsdata);
5291}
5292
5293static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
5294 unsigned int flags)
5295{
5296 struct inode *inode;
5297
5298 inode = btrfs_lookup_dentry(dir, dentry);
5299 if (IS_ERR(inode)) {
5300 if (PTR_ERR(inode) == -ENOENT)
5301 inode = NULL;
5302 else
5303 return ERR_CAST(inode);
5304 }
5305
5306 return d_materialise_unique(dentry, inode);
5307}
5308
/*
 * Maps the type value stored in a directory item (as read with
 * btrfs_dir_type() in btrfs_real_readdir()) to the DT_* code handed to
 * dir_emit().  The position in the table is the on-disk type value.
 */
unsigned char btrfs_filetype_table[] = {
	DT_UNKNOWN,	/* 0 */
	DT_REG,		/* 1 */
	DT_DIR,		/* 2 */
	DT_CHR,		/* 3 */
	DT_BLK,		/* 4 */
	DT_FIFO,	/* 5 */
	DT_SOCK,	/* 6 */
	DT_LNK,		/* 7 */
};
5312
5313static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
5314{
5315 struct inode *inode = file_inode(file);
5316 struct btrfs_root *root = BTRFS_I(inode)->root;
5317 struct btrfs_item *item;
5318 struct btrfs_dir_item *di;
5319 struct btrfs_key key;
5320 struct btrfs_key found_key;
5321 struct btrfs_path *path;
5322 struct list_head ins_list;
5323 struct list_head del_list;
5324 int ret;
5325 struct extent_buffer *leaf;
5326 int slot;
5327 unsigned char d_type;
5328 int over = 0;
5329 u32 di_cur;
5330 u32 di_total;
5331 u32 di_len;
5332 int key_type = BTRFS_DIR_INDEX_KEY;
5333 char tmp_name[32];
5334 char *name_ptr;
5335 int name_len;
5336 int is_curr = 0;
5337
5338
5339 if (root->fs_info->tree_root == root)
5340 key_type = BTRFS_DIR_ITEM_KEY;
5341
5342 if (!dir_emit_dots(file, ctx))
5343 return 0;
5344
5345 path = btrfs_alloc_path();
5346 if (!path)
5347 return -ENOMEM;
5348
5349 path->reada = 1;
5350
5351 if (key_type == BTRFS_DIR_INDEX_KEY) {
5352 INIT_LIST_HEAD(&ins_list);
5353 INIT_LIST_HEAD(&del_list);
5354 btrfs_get_delayed_items(inode, &ins_list, &del_list);
5355 }
5356
5357 key.type = key_type;
5358 key.offset = ctx->pos;
5359 key.objectid = btrfs_ino(inode);
5360
5361 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5362 if (ret < 0)
5363 goto err;
5364
5365 while (1) {
5366 leaf = path->nodes[0];
5367 slot = path->slots[0];
5368 if (slot >= btrfs_header_nritems(leaf)) {
5369 ret = btrfs_next_leaf(root, path);
5370 if (ret < 0)
5371 goto err;
5372 else if (ret > 0)
5373 break;
5374 continue;
5375 }
5376
5377 item = btrfs_item_nr(slot);
5378 btrfs_item_key_to_cpu(leaf, &found_key, slot);
5379
5380 if (found_key.objectid != key.objectid)
5381 break;
5382 if (found_key.type != key_type)
5383 break;
5384 if (found_key.offset < ctx->pos)
5385 goto next;
5386 if (key_type == BTRFS_DIR_INDEX_KEY &&
5387 btrfs_should_delete_dir_index(&del_list,
5388 found_key.offset))
5389 goto next;
5390
5391 ctx->pos = found_key.offset;
5392 is_curr = 1;
5393
5394 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
5395 di_cur = 0;
5396 di_total = btrfs_item_size(leaf, item);
5397
5398 while (di_cur < di_total) {
5399 struct btrfs_key location;
5400
5401 if (verify_dir_item(root, leaf, di))
5402 break;
5403
5404 name_len = btrfs_dir_name_len(leaf, di);
5405 if (name_len <= sizeof(tmp_name)) {
5406 name_ptr = tmp_name;
5407 } else {
5408 name_ptr = kmalloc(name_len, GFP_NOFS);
5409 if (!name_ptr) {
5410 ret = -ENOMEM;
5411 goto err;
5412 }
5413 }
5414 read_extent_buffer(leaf, name_ptr,
5415 (unsigned long)(di + 1), name_len);
5416
5417 d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
5418 btrfs_dir_item_key_to_cpu(leaf, di, &location);
5419
5420
5421
5422
5423
5424
5425
5426
5427
5428
5429
5430 if (location.type == BTRFS_ROOT_ITEM_KEY &&
5431 location.objectid == root->root_key.objectid) {
5432 over = 0;
5433 goto skip;
5434 }
5435 over = !dir_emit(ctx, name_ptr, name_len,
5436 location.objectid, d_type);
5437
5438skip:
5439 if (name_ptr != tmp_name)
5440 kfree(name_ptr);
5441
5442 if (over)
5443 goto nopos;
5444 di_len = btrfs_dir_name_len(leaf, di) +
5445 btrfs_dir_data_len(leaf, di) + sizeof(*di);
5446 di_cur += di_len;
5447 di = (struct btrfs_dir_item *)((char *)di + di_len);
5448 }
5449next:
5450 path->slots[0]++;
5451 }
5452
5453 if (key_type == BTRFS_DIR_INDEX_KEY) {
5454 if (is_curr)
5455 ctx->pos++;
5456 ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list);
5457 if (ret)
5458 goto nopos;
5459 }
5460
5461
5462 ctx->pos++;
5463
5464
5465
5466
5467
5468
5469
5470
5471
5472
5473
5474
5475
5476
5477
5478
5479
5480
5481 if (key_type == BTRFS_DIR_INDEX_KEY) {
5482 if (ctx->pos >= INT_MAX)
5483 ctx->pos = LLONG_MAX;
5484 else
5485 ctx->pos = INT_MAX;
5486 }
5487nopos:
5488 ret = 0;
5489err:
5490 if (key_type == BTRFS_DIR_INDEX_KEY)
5491 btrfs_put_delayed_items(&ins_list, &del_list);
5492 btrfs_free_path(path);
5493 return ret;
5494}
5495
5496int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
5497{
5498 struct btrfs_root *root = BTRFS_I(inode)->root;
5499 struct btrfs_trans_handle *trans;
5500 int ret = 0;
5501 bool nolock = false;
5502
5503 if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
5504 return 0;
5505
5506 if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(inode))
5507 nolock = true;
5508
5509 if (wbc->sync_mode == WB_SYNC_ALL) {
5510 if (nolock)
5511 trans = btrfs_join_transaction_nolock(root);
5512 else
5513 trans = btrfs_join_transaction(root);
5514 if (IS_ERR(trans))
5515 return PTR_ERR(trans);
5516 ret = btrfs_commit_transaction(trans, root);
5517 }
5518 return ret;
5519}
5520
5521
5522
5523
5524
5525
5526
5527static int btrfs_dirty_inode(struct inode *inode)
5528{
5529 struct btrfs_root *root = BTRFS_I(inode)->root;
5530 struct btrfs_trans_handle *trans;
5531 int ret;
5532
5533 if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
5534 return 0;
5535
5536 trans = btrfs_join_transaction(root);
5537 if (IS_ERR(trans))
5538 return PTR_ERR(trans);
5539
5540 ret = btrfs_update_inode(trans, root, inode);
5541 if (ret && ret == -ENOSPC) {
5542
5543 btrfs_end_transaction(trans, root);
5544 trans = btrfs_start_transaction(root, 1);
5545 if (IS_ERR(trans))
5546 return PTR_ERR(trans);
5547
5548 ret = btrfs_update_inode(trans, root, inode);
5549 }
5550 btrfs_end_transaction(trans, root);
5551 if (BTRFS_I(inode)->delayed_node)
5552 btrfs_balance_delayed_items(root);
5553
5554 return ret;
5555}
5556
5557
5558
5559
5560
5561static int btrfs_update_time(struct inode *inode, struct timespec *now,
5562 int flags)
5563{
5564 struct btrfs_root *root = BTRFS_I(inode)->root;
5565
5566 if (btrfs_root_readonly(root))
5567 return -EROFS;
5568
5569 if (flags & S_VERSION)
5570 inode_inc_iversion(inode);
5571 if (flags & S_CTIME)
5572 inode->i_ctime = *now;
5573 if (flags & S_MTIME)
5574 inode->i_mtime = *now;
5575 if (flags & S_ATIME)
5576 inode->i_atime = *now;
5577 return btrfs_dirty_inode(inode);
5578}
5579
5580
5581
5582
5583
5584
5585static int btrfs_set_inode_index_count(struct inode *inode)
5586{
5587 struct btrfs_root *root = BTRFS_I(inode)->root;
5588 struct btrfs_key key, found_key;
5589 struct btrfs_path *path;
5590 struct extent_buffer *leaf;
5591 int ret;
5592
5593 key.objectid = btrfs_ino(inode);
5594 key.type = BTRFS_DIR_INDEX_KEY;
5595 key.offset = (u64)-1;
5596
5597 path = btrfs_alloc_path();
5598 if (!path)
5599 return -ENOMEM;
5600
5601 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5602 if (ret < 0)
5603 goto out;
5604
5605 if (ret == 0)
5606 goto out;
5607 ret = 0;
5608
5609
5610
5611
5612
5613
5614
5615 if (path->slots[0] == 0) {
5616 BTRFS_I(inode)->index_cnt = 2;
5617 goto out;
5618 }
5619
5620 path->slots[0]--;
5621
5622 leaf = path->nodes[0];
5623 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
5624
5625 if (found_key.objectid != btrfs_ino(inode) ||
5626 found_key.type != BTRFS_DIR_INDEX_KEY) {
5627 BTRFS_I(inode)->index_cnt = 2;
5628 goto out;
5629 }
5630
5631 BTRFS_I(inode)->index_cnt = found_key.offset + 1;
5632out:
5633 btrfs_free_path(path);
5634 return ret;
5635}
5636
5637
5638
5639
5640
5641int btrfs_set_inode_index(struct inode *dir, u64 *index)
5642{
5643 int ret = 0;
5644
5645 if (BTRFS_I(dir)->index_cnt == (u64)-1) {
5646 ret = btrfs_inode_delayed_dir_index_count(dir);
5647 if (ret) {
5648 ret = btrfs_set_inode_index_count(dir);
5649 if (ret)
5650 return ret;
5651 }
5652 }
5653
5654 *index = BTRFS_I(dir)->index_cnt;
5655 BTRFS_I(dir)->index_cnt++;
5656
5657 return ret;
5658}
5659
5660static int btrfs_insert_inode_locked(struct inode *inode)
5661{
5662 struct btrfs_iget_args args;
5663 args.location = &BTRFS_I(inode)->location;
5664 args.root = BTRFS_I(inode)->root;
5665
5666 return insert_inode_locked4(inode,
5667 btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root),
5668 btrfs_find_actor, &args);
5669}
5670
5671static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
5672 struct btrfs_root *root,
5673 struct inode *dir,
5674 const char *name, int name_len,
5675 u64 ref_objectid, u64 objectid,
5676 umode_t mode, u64 *index)
5677{
5678 struct inode *inode;
5679 struct btrfs_inode_item *inode_item;
5680 struct btrfs_key *location;
5681 struct btrfs_path *path;
5682 struct btrfs_inode_ref *ref;
5683 struct btrfs_key key[2];
5684 u32 sizes[2];
5685 int nitems = name ? 2 : 1;
5686 unsigned long ptr;
5687 int ret;
5688
5689 path = btrfs_alloc_path();
5690 if (!path)
5691 return ERR_PTR(-ENOMEM);
5692
5693 inode = new_inode(root->fs_info->sb);
5694 if (!inode) {
5695 btrfs_free_path(path);
5696 return ERR_PTR(-ENOMEM);
5697 }
5698
5699
5700
5701
5702
5703 if (!name)
5704 set_nlink(inode, 0);
5705
5706
5707
5708
5709
5710 inode->i_ino = objectid;
5711
5712 if (dir && name) {
5713 trace_btrfs_inode_request(dir);
5714
5715 ret = btrfs_set_inode_index(dir, index);
5716 if (ret) {
5717 btrfs_free_path(path);
5718 iput(inode);
5719 return ERR_PTR(ret);
5720 }
5721 } else if (dir) {
5722 *index = 0;
5723 }
5724
5725
5726
5727
5728
5729 BTRFS_I(inode)->index_cnt = 2;
5730 BTRFS_I(inode)->dir_index = *index;
5731 BTRFS_I(inode)->root = root;
5732 BTRFS_I(inode)->generation = trans->transid;
5733 inode->i_generation = BTRFS_I(inode)->generation;
5734
5735
5736
5737
5738
5739
5740
5741 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
5742
5743 key[0].objectid = objectid;
5744 key[0].type = BTRFS_INODE_ITEM_KEY;
5745 key[0].offset = 0;
5746
5747 sizes[0] = sizeof(struct btrfs_inode_item);
5748
5749 if (name) {
5750
5751
5752
5753
5754
5755
5756 key[1].objectid = objectid;
5757 key[1].type = BTRFS_INODE_REF_KEY;
5758 key[1].offset = ref_objectid;
5759
5760 sizes[1] = name_len + sizeof(*ref);
5761 }
5762
5763 location = &BTRFS_I(inode)->location;
5764 location->objectid = objectid;
5765 location->offset = 0;
5766 location->type = BTRFS_INODE_ITEM_KEY;
5767
5768 ret = btrfs_insert_inode_locked(inode);
5769 if (ret < 0)
5770 goto fail;
5771
5772 path->leave_spinning = 1;
5773 ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems);
5774 if (ret != 0)
5775 goto fail_unlock;
5776
5777 inode_init_owner(inode, dir, mode);
5778 inode_set_bytes(inode, 0);
5779 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
5780 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
5781 struct btrfs_inode_item);
5782 memset_extent_buffer(path->nodes[0], 0, (unsigned long)inode_item,
5783 sizeof(*inode_item));
5784 fill_inode_item(trans, path->nodes[0], inode_item, inode);
5785
5786 if (name) {
5787 ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
5788 struct btrfs_inode_ref);
5789 btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
5790 btrfs_set_inode_ref_index(path->nodes[0], ref, *index);
5791 ptr = (unsigned long)(ref + 1);
5792 write_extent_buffer(path->nodes[0], name, ptr, name_len);
5793 }
5794
5795 btrfs_mark_buffer_dirty(path->nodes[0]);
5796 btrfs_free_path(path);
5797
5798 btrfs_inherit_iflags(inode, dir);
5799
5800 if (S_ISREG(mode)) {
5801 if (btrfs_test_opt(root, NODATASUM))
5802 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
5803 if (btrfs_test_opt(root, NODATACOW))
5804 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW |
5805 BTRFS_INODE_NODATASUM;
5806 }
5807
5808 inode_tree_add(inode);
5809
5810 trace_btrfs_inode_new(inode);
5811 btrfs_set_inode_last_trans(trans, inode);
5812
5813 btrfs_update_root_times(trans, root);
5814
5815 ret = btrfs_inode_inherit_props(trans, inode, dir);
5816 if (ret)
5817 btrfs_err(root->fs_info,
5818 "error inheriting props for ino %llu (root %llu): %d",
5819 btrfs_ino(inode), root->root_key.objectid, ret);
5820
5821 return inode;
5822
5823fail_unlock:
5824 unlock_new_inode(inode);
5825fail:
5826 if (dir && name)
5827 BTRFS_I(dir)->index_cnt--;
5828 btrfs_free_path(path);
5829 iput(inode);
5830 return ERR_PTR(ret);
5831}
5832
5833static inline u8 btrfs_inode_type(struct inode *inode)
5834{
5835 return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
5836}
5837
5838
5839
5840
5841
5842
5843
5844int btrfs_add_link(struct btrfs_trans_handle *trans,
5845 struct inode *parent_inode, struct inode *inode,
5846 const char *name, int name_len, int add_backref, u64 index)
5847{
5848 int ret = 0;
5849 struct btrfs_key key;
5850 struct btrfs_root *root = BTRFS_I(parent_inode)->root;
5851 u64 ino = btrfs_ino(inode);
5852 u64 parent_ino = btrfs_ino(parent_inode);
5853
5854 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
5855 memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key));
5856 } else {
5857 key.objectid = ino;
5858 key.type = BTRFS_INODE_ITEM_KEY;
5859 key.offset = 0;
5860 }
5861
5862 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
5863 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
5864 key.objectid, root->root_key.objectid,
5865 parent_ino, index, name, name_len);
5866 } else if (add_backref) {
5867 ret = btrfs_insert_inode_ref(trans, root, name, name_len, ino,
5868 parent_ino, index);
5869 }
5870
5871
5872 if (ret)
5873 return ret;
5874
5875 ret = btrfs_insert_dir_item(trans, root, name, name_len,
5876 parent_inode, &key,
5877 btrfs_inode_type(inode), index);
5878 if (ret == -EEXIST || ret == -EOVERFLOW)
5879 goto fail_dir_item;
5880 else if (ret) {
5881 btrfs_abort_transaction(trans, root, ret);
5882 return ret;
5883 }
5884
5885 btrfs_i_size_write(parent_inode, parent_inode->i_size +
5886 name_len * 2);
5887 inode_inc_iversion(parent_inode);
5888 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
5889 ret = btrfs_update_inode(trans, root, parent_inode);
5890 if (ret)
5891 btrfs_abort_transaction(trans, root, ret);
5892 return ret;
5893
5894fail_dir_item:
5895 if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
5896 u64 local_index;
5897 int err;
5898 err = btrfs_del_root_ref(trans, root->fs_info->tree_root,
5899 key.objectid, root->root_key.objectid,
5900 parent_ino, &local_index, name, name_len);
5901
5902 } else if (add_backref) {
5903 u64 local_index;
5904 int err;
5905
5906 err = btrfs_del_inode_ref(trans, root, name, name_len,
5907 ino, parent_ino, &local_index);
5908 }
5909 return ret;
5910}
5911
5912static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
5913 struct inode *dir, struct dentry *dentry,
5914 struct inode *inode, int backref, u64 index)
5915{
5916 int err = btrfs_add_link(trans, dir, inode,
5917 dentry->d_name.name, dentry->d_name.len,
5918 backref, index);
5919 if (err > 0)
5920 err = -EEXIST;
5921 return err;
5922}
5923
5924static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
5925 umode_t mode, dev_t rdev)
5926{
5927 struct btrfs_trans_handle *trans;
5928 struct btrfs_root *root = BTRFS_I(dir)->root;
5929 struct inode *inode = NULL;
5930 int err;
5931 int drop_inode = 0;
5932 u64 objectid;
5933 u64 index = 0;
5934
5935 if (!new_valid_dev(rdev))
5936 return -EINVAL;
5937
5938
5939
5940
5941
5942
5943 trans = btrfs_start_transaction(root, 5);
5944 if (IS_ERR(trans))
5945 return PTR_ERR(trans);
5946
5947 err = btrfs_find_free_ino(root, &objectid);
5948 if (err)
5949 goto out_unlock;
5950
5951 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
5952 dentry->d_name.len, btrfs_ino(dir), objectid,
5953 mode, &index);
5954 if (IS_ERR(inode)) {
5955 err = PTR_ERR(inode);
5956 goto out_unlock;
5957 }
5958
5959
5960
5961
5962
5963
5964
5965 inode->i_op = &btrfs_special_inode_operations;
5966 init_special_inode(inode, inode->i_mode, rdev);
5967
5968 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
5969 if (err)
5970 goto out_unlock_inode;
5971
5972 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
5973 if (err) {
5974 goto out_unlock_inode;
5975 } else {
5976 btrfs_update_inode(trans, root, inode);
5977 unlock_new_inode(inode);
5978 d_instantiate(dentry, inode);
5979 }
5980
5981out_unlock:
5982 btrfs_end_transaction(trans, root);
5983 btrfs_balance_delayed_items(root);
5984 btrfs_btree_balance_dirty(root);
5985 if (drop_inode) {
5986 inode_dec_link_count(inode);
5987 iput(inode);
5988 }
5989 return err;
5990
5991out_unlock_inode:
5992 drop_inode = 1;
5993 unlock_new_inode(inode);
5994 goto out_unlock;
5995
5996}
5997
5998static int btrfs_create(struct inode *dir, struct dentry *dentry,
5999 umode_t mode, bool excl)
6000{
6001 struct btrfs_trans_handle *trans;
6002 struct btrfs_root *root = BTRFS_I(dir)->root;
6003 struct inode *inode = NULL;
6004 int drop_inode_on_err = 0;
6005 int err;
6006 u64 objectid;
6007 u64 index = 0;
6008
6009
6010
6011
6012
6013
6014 trans = btrfs_start_transaction(root, 5);
6015 if (IS_ERR(trans))
6016 return PTR_ERR(trans);
6017
6018 err = btrfs_find_free_ino(root, &objectid);
6019 if (err)
6020 goto out_unlock;
6021
6022 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
6023 dentry->d_name.len, btrfs_ino(dir), objectid,
6024 mode, &index);
6025 if (IS_ERR(inode)) {
6026 err = PTR_ERR(inode);
6027 goto out_unlock;
6028 }
6029 drop_inode_on_err = 1;
6030
6031
6032
6033
6034
6035
6036 inode->i_fop = &btrfs_file_operations;
6037 inode->i_op = &btrfs_file_inode_operations;
6038 inode->i_mapping->a_ops = &btrfs_aops;
6039 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
6040
6041 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
6042 if (err)
6043 goto out_unlock_inode;
6044
6045 err = btrfs_update_inode(trans, root, inode);
6046 if (err)
6047 goto out_unlock_inode;
6048
6049 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
6050 if (err)
6051 goto out_unlock_inode;
6052
6053 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
6054 unlock_new_inode(inode);
6055 d_instantiate(dentry, inode);
6056
6057out_unlock:
6058 btrfs_end_transaction(trans, root);
6059 if (err && drop_inode_on_err) {
6060 inode_dec_link_count(inode);
6061 iput(inode);
6062 }
6063 btrfs_balance_delayed_items(root);
6064 btrfs_btree_balance_dirty(root);
6065 return err;
6066
6067out_unlock_inode:
6068 unlock_new_inode(inode);
6069 goto out_unlock;
6070
6071}
6072
6073static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
6074 struct dentry *dentry)
6075{
6076 struct btrfs_trans_handle *trans;
6077 struct btrfs_root *root = BTRFS_I(dir)->root;
6078 struct inode *inode = old_dentry->d_inode;
6079 u64 index;
6080 int err;
6081 int drop_inode = 0;
6082
6083
6084 if (root->objectid != BTRFS_I(inode)->root->objectid)
6085 return -EXDEV;
6086
6087 if (inode->i_nlink >= BTRFS_LINK_MAX)
6088 return -EMLINK;
6089
6090 err = btrfs_set_inode_index(dir, &index);
6091 if (err)
6092 goto fail;
6093
6094
6095
6096
6097
6098
6099 trans = btrfs_start_transaction(root, 5);
6100 if (IS_ERR(trans)) {
6101 err = PTR_ERR(trans);
6102 goto fail;
6103 }
6104
6105
6106 BTRFS_I(inode)->dir_index = 0ULL;
6107 inc_nlink(inode);
6108 inode_inc_iversion(inode);
6109 inode->i_ctime = CURRENT_TIME;
6110 ihold(inode);
6111 set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
6112
6113 err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index);
6114
6115 if (err) {
6116 drop_inode = 1;
6117 } else {
6118 struct dentry *parent = dentry->d_parent;
6119 err = btrfs_update_inode(trans, root, inode);
6120 if (err)
6121 goto fail;
6122 if (inode->i_nlink == 1) {
6123
6124
6125
6126
6127 err = btrfs_orphan_del(trans, inode);
6128 if (err)
6129 goto fail;
6130 }
6131 d_instantiate(dentry, inode);
6132 btrfs_log_new_name(trans, inode, NULL, parent);
6133 }
6134
6135 btrfs_end_transaction(trans, root);
6136 btrfs_balance_delayed_items(root);
6137fail:
6138 if (drop_inode) {
6139 inode_dec_link_count(inode);
6140 iput(inode);
6141 }
6142 btrfs_btree_balance_dirty(root);
6143 return err;
6144}
6145
6146static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
6147{
6148 struct inode *inode = NULL;
6149 struct btrfs_trans_handle *trans;
6150 struct btrfs_root *root = BTRFS_I(dir)->root;
6151 int err = 0;
6152 int drop_on_err = 0;
6153 u64 objectid = 0;
6154 u64 index = 0;
6155
6156
6157
6158
6159
6160
6161 trans = btrfs_start_transaction(root, 5);
6162 if (IS_ERR(trans))
6163 return PTR_ERR(trans);
6164
6165 err = btrfs_find_free_ino(root, &objectid);
6166 if (err)
6167 goto out_fail;
6168
6169 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
6170 dentry->d_name.len, btrfs_ino(dir), objectid,
6171 S_IFDIR | mode, &index);
6172 if (IS_ERR(inode)) {
6173 err = PTR_ERR(inode);
6174 goto out_fail;
6175 }
6176
6177 drop_on_err = 1;
6178
6179 inode->i_op = &btrfs_dir_inode_operations;
6180 inode->i_fop = &btrfs_dir_file_operations;
6181
6182 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
6183 if (err)
6184 goto out_fail_inode;
6185
6186 btrfs_i_size_write(inode, 0);
6187 err = btrfs_update_inode(trans, root, inode);
6188 if (err)
6189 goto out_fail_inode;
6190
6191 err = btrfs_add_link(trans, dir, inode, dentry->d_name.name,
6192 dentry->d_name.len, 0, index);
6193 if (err)
6194 goto out_fail_inode;
6195
6196 d_instantiate(dentry, inode);
6197
6198
6199
6200
6201 unlock_new_inode(inode);
6202 drop_on_err = 0;
6203
6204out_fail:
6205 btrfs_end_transaction(trans, root);
6206 if (drop_on_err)
6207 iput(inode);
6208 btrfs_balance_delayed_items(root);
6209 btrfs_btree_balance_dirty(root);
6210 return err;
6211
6212out_fail_inode:
6213 unlock_new_inode(inode);
6214 goto out_fail;
6215}
6216
6217
6218static struct extent_map *next_extent_map(struct extent_map *em)
6219{
6220 struct rb_node *next;
6221
6222 next = rb_next(&em->rb_node);
6223 if (!next)
6224 return NULL;
6225 return container_of(next, struct extent_map, rb_node);
6226}
6227
6228static struct extent_map *prev_extent_map(struct extent_map *em)
6229{
6230 struct rb_node *prev;
6231
6232 prev = rb_prev(&em->rb_node);
6233 if (!prev)
6234 return NULL;
6235 return container_of(prev, struct extent_map, rb_node);
6236}
6237
6238
6239
6240
6241
6242
6243static int merge_extent_mapping(struct extent_map_tree *em_tree,
6244 struct extent_map *existing,
6245 struct extent_map *em,
6246 u64 map_start)
6247{
6248 struct extent_map *prev;
6249 struct extent_map *next;
6250 u64 start;
6251 u64 end;
6252 u64 start_diff;
6253
6254 BUG_ON(map_start < em->start || map_start >= extent_map_end(em));
6255
6256 if (existing->start > map_start) {
6257 next = existing;
6258 prev = prev_extent_map(next);
6259 } else {
6260 prev = existing;
6261 next = next_extent_map(prev);
6262 }
6263
6264 start = prev ? extent_map_end(prev) : em->start;
6265 start = max_t(u64, start, em->start);
6266 end = next ? next->start : extent_map_end(em);
6267 end = min_t(u64, end, extent_map_end(em));
6268 start_diff = start - em->start;
6269 em->start = start;
6270 em->len = end - start;
6271 if (em->block_start < EXTENT_MAP_LAST_BYTE &&
6272 !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
6273 em->block_start += start_diff;
6274 em->block_len -= start_diff;
6275 }
6276 return add_extent_mapping(em_tree, em, 0);
6277}
6278
6279static noinline int uncompress_inline(struct btrfs_path *path,
6280 struct inode *inode, struct page *page,
6281 size_t pg_offset, u64 extent_offset,
6282 struct btrfs_file_extent_item *item)
6283{
6284 int ret;
6285 struct extent_buffer *leaf = path->nodes[0];
6286 char *tmp;
6287 size_t max_size;
6288 unsigned long inline_size;
6289 unsigned long ptr;
6290 int compress_type;
6291
6292 WARN_ON(pg_offset != 0);
6293 compress_type = btrfs_file_extent_compression(leaf, item);
6294 max_size = btrfs_file_extent_ram_bytes(leaf, item);
6295 inline_size = btrfs_file_extent_inline_item_len(leaf,
6296 btrfs_item_nr(path->slots[0]));
6297 tmp = kmalloc(inline_size, GFP_NOFS);
6298 if (!tmp)
6299 return -ENOMEM;
6300 ptr = btrfs_file_extent_inline_start(item);
6301
6302 read_extent_buffer(leaf, tmp, ptr, inline_size);
6303
6304 max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size);
6305 ret = btrfs_decompress(compress_type, tmp, page,
6306 extent_offset, inline_size, max_size);
6307 kfree(tmp);
6308 return ret;
6309}
6310
6311
6312
6313
6314
6315
6316
6317
6318
6319
/*
 * Return the extent map that covers file offset @start of @inode.
 *
 * The inode's cached extent map tree is consulted first.  If no usable
 * mapping is cached, a new extent map is allocated, filled in from the file
 * extent item found in the inode's root (or set up as a hole when no item
 * covers @start) and inserted into the extent map tree.
 *
 * For inline extents, when @page is given and @create is 0, the inline data
 * is also copied (or decompressed) into @page at @pg_offset if the page is
 * not yet uptodate.
 *
 * Returns the extent map on success, or an ERR_PTR() on failure.
 */
struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
				    size_t pg_offset, u64 start, u64 len,
				    int create)
{
	int ret;
	int err = 0;
	u64 extent_start = 0;
	u64 extent_end = 0;
	u64 objectid = btrfs_ino(inode);
	u32 found_type;
	struct btrfs_path *path = NULL;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_file_extent_item *item;
	struct extent_buffer *leaf;
	struct btrfs_key found_key;
	struct extent_map *em = NULL;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_trans_handle *trans = NULL;
	/* true when the caller gave no page to fill, or is creating */
	const bool new_inline = !page || create;

again:
	/* first try the cached extent maps */
	read_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, start, len);
	if (em)
		em->bdev = root->fs_info->fs_devices->latest_bdev;
	read_unlock(&em_tree->lock);

	if (em) {
		/*
		 * Drop the cached map if it does not contain @start, or if
		 * it is an inline extent and we were given a page (the
		 * inline handling further down needs to run in that case).
		 */
		if (em->start > start || em->start + em->len <= start)
			free_extent_map(em);
		else if (em->block_start == EXTENT_MAP_INLINE && page)
			free_extent_map(em);
		else
			goto out;
	}
	em = alloc_extent_map();
	if (!em) {
		err = -ENOMEM;
		goto out;
	}
	em->bdev = root->fs_info->fs_devices->latest_bdev;
	em->start = EXTENT_MAP_HOLE;
	em->orig_start = EXTENT_MAP_HOLE;
	em->len = (u64)-1;
	em->block_len = (u64)-1;

	/* the path is allocated once and reused if we jump back to again */
	if (!path) {
		path = btrfs_alloc_path();
		if (!path) {
			err = -ENOMEM;
			goto out;
		}
		/* NOTE(review): presumably enables readahead for this path */
		path->reada = 1;
	}

	ret = btrfs_lookup_file_extent(trans, root, path,
				       objectid, start, trans != NULL);
	if (ret < 0) {
		err = ret;
		goto out;
	}

	/* no exact match: step back one slot to the preceding item, if any */
	if (ret != 0) {
		if (path->slots[0] == 0)
			goto not_found;
		path->slots[0]--;
	}

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0],
			      struct btrfs_file_extent_item);
	/* does the item we landed on belong to this inode's file extents? */
	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
	found_type = found_key.type;
	if (found_key.objectid != objectid ||
	    found_type != BTRFS_EXTENT_DATA_KEY) {
		/*
		 * The item is not one of our file extents.  Setting
		 * extent_end to start makes the code at "next" move forward
		 * and examine the following item instead of declaring the
		 * whole range a hole right away.
		 */
		extent_end = start;
		goto next;
	}

	found_type = btrfs_file_extent_type(leaf, item);
	extent_start = found_key.offset;
	if (found_type == BTRFS_FILE_EXTENT_REG ||
	    found_type == BTRFS_FILE_EXTENT_PREALLOC) {
		extent_end = extent_start +
		       btrfs_file_extent_num_bytes(leaf, item);
	} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
		size_t size;
		size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
		extent_end = ALIGN(extent_start + size, root->sectorsize);
	}
next:
	if (start >= extent_end) {
		/*
		 * The item examined ends at or before @start; look at the
		 * next item to find out how far the hole extends.
		 */
		path->slots[0]++;
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0) {
				err = ret;
				goto out;
			}
			if (ret > 0)
				goto not_found;
			leaf = path->nodes[0];
		}
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != objectid ||
		    found_key.type != BTRFS_EXTENT_DATA_KEY)
			goto not_found;
		if (start + len <= found_key.offset)
			goto not_found;
		if (start > found_key.offset)
			goto next;
		/* hole from @start up to the start of the next extent item */
		em->start = start;
		em->orig_start = start;
		em->len = found_key.offset - start;
		goto not_found_em;
	}

	btrfs_extent_item_to_extent_map(inode, path, item, new_inline, em);

	if (found_type == BTRFS_FILE_EXTENT_REG ||
	    found_type == BTRFS_FILE_EXTENT_PREALLOC) {
		goto insert;
	} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
		unsigned long ptr;
		char *map;
		size_t size;
		size_t extent_offset;
		size_t copy_size;

		/* no page to fill (or creating): return em without caching */
		if (new_inline)
			goto out;

		size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
		extent_offset = page_offset(page) + pg_offset - extent_start;
		copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset,
				size - extent_offset);
		em->start = extent_start + extent_offset;
		em->len = ALIGN(copy_size, root->sectorsize);
		em->orig_block_len = em->len;
		em->orig_start = em->start;
		ptr = btrfs_file_extent_inline_start(item) + extent_offset;
		if (create == 0 && !PageUptodate(page)) {
			if (btrfs_file_extent_compression(leaf, item) !=
			    BTRFS_COMPRESS_NONE) {
				ret = uncompress_inline(path, inode, page,
							pg_offset,
							extent_offset, item);
				if (ret) {
					err = ret;
					goto out;
				}
			} else {
				/* plain copy, then zero the rest of the page */
				map = kmap(page);
				read_extent_buffer(leaf, map + pg_offset, ptr,
						   copy_size);
				if (pg_offset + copy_size < PAGE_CACHE_SIZE) {
					memset(map + pg_offset + copy_size, 0,
					       PAGE_CACHE_SIZE - pg_offset -
					       copy_size);
				}
				kunmap(page);
			}
			flush_dcache_page(page);
		} else if (create && PageUptodate(page)) {
			/*
			 * NOTE(review): new_inline is true whenever create is
			 * non-zero and we bailed out above in that case, so
			 * this branch looks unreachable; BUG() guards it and
			 * the statements after BUG() are presumably dead code.
			 */
			BUG();
			if (!trans) {
				kunmap(page);
				free_extent_map(em);
				em = NULL;

				btrfs_release_path(path);
				trans = btrfs_join_transaction(root);

				if (IS_ERR(trans))
					return ERR_CAST(trans);
				goto again;
			}
			map = kmap(page);
			write_extent_buffer(leaf, map + pg_offset, ptr,
					    copy_size);
			kunmap(page);
			btrfs_mark_buffer_dirty(leaf);
		}
		set_extent_uptodate(io_tree, em->start,
				    extent_map_end(em) - 1, NULL, GFP_NOFS);
		goto insert;
	}
not_found:
	/* nothing covers the range: describe [start, start + len) as a hole */
	em->start = start;
	em->orig_start = start;
	em->len = len;
not_found_em:
	em->block_start = EXTENT_MAP_HOLE;
	set_bit(EXTENT_FLAG_VACANCY, &em->flags);
insert:
	btrfs_release_path(path);
	/* sanity check: the map we built must contain @start */
	if (em->start > start || extent_map_end(em) <= start) {
		btrfs_err(root->fs_info, "bad extent! em: [%llu %llu] passed [%llu %llu]",
			em->start, em->len, start, len);
		err = -EIO;
		goto out;
	}

	err = 0;
	write_lock(&em_tree->lock);
	ret = add_extent_mapping(em_tree, em, 0);
	/*
	 * -EEXIST means an overlapping mapping is already in the tree;
	 * either merge with it or use it instead of our new map.
	 */
	if (ret == -EEXIST) {
		struct extent_map *existing;

		ret = 0;

		/*
		 * NOTE(review): existing is dereferenced without a NULL
		 * check; presumably the -EEXIST result guarantees that the
		 * search finds a mapping -- confirm.
		 */
		existing = search_extent_mapping(em_tree, start, len);
		if (start >= extent_map_end(existing) ||
		    start <= existing->start) {
			/*
			 * @start is not strictly inside the existing map:
			 * merge our new map with it.
			 */
			err = merge_extent_mapping(em_tree, existing,
						   em, start);
			free_extent_map(existing);
			if (err) {
				free_extent_map(em);
				em = NULL;
			}
		} else {
			/* the existing map covers @start: hand that one out */
			free_extent_map(em);
			em = existing;
			err = 0;
		}
	}
	write_unlock(&em_tree->lock);
out:

	trace_btrfs_get_extent(root, em);

	if (path)
		btrfs_free_path(path);
	if (trans) {
		ret = btrfs_end_transaction(trans, root);
		if (!err)
			err = ret;
	}
	if (err) {
		free_extent_map(em);
		return ERR_PTR(err);
	}
	BUG_ON(!em); /* -ENOMEM */
	return em;
}
6590
/*
 * Variant of btrfs_get_extent() that also takes delalloc ranges into
 * account.
 *
 * If btrfs_get_extent() returns a mapping that is neither a hole nor a
 * preallocated extent, that mapping is returned directly.  Otherwise the
 * inode's io_tree is searched for EXTENT_DELALLOC bits in the requested
 * range, and a freshly allocated extent map is returned that describes
 * either the leading hole/prealloc part or the delalloc range (with
 * block_start set to EXTENT_MAP_DELALLOC).
 *
 * Returns an extent map or an ERR_PTR().
 */
struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
					   size_t pg_offset, u64 start, u64 len,
					   int create)
{
	struct extent_map *em;
	struct extent_map *hole_em = NULL;
	u64 range_start = start;
	u64 end;
	u64 found;
	u64 found_end;
	int err = 0;

	em = btrfs_get_extent(inode, page, pg_offset, start, len, create);
	if (IS_ERR(em))
		return em;
	if (em) {
		/*
		 * A hole or a preallocated extent may still have delalloc
		 * bytes on top of it, so keep it aside and go look;
		 * anything else is returned as is.
		 */
		if (em->block_start != EXTENT_MAP_HOLE &&
		    !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
			return em;
		else
			hole_em = em;
	}

	/* compute the inclusive end, clamping if start + len wrapped */
	end = start + len;
	if (end < start)
		end = (u64)-1;
	else
		end -= 1;

	em = NULL;

	/* look for delalloc bits inside the requested range */
	found = count_range_bits(&BTRFS_I(inode)->io_tree, &range_start,
				 end, len, EXTENT_DELALLOC, 1);
	found_end = range_start + found;
	if (found_end < range_start)
		found_end = (u64)-1;

	/*
	 * No delalloc overlapping the range: return whatever
	 * btrfs_get_extent() gave us.
	 */
	if (range_start > end || found_end <= start) {
		em = hole_em;
		hole_em = NULL;
		goto out;
	}

	/* never report a range that begins before the caller's start */
	range_start = max(start, range_start);
	found = found_end - range_start;

	if (found > 0) {
		u64 hole_start = start;
		u64 hole_len = len;

		em = alloc_extent_map();
		if (!em) {
			err = -ENOMEM;
			goto out;
		}
		/*
		 * Make sure the hole/prealloc mapping really intersects the
		 * requested range, and rebase it onto the caller's start.
		 */
		if (hole_em) {
			u64 calc_end = extent_map_end(hole_em);

			if (calc_end <= start || (hole_em->start > end)) {
				free_extent_map(hole_em);
				hole_em = NULL;
			} else {
				hole_start = max(hole_em->start, start);
				hole_len = calc_end - hole_start;
			}
		}
		em->bdev = NULL;
		if (hole_em && range_start > hole_start) {
			/*
			 * The hole begins before the delalloc range: return
			 * only the part of the hole that precedes it.
			 */
			em->len = min(hole_len,
				      range_start - hole_start);
			em->start = hole_start;
			em->orig_start = hole_start;
			/* block_start is taken over unchanged from hole_em */
			em->block_start = hole_em->block_start;
			em->block_len = hole_len;
			if (test_bit(EXTENT_FLAG_PREALLOC, &hole_em->flags))
				set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
		} else {
			/* report the delalloc range itself */
			em->start = range_start;
			em->len = found;
			em->orig_start = range_start;
			em->block_start = EXTENT_MAP_DELALLOC;
			em->block_len = found;
		}
	} else if (hole_em) {
		return hole_em;
	}
out:

	/* hole_em is NULL here if its reference was handed to the caller */
	free_extent_map(hole_em);
	if (err) {
		free_extent_map(em);
		return ERR_PTR(err);
	}
	return em;
}
6717
6718static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
6719 u64 start, u64 len)
6720{
6721 struct btrfs_root *root = BTRFS_I(inode)->root;
6722 struct extent_map *em;
6723 struct btrfs_key ins;
6724 u64 alloc_hint;
6725 int ret;
6726
6727 alloc_hint = get_extent_allocation_hint(inode, start, len);
6728 ret = btrfs_reserve_extent(root, len, root->sectorsize, 0,
6729 alloc_hint, &ins, 1, 1);
6730 if (ret)
6731 return ERR_PTR(ret);
6732
6733 em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
6734 ins.offset, ins.offset, ins.offset, 0);
6735 if (IS_ERR(em)) {
6736 btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
6737 return em;
6738 }
6739
6740 ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
6741 ins.offset, ins.offset, 0);
6742 if (ret) {
6743 btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
6744 free_extent_map(em);
6745 return ERR_PTR(ret);
6746 }
6747
6748 return em;
6749}
6750
6751
6752
6753
6754
/*
 * Check whether a write of *len bytes at file offset @offset can go straight
 * into the existing extent without COW.
 *
 * Returns:
 *   1  - the write may be done in place; *len is trimmed to the number of
 *        bytes of the extent that can be overwritten
 *   0  - the write must COW
 *  <0  - error (-ENOMEM, a lookup error, or -EAGAIN when delalloc is
 *        pending on a preallocated range of an inode without NODATACOW)
 *
 * If @orig_start is non-NULL, @orig_start, @orig_block_len and @ram_bytes
 * are all filled in from the file extent item once the basic checks passed.
 */
noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
			      u64 *orig_start, u64 *orig_block_len,
			      u64 *ram_bytes)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_path *path;
	int ret;
	struct extent_buffer *leaf;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 disk_bytenr;
	u64 backref_offset;
	u64 extent_end;
	u64 num_bytes;
	int slot;
	int found_type;
	bool nocow = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode),
				       offset, 0);
	if (ret < 0)
		goto out;

	slot = path->slots[0];
	if (ret == 1) {
		if (slot == 0) {
			/* no item precedes the search position: must cow */
			ret = 0;
			goto out;
		}
		slot--;
	}
	ret = 0;
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, slot);
	if (key.objectid != btrfs_ino(inode) ||
	    key.type != BTRFS_EXTENT_DATA_KEY) {
		/* item belongs to another inode or is not a file extent */
		goto out;
	}

	if (key.offset > offset) {
		/* the extent starts after our offset: must cow */
		goto out;
	}

	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
	found_type = btrfs_file_extent_type(leaf, fi);
	if (found_type != BTRFS_FILE_EXTENT_REG &&
	    found_type != BTRFS_FILE_EXTENT_PREALLOC) {
		/* only regular and preallocated extents qualify */
		goto out;
	}

	/* without NODATACOW only preallocated extents may be written in place */
	if (!nocow && found_type == BTRFS_FILE_EXTENT_REG)
		goto out;

	extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
	if (extent_end <= offset)
		goto out;

	/* a disk_bytenr of 0 is treated as "must cow" */
	disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
	if (disk_bytenr == 0)
		goto out;

	/* any compression/encryption/other encoding forces cow */
	if (btrfs_file_extent_compression(leaf, fi) ||
	    btrfs_file_extent_encryption(leaf, fi) ||
	    btrfs_file_extent_other_encoding(leaf, fi))
		goto out;

	backref_offset = btrfs_file_extent_offset(leaf, fi);

	if (orig_start) {
		*orig_start = key.offset - backref_offset;
		*orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
		*ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
	}

	if (btrfs_extent_readonly(root, disk_bytenr))
		goto out;

	/* clamp the write length to the end of this extent */
	num_bytes = min(offset + *len, extent_end) - offset;
	if (!nocow && found_type == BTRFS_FILE_EXTENT_PREALLOC) {
		u64 range_end;

		/* pending delalloc in the range: caller gets -EAGAIN */
		range_end = round_up(offset + num_bytes, root->sectorsize) - 1;
		ret = test_range_bit(io_tree, offset, range_end,
				     EXTENT_DELALLOC, 0, NULL);
		if (ret) {
			ret = -EAGAIN;
			goto out;
		}
	}

	btrfs_release_path(path);

	/*
	 * Check for other references to this extent; if there are any we
	 * must cow.  Failing to join a transaction is also treated as
	 * "must cow".
	 */
	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		ret = 0;
		goto out;
	}

	ret = btrfs_cross_ref_exist(trans, root, btrfs_ino(inode),
				    key.offset - backref_offset, disk_bytenr);
	btrfs_end_transaction(trans, root);
	if (ret) {
		ret = 0;
		goto out;
	}

	/*
	 * Narrow disk_bytenr down to the bytes we are about to write and
	 * make sure no checksums exist for them; if some do, we must cow.
	 * ret is 0 at this point.
	 */
	disk_bytenr += backref_offset;
	disk_bytenr += offset - key.offset;
	if (csum_exist_in_range(root, disk_bytenr, num_bytes))
		goto out;
	/* every check passed: the extent can be overwritten in place */
	*len = num_bytes;
	ret = 1;
out:
	btrfs_free_path(path);
	return ret;
}
6895
6896bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end)
6897{
6898 struct radix_tree_root *root = &inode->i_mapping->page_tree;
6899 int found = false;
6900 void **pagep = NULL;
6901 struct page *page = NULL;
6902 int start_idx;
6903 int end_idx;
6904
6905 start_idx = start >> PAGE_CACHE_SHIFT;
6906
6907
6908
6909
6910 end_idx = end >> PAGE_CACHE_SHIFT;
6911
6912 rcu_read_lock();
6913
6914
6915
6916
6917
6918
6919
6920 while (page == NULL &&
6921 radix_tree_gang_lookup_slot(root, &pagep, NULL, start_idx, 1)) {
6922 page = radix_tree_deref_slot(pagep);
6923 if (unlikely(!page))
6924 break;
6925
6926 if (radix_tree_exception(page)) {
6927 if (radix_tree_deref_retry(page)) {
6928 page = NULL;
6929 continue;
6930 }
6931
6932
6933
6934
6935
6936 page = NULL;
6937 break;
6938 }
6939
6940 if (!page_cache_get_speculative(page)) {
6941 page = NULL;
6942 continue;
6943 }
6944
6945
6946
6947
6948
6949
6950 if (unlikely(page != *pagep)) {
6951 page_cache_release(page);
6952 page = NULL;
6953 }
6954 }
6955
6956 if (page) {
6957 if (page->index <= end_idx)
6958 found = true;
6959 page_cache_release(page);
6960 }
6961
6962 rcu_read_unlock();
6963 return found;
6964}
6965
6966static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
6967 struct extent_state **cached_state, int writing)
6968{
6969 struct btrfs_ordered_extent *ordered;
6970 int ret = 0;
6971
6972 while (1) {
6973 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
6974 0, cached_state);
6975
6976
6977
6978
6979
6980 ordered = btrfs_lookup_ordered_range(inode, lockstart,
6981 lockend - lockstart + 1);
6982
6983
6984
6985
6986
6987
6988
6989
6990 if (!ordered &&
6991 (!writing ||
6992 !btrfs_page_exists_in_range(inode, lockstart, lockend)))
6993 break;
6994
6995 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
6996 cached_state, GFP_NOFS);
6997
6998 if (ordered) {
6999 btrfs_start_ordered_extent(inode, ordered, 1);
7000 btrfs_put_ordered_extent(ordered);
7001 } else {
7002
7003 ret = filemap_write_and_wait_range(inode->i_mapping,
7004 lockstart,
7005 lockend);
7006 if (ret)
7007 break;
7008
7009
7010
7011
7012
7013 ret = invalidate_inode_pages2_range(inode->i_mapping,
7014 lockstart >> PAGE_CACHE_SHIFT,
7015 lockend >> PAGE_CACHE_SHIFT);
7016 if (ret)
7017 break;
7018 }
7019
7020 cond_resched();
7021 }
7022
7023 return ret;
7024}
7025
7026static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
7027 u64 len, u64 orig_start,
7028 u64 block_start, u64 block_len,
7029 u64 orig_block_len, u64 ram_bytes,
7030 int type)
7031{
7032 struct extent_map_tree *em_tree;
7033 struct extent_map *em;
7034 struct btrfs_root *root = BTRFS_I(inode)->root;
7035 int ret;
7036
7037 em_tree = &BTRFS_I(inode)->extent_tree;
7038 em = alloc_extent_map();
7039 if (!em)
7040 return ERR_PTR(-ENOMEM);
7041
7042 em->start = start;
7043 em->orig_start = orig_start;
7044 em->mod_start = start;
7045 em->mod_len = len;
7046 em->len = len;
7047 em->block_len = block_len;
7048 em->block_start = block_start;
7049 em->bdev = root->fs_info->fs_devices->latest_bdev;
7050 em->orig_block_len = orig_block_len;
7051 em->ram_bytes = ram_bytes;
7052 em->generation = -1;
7053 set_bit(EXTENT_FLAG_PINNED, &em->flags);
7054 if (type == BTRFS_ORDERED_PREALLOC)
7055 set_bit(EXTENT_FLAG_FILLING, &em->flags);
7056
7057 do {
7058 btrfs_drop_extent_cache(inode, em->start,
7059 em->start + em->len - 1, 0);
7060 write_lock(&em_tree->lock);
7061 ret = add_extent_mapping(em_tree, em, 1);
7062 write_unlock(&em_tree->lock);
7063 } while (ret == -EEXIST);
7064
7065 if (ret) {
7066 free_extent_map(em);
7067 return ERR_PTR(ret);
7068 }
7069
7070 return em;
7071}
7072
7073
7074static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
7075 struct buffer_head *bh_result, int create)
7076{
7077 struct extent_map *em;
7078 struct btrfs_root *root = BTRFS_I(inode)->root;
7079 struct extent_state *cached_state = NULL;
7080 u64 start = iblock << inode->i_blkbits;
7081 u64 lockstart, lockend;
7082 u64 len = bh_result->b_size;
7083 int unlock_bits = EXTENT_LOCKED;
7084 int ret = 0;
7085
7086 if (create)
7087 unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
7088 else
7089 len = min_t(u64, len, root->sectorsize);
7090
7091 lockstart = start;
7092 lockend = start + len - 1;
7093
7094
7095
7096
7097
7098 if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create))
7099 return -ENOTBLK;
7100
7101 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
7102 if (IS_ERR(em)) {
7103 ret = PTR_ERR(em);
7104 goto unlock_err;
7105 }
7106
7107
7108
7109
7110
7111
7112
7113
7114
7115
7116
7117
7118
7119
7120
7121 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
7122 em->block_start == EXTENT_MAP_INLINE) {
7123 free_extent_map(em);
7124 ret = -ENOTBLK;
7125 goto unlock_err;
7126 }
7127
7128
7129 if (!create && (em->block_start == EXTENT_MAP_HOLE ||
7130 test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
7131 free_extent_map(em);
7132 goto unlock_err;
7133 }
7134
7135
7136
7137
7138
7139
7140
7141
7142
7143
7144 if (!create) {
7145 len = min(len, em->len - (start - em->start));
7146 lockstart = start + len;
7147 goto unlock;
7148 }
7149
7150 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
7151 ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
7152 em->block_start != EXTENT_MAP_HOLE)) {
7153 int type;
7154 int ret;
7155 u64 block_start, orig_start, orig_block_len, ram_bytes;
7156
7157 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
7158 type = BTRFS_ORDERED_PREALLOC;
7159 else
7160 type = BTRFS_ORDERED_NOCOW;
7161 len = min(len, em->len - (start - em->start));
7162 block_start = em->block_start + (start - em->start);
7163
7164 if (can_nocow_extent(inode, start, &len, &orig_start,
7165 &orig_block_len, &ram_bytes) == 1) {
7166 if (type == BTRFS_ORDERED_PREALLOC) {
7167 free_extent_map(em);
7168 em = create_pinned_em(inode, start, len,
7169 orig_start,
7170 block_start, len,
7171 orig_block_len,
7172 ram_bytes, type);
7173 if (IS_ERR(em)) {
7174 ret = PTR_ERR(em);
7175 goto unlock_err;
7176 }
7177 }
7178
7179 ret = btrfs_add_ordered_extent_dio(inode, start,
7180 block_start, len, len, type);
7181 if (ret) {
7182 free_extent_map(em);
7183 goto unlock_err;
7184 }
7185 goto unlock;
7186 }
7187 }
7188
7189
7190
7191
7192
7193 len = bh_result->b_size;
7194 free_extent_map(em);
7195 em = btrfs_new_extent_direct(inode, start, len);
7196 if (IS_ERR(em)) {
7197 ret = PTR_ERR(em);
7198 goto unlock_err;
7199 }
7200 len = min(len, em->len - (start - em->start));
7201unlock:
7202 bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
7203 inode->i_blkbits;
7204 bh_result->b_size = len;
7205 bh_result->b_bdev = em->bdev;
7206 set_buffer_mapped(bh_result);
7207 if (create) {
7208 if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
7209 set_buffer_new(bh_result);
7210
7211
7212
7213
7214
7215 if (start + len > i_size_read(inode))
7216 i_size_write(inode, start + len);
7217
7218 spin_lock(&BTRFS_I(inode)->lock);
7219 BTRFS_I(inode)->outstanding_extents++;
7220 spin_unlock(&BTRFS_I(inode)->lock);
7221
7222 ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
7223 lockstart + len - 1, EXTENT_DELALLOC, NULL,
7224 &cached_state, GFP_NOFS);
7225 BUG_ON(ret);
7226 }
7227
7228
7229
7230
7231
7232
7233 if (lockstart < lockend) {
7234 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
7235 lockend, unlock_bits, 1, 0,
7236 &cached_state, GFP_NOFS);
7237 } else {
7238 free_extent_state(cached_state);
7239 }
7240
7241 free_extent_map(em);
7242
7243 return 0;
7244
7245unlock_err:
7246 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
7247 unlock_bits, 1, 0, &cached_state, GFP_NOFS);
7248 return ret;
7249}
7250
7251static inline int submit_dio_repair_bio(struct inode *inode, struct bio *bio,
7252 int rw, int mirror_num)
7253{
7254 struct btrfs_root *root = BTRFS_I(inode)->root;
7255 int ret;
7256
7257 BUG_ON(rw & REQ_WRITE);
7258
7259 bio_get(bio);
7260
7261 ret = btrfs_bio_wq_end_io(root->fs_info, bio,
7262 BTRFS_WQ_ENDIO_DIO_REPAIR);
7263 if (ret)
7264 goto err;
7265
7266 ret = btrfs_map_bio(root, rw, bio, mirror_num, 0);
7267err:
7268 bio_put(bio);
7269 return ret;
7270}
7271
7272static int btrfs_check_dio_repairable(struct inode *inode,
7273 struct bio *failed_bio,
7274 struct io_failure_record *failrec,
7275 int failed_mirror)
7276{
7277 int num_copies;
7278
7279 num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info,
7280 failrec->logical, failrec->len);
7281 if (num_copies == 1) {
7282
7283
7284
7285
7286
7287 pr_debug("Check DIO Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n",
7288 num_copies, failrec->this_mirror, failed_mirror);
7289 return 0;
7290 }
7291
7292 failrec->failed_mirror = failed_mirror;
7293 failrec->this_mirror++;
7294 if (failrec->this_mirror == failed_mirror)
7295 failrec->this_mirror++;
7296
7297 if (failrec->this_mirror > num_copies) {
7298 pr_debug("Check DIO Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n",
7299 num_copies, failrec->this_mirror, failed_mirror);
7300 return 0;
7301 }
7302
7303 return 1;
7304}
7305
7306static int dio_read_error(struct inode *inode, struct bio *failed_bio,
7307 struct page *page, u64 start, u64 end,
7308 int failed_mirror, bio_end_io_t *repair_endio,
7309 void *repair_arg)
7310{
7311 struct io_failure_record *failrec;
7312 struct bio *bio;
7313 int isector;
7314 int read_mode;
7315 int ret;
7316
7317 BUG_ON(failed_bio->bi_rw & REQ_WRITE);
7318
7319 ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
7320 if (ret)
7321 return ret;
7322
7323 ret = btrfs_check_dio_repairable(inode, failed_bio, failrec,
7324 failed_mirror);
7325 if (!ret) {
7326 free_io_failure(inode, failrec);
7327 return -EIO;
7328 }
7329
7330 if (failed_bio->bi_vcnt > 1)
7331 read_mode = READ_SYNC | REQ_FAILFAST_DEV;
7332 else
7333 read_mode = READ_SYNC;
7334
7335 isector = start - btrfs_io_bio(failed_bio)->logical;
7336 isector >>= inode->i_sb->s_blocksize_bits;
7337 bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
7338 0, isector, repair_endio, repair_arg);
7339 if (!bio) {
7340 free_io_failure(inode, failrec);
7341 return -EIO;
7342 }
7343
7344 btrfs_debug(BTRFS_I(inode)->root->fs_info,
7345 "Repair DIO Read Error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d\n",
7346 read_mode, failrec->this_mirror, failrec->in_validation);
7347
7348 ret = submit_dio_repair_bio(inode, bio, read_mode,
7349 failrec->this_mirror);
7350 if (ret) {
7351 free_io_failure(inode, failrec);
7352 bio_put(bio);
7353 }
7354
7355 return ret;
7356}
7357
/*
 * State shared between a direct IO read retry and the end_io handler of
 * the repair bio it submitted (passed via bio->bi_private).
 */
struct btrfs_retry_complete {
	/* completed by the repair bio's end_io handler */
	struct completion done;
	/* inode the read belongs to; passed to clean_io_failure() */
	struct inode *inode;
	/* start offset of the range being retried */
	u64 start;
	/* set to 1 by the end_io handler when the retry read succeeded */
	int uptodate;
};
7364
7365static void btrfs_retry_endio_nocsum(struct bio *bio, int err)
7366{
7367 struct btrfs_retry_complete *done = bio->bi_private;
7368 struct bio_vec *bvec;
7369 int i;
7370
7371 if (err)
7372 goto end;
7373
7374 done->uptodate = 1;
7375 bio_for_each_segment_all(bvec, bio, i)
7376 clean_io_failure(done->inode, done->start, bvec->bv_page, 0);
7377end:
7378 complete(&done->done);
7379 bio_put(bio);
7380}
7381
7382static int __btrfs_correct_data_nocsum(struct inode *inode,
7383 struct btrfs_io_bio *io_bio)
7384{
7385 struct bio_vec *bvec;
7386 struct btrfs_retry_complete done;
7387 u64 start;
7388 int i;
7389 int ret;
7390
7391 start = io_bio->logical;
7392 done.inode = inode;
7393
7394 bio_for_each_segment_all(bvec, &io_bio->bio, i) {
7395try_again:
7396 done.uptodate = 0;
7397 done.start = start;
7398 init_completion(&done.done);
7399
7400 ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, start,
7401 start + bvec->bv_len - 1,
7402 io_bio->mirror_num,
7403 btrfs_retry_endio_nocsum, &done);
7404 if (ret)
7405 return ret;
7406
7407 wait_for_completion(&done.done);
7408
7409 if (!done.uptodate) {
7410
7411 goto try_again;
7412 }
7413
7414 start += bvec->bv_len;
7415 }
7416
7417 return 0;
7418}
7419
7420static void btrfs_retry_endio(struct bio *bio, int err)
7421{
7422 struct btrfs_retry_complete *done = bio->bi_private;
7423 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
7424 struct bio_vec *bvec;
7425 int uptodate;
7426 int ret;
7427 int i;
7428
7429 if (err)
7430 goto end;
7431
7432 uptodate = 1;
7433 bio_for_each_segment_all(bvec, bio, i) {
7434 ret = __readpage_endio_check(done->inode, io_bio, i,
7435 bvec->bv_page, 0,
7436 done->start, bvec->bv_len);
7437 if (!ret)
7438 clean_io_failure(done->inode, done->start,
7439 bvec->bv_page, 0);
7440 else
7441 uptodate = 0;
7442 }
7443
7444 done->uptodate = uptodate;
7445end:
7446 complete(&done->done);
7447 bio_put(bio);
7448}
7449
7450static int __btrfs_subio_endio_read(struct inode *inode,
7451 struct btrfs_io_bio *io_bio, int err)
7452{
7453 struct bio_vec *bvec;
7454 struct btrfs_retry_complete done;
7455 u64 start;
7456 u64 offset = 0;
7457 int i;
7458 int ret;
7459
7460 err = 0;
7461 start = io_bio->logical;
7462 done.inode = inode;
7463
7464 bio_for_each_segment_all(bvec, &io_bio->bio, i) {
7465 ret = __readpage_endio_check(inode, io_bio, i, bvec->bv_page,
7466 0, start, bvec->bv_len);
7467 if (likely(!ret))
7468 goto next;
7469try_again:
7470 done.uptodate = 0;
7471 done.start = start;
7472 init_completion(&done.done);
7473
7474 ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, start,
7475 start + bvec->bv_len - 1,
7476 io_bio->mirror_num,
7477 btrfs_retry_endio, &done);
7478 if (ret) {
7479 err = ret;
7480 goto next;
7481 }
7482
7483 wait_for_completion(&done.done);
7484
7485 if (!done.uptodate) {
7486
7487 goto try_again;
7488 }
7489next:
7490 offset += bvec->bv_len;
7491 start += bvec->bv_len;
7492 }
7493
7494 return err;
7495}
7496
7497static int btrfs_subio_endio_read(struct inode *inode,
7498 struct btrfs_io_bio *io_bio, int err)
7499{
7500 bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
7501
7502 if (skip_csum) {
7503 if (unlikely(err))
7504 return __btrfs_correct_data_nocsum(inode, io_bio);
7505 else
7506 return 0;
7507 } else {
7508 return __btrfs_subio_endio_read(inode, io_bio, err);
7509 }
7510}
7511
7512static void btrfs_endio_direct_read(struct bio *bio, int err)
7513{
7514 struct btrfs_dio_private *dip = bio->bi_private;
7515 struct inode *inode = dip->inode;
7516 struct bio *dio_bio;
7517 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
7518
7519 if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED)
7520 err = btrfs_subio_endio_read(inode, io_bio, err);
7521
7522 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
7523 dip->logical_offset + dip->bytes - 1);
7524 dio_bio = dip->dio_bio;
7525
7526 kfree(dip);
7527
7528
7529 if (err)
7530 clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
7531 dio_end_io(dio_bio, err);
7532
7533 if (io_bio->end_io)
7534 io_bio->end_io(io_bio, err);
7535 bio_put(bio);
7536}
7537
/*
 * Final end_io handler for a direct IO write.
 *
 * On success, walks the ordered extents covering the written range
 * [logical_offset, logical_offset + bytes) and queues completion work for
 * each one whose pending IO has fully finished.  On error the ordered
 * extent accounting is skipped entirely.  In both cases the dio private
 * data is freed and the dio bio completed.
 */
static void btrfs_endio_direct_write(struct bio *bio, int err)
{
	struct btrfs_dio_private *dip = bio->bi_private;
	struct inode *inode = dip->inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_ordered_extent *ordered = NULL;
	u64 ordered_offset = dip->logical_offset;
	u64 ordered_bytes = dip->bytes;
	struct bio *dio_bio;
	int ret;

	/* NOTE(review): on error no ordered extent is finished here */
	if (err)
		goto out_done;
again:
	/* err is always 0 here, so the last argument is always true */
	ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
						   &ordered_offset,
						   ordered_bytes, !err);
	if (!ret)
		goto out_test;

	/* this ordered extent is done: finish it from the workqueue */
	btrfs_init_work(&ordered->work, btrfs_endio_write_helper,
			finish_ordered_fn, NULL, NULL);
	btrfs_queue_work(root->fs_info->endio_write_workers,
			 &ordered->work);
out_test:
	/*
	 * The write may span several ordered extents; if the range has not
	 * been fully accounted for yet, go around again for the remainder.
	 */
	if (ordered_offset < dip->logical_offset + dip->bytes) {
		ordered_bytes = dip->logical_offset + dip->bytes -
			ordered_offset;
		ordered = NULL;
		goto again;
	}
out_done:
	dio_bio = dip->dio_bio;

	kfree(dip);

	/* make sure a failed write is not reported as uptodate */
	if (err)
		clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
	dio_end_io(dio_bio, err);
	bio_put(bio);
}
7584
7585static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
7586 struct bio *bio, int mirror_num,
7587 unsigned long bio_flags, u64 offset)
7588{
7589 int ret;
7590 struct btrfs_root *root = BTRFS_I(inode)->root;
7591 ret = btrfs_csum_one_bio(root, inode, bio, offset, 1);
7592 BUG_ON(ret);
7593 return 0;
7594}
7595
7596static void btrfs_end_dio_bio(struct bio *bio, int err)
7597{
7598 struct btrfs_dio_private *dip = bio->bi_private;
7599
7600 if (err)
7601 btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
7602 "direct IO failed ino %llu rw %lu sector %#Lx len %u err no %d",
7603 btrfs_ino(dip->inode), bio->bi_rw,
7604 (unsigned long long)bio->bi_iter.bi_sector,
7605 bio->bi_iter.bi_size, err);
7606
7607 if (dip->subio_endio)
7608 err = dip->subio_endio(dip->inode, btrfs_io_bio(bio), err);
7609
7610 if (err) {
7611 dip->errors = 1;
7612
7613
7614
7615
7616
7617 smp_mb__before_atomic();
7618 }
7619
7620
7621 if (!atomic_dec_and_test(&dip->pending_bios))
7622 goto out;
7623
7624 if (dip->errors) {
7625 bio_io_error(dip->orig_bio);
7626 } else {
7627 set_bit(BIO_UPTODATE, &dip->dio_bio->bi_flags);
7628 bio_endio(dip->orig_bio, 0);
7629 }
7630out:
7631 bio_put(bio);
7632}
7633
7634static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
7635 u64 first_sector, gfp_t gfp_flags)
7636{
7637 int nr_vecs = bio_get_nr_vecs(bdev);
7638 return btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags);
7639}
7640
7641static inline int btrfs_lookup_and_bind_dio_csum(struct btrfs_root *root,
7642 struct inode *inode,
7643 struct btrfs_dio_private *dip,
7644 struct bio *bio,
7645 u64 file_offset)
7646{
7647 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
7648 struct btrfs_io_bio *orig_io_bio = btrfs_io_bio(dip->orig_bio);
7649 int ret;
7650
7651
7652
7653
7654
7655
7656 if (dip->logical_offset == file_offset) {
7657 ret = btrfs_lookup_bio_sums_dio(root, inode, dip->orig_bio,
7658 file_offset);
7659 if (ret)
7660 return ret;
7661 }
7662
7663 if (bio == dip->orig_bio)
7664 return 0;
7665
7666 file_offset -= dip->logical_offset;
7667 file_offset >>= inode->i_sb->s_blocksize_bits;
7668 io_bio->csum = (u8 *)(((u32 *)orig_io_bio->csum) + file_offset);
7669
7670 return 0;
7671}
7672
/*
 * Submit one bio belonging to a direct IO request.
 *
 * Reads are first hooked up to the data end_io workqueue.  Unless @skip_sum
 * is set, writes get their checksums computed (inline, or through the async
 * submit workqueue when @async_submit is in effect) and reads get their
 * checksums looked up.  Finally the bio is mapped and sent to the device.
 *
 * An extra bio reference is held for the duration of the call.
 *
 * Returns 0 on success or a negative errno.
 */
static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
					 int rw, u64 file_offset, int skip_sum,
					 int async_submit)
{
	struct btrfs_dio_private *dip = bio->bi_private;
	int write = rw & REQ_WRITE;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret;

	/* async submission is turned off while the inode has sync writers */
	if (async_submit)
		async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);

	bio_get(bio);

	if (!write) {
		ret = btrfs_bio_wq_end_io(root->fs_info, bio,
				BTRFS_WQ_ENDIO_DATA);
		if (ret)
			goto err;
	}

	if (skip_sum)
		goto map;

	if (write && async_submit) {
		/*
		 * The bio is handed to the workqueue together with its start
		 * and done hooks, so btrfs_map_bio() is not called here.
		 * NOTE(review): presumably __btrfs_submit_bio_done does the
		 * mapping -- confirm.  The "err" label is used for both the
		 * success and the failure case.
		 */
		ret = btrfs_wq_submit_bio(root->fs_info,
				   inode, rw, bio, 0, 0,
				   file_offset,
				   __btrfs_submit_bio_start_direct_io,
				   __btrfs_submit_bio_done);
		goto err;
	} else if (write) {
		/* synchronous write: compute the checksums right away */
		ret = btrfs_csum_one_bio(root, inode, bio, file_offset, 1);
		if (ret)
			goto err;
	} else {
		ret = btrfs_lookup_and_bind_dio_csum(root, inode, dip, bio,
						     file_offset);
		if (ret)
			goto err;
	}
map:
	ret = btrfs_map_bio(root, rw, bio, 0, async_submit);
err:
	bio_put(bio);
	return ret;
}
7724
/*
 * Split dip->orig_bio into bios that each fit inside one btrfs_map_block()
 * mapping and submit them through __btrfs_submit_dio_bio().
 *
 * @rw:       request flags, passed through to the mapping and submit helpers
 * @dip:      per-request direct IO state (inode, original bio, file offset)
 * @skip_sum: passed through to __btrfs_submit_dio_bio()
 *
 * Returns -EIO if the initial mapping fails and -ENOMEM if the first split
 * bio cannot be allocated; nothing has been submitted in either case.  Every
 * later failure is recorded in dip->errors and the function returns 0.
 */
static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
				    int skip_sum)
{
	struct inode *inode = dip->inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct bio *bio;
	struct bio *orig_bio = dip->orig_bio;
	struct bio_vec *bvec = orig_bio->bi_io_vec;
	u64 start_sector = orig_bio->bi_iter.bi_sector;
	u64 file_offset = dip->logical_offset;
	u64 submit_len = 0;
	u64 map_length;
	/* NOTE(review): nr_pages is only counted and reset, never read. */
	int nr_pages = 0;
	int ret;
	int async_submit = 0;

	map_length = orig_bio->bi_iter.bi_size;
	ret = btrfs_map_block(root->fs_info, rw, start_sector << 9,
			      &map_length, NULL, 0);
	if (ret)
		return -EIO;

	/* The whole request fits in one mapping: submit the original bio. */
	if (map_length >= orig_bio->bi_iter.bi_size) {
		bio = orig_bio;
		dip->flags |= BTRFS_DIO_ORIG_BIO_SUBMITTED;
		goto submit;
	}

	/* Split bios are submitted asynchronously unless the profile is RAID5/6. */
	if (btrfs_get_alloc_profile(root, 1) &
	    (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6))
		async_submit = 0;
	else
		async_submit = 1;

	bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
	if (!bio)
		return -ENOMEM;

	bio->bi_private = dip;
	bio->bi_end_io = btrfs_end_dio_bio;
	btrfs_io_bio(bio)->logical = file_offset;
	/* Reference for the bio that is submitted last, at the submit label. */
	atomic_inc(&dip->pending_bios);

	while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
		/*
		 * Submit the current bio when the next vector would cross the
		 * mapping boundary or no longer fits into the bio.
		 */
		if (map_length < submit_len + bvec->bv_len ||
		    bio_add_page(bio, bvec->bv_page, bvec->bv_len,
				 bvec->bv_offset) < bvec->bv_len) {
			/*
			 * Take the reference before submitting so the count
			 * cannot reach zero while bios are still being set up.
			 */
			atomic_inc(&dip->pending_bios);
			ret = __btrfs_submit_dio_bio(bio, inode, rw,
						     file_offset, skip_sum,
						     async_submit);
			if (ret) {
				bio_put(bio);
				atomic_dec(&dip->pending_bios);
				goto out_err;
			}

			start_sector += submit_len >> 9;
			file_offset += submit_len;

			submit_len = 0;
			nr_pages = 0;

			bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev,
						  start_sector, GFP_NOFS);
			if (!bio)
				goto out_err;
			bio->bi_private = dip;
			bio->bi_end_io = btrfs_end_dio_bio;
			btrfs_io_bio(bio)->logical = file_offset;

			/* Re-map from the new start for the next bio. */
			map_length = orig_bio->bi_iter.bi_size;
			ret = btrfs_map_block(root->fs_info, rw,
					      start_sector << 9,
					      &map_length, NULL, 0);
			if (ret) {
				bio_put(bio);
				goto out_err;
			}
		} else {
			submit_len += bvec->bv_len;
			nr_pages++;
			bvec++;
		}
	}

submit:
	ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
				     async_submit);
	if (!ret)
		return 0;

	bio_put(bio);
out_err:
	dip->errors = 1;
	/*
	 * Order the dip->errors store before the decrement below, so the
	 * error is visible once the pending count can be seen reaching zero.
	 */
	smp_mb__before_atomic();
	if (atomic_dec_and_test(&dip->pending_bios))
		bio_io_error(dip->orig_bio);

	/* The error is reported through the bio, not the return value. */
	return 0;
}
7838
7839static void btrfs_submit_direct(int rw, struct bio *dio_bio,
7840 struct inode *inode, loff_t file_offset)
7841{
7842 struct btrfs_root *root = BTRFS_I(inode)->root;
7843 struct btrfs_dio_private *dip;
7844 struct bio *io_bio;
7845 struct btrfs_io_bio *btrfs_bio;
7846 int skip_sum;
7847 int write = rw & REQ_WRITE;
7848 int ret = 0;
7849
7850 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
7851
7852 io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS);
7853 if (!io_bio) {
7854 ret = -ENOMEM;
7855 goto free_ordered;
7856 }
7857
7858 dip = kzalloc(sizeof(*dip), GFP_NOFS);
7859 if (!dip) {
7860 ret = -ENOMEM;
7861 goto free_io_bio;
7862 }
7863
7864 dip->private = dio_bio->bi_private;
7865 dip->inode = inode;
7866 dip->logical_offset = file_offset;
7867 dip->bytes = dio_bio->bi_iter.bi_size;
7868 dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9;
7869 io_bio->bi_private = dip;
7870 dip->orig_bio = io_bio;
7871 dip->dio_bio = dio_bio;
7872 atomic_set(&dip->pending_bios, 0);
7873 btrfs_bio = btrfs_io_bio(io_bio);
7874 btrfs_bio->logical = file_offset;
7875
7876 if (write) {
7877 io_bio->bi_end_io = btrfs_endio_direct_write;
7878 } else {
7879 io_bio->bi_end_io = btrfs_endio_direct_read;
7880 dip->subio_endio = btrfs_subio_endio_read;
7881 }
7882
7883 ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
7884 if (!ret)
7885 return;
7886
7887 if (btrfs_bio->end_io)
7888 btrfs_bio->end_io(btrfs_bio, ret);
7889free_io_bio:
7890 bio_put(io_bio);
7891
7892free_ordered:
7893
7894
7895
7896
7897 if (write) {
7898 struct btrfs_ordered_extent *ordered;
7899 ordered = btrfs_lookup_ordered_extent(inode, file_offset);
7900 if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) &&
7901 !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))
7902 btrfs_free_reserved_extent(root, ordered->start,
7903 ordered->disk_len, 1);
7904 btrfs_put_ordered_extent(ordered);
7905 btrfs_put_ordered_extent(ordered);
7906 }
7907 bio_endio(dio_bio, ret);
7908}
7909
7910static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb,
7911 const struct iov_iter *iter, loff_t offset)
7912{
7913 int seg;
7914 int i;
7915 unsigned blocksize_mask = root->sectorsize - 1;
7916 ssize_t retval = -EINVAL;
7917
7918 if (offset & blocksize_mask)
7919 goto out;
7920
7921 if (iov_iter_alignment(iter) & blocksize_mask)
7922 goto out;
7923
7924
7925 if (rw & WRITE)
7926 return 0;
7927
7928
7929
7930
7931
7932 for (seg = 0; seg < iter->nr_segs; seg++) {
7933 for (i = seg + 1; i < iter->nr_segs; i++) {
7934 if (iter->iov[seg].iov_base == iter->iov[i].iov_base)
7935 goto out;
7936 }
7937 }
7938 retval = 0;
7939out:
7940 return retval;
7941}
7942
/*
 * Direct IO entry point for btrfs.
 *
 * @rw:     WRITE for writes, otherwise a read
 * @iocb:   kiocb of the request; iocb->ki_filp identifies the inode
 * @iter:   user buffers
 * @offset: starting file offset
 *
 * Returns 0 without doing IO when check_direct_IO() rejects the request
 * (presumably so the caller falls back to buffered IO - confirm), otherwise
 * the result of __blockdev_direct_IO() or a space reservation error.
 */
static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
			       struct iov_iter *iter, loff_t offset)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	size_t count = 0;
	int flags = 0;
	bool wakeup = true;
	bool relock = false;
	ssize_t ret;

	if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iter, offset))
		return 0;

	/* Counted as an in-flight DIO until inode_dio_done() is called. */
	atomic_inc(&inode->i_dio_count);
	smp_mb__after_atomic();

	/*
	 * If the inode has async extents, start writeback of the target range
	 * here, in addition to whatever the generic DIO code does.
	 */
	count = iov_iter_count(iter);
	if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
		     &BTRFS_I(inode)->runtime_flags))
		filemap_fdatawrite_range(inode->i_mapping, offset,
					 offset + count - 1);

	if (rw & WRITE) {
		/*
		 * A write that ends within i_size drops i_mutex for the
		 * duration of the IO and retakes it at "out".  A write that
		 * extends the file keeps the mutex held.
		 */
		if (offset + count <= inode->i_size) {
			mutex_unlock(&inode->i_mutex);
			relock = true;
		}
		ret = btrfs_delalloc_reserve_space(inode, count);
		if (ret)
			goto out;
	} else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
			    &BTRFS_I(inode)->runtime_flags)) {
		/*
		 * Locked read: drop the DIO count taken above right away and
		 * let the generic code run with DIO_LOCKING | DIO_SKIP_HOLES.
		 * wakeup is cleared so the count is not dropped again at "out".
		 */
		inode_dio_done(inode);
		flags = DIO_LOCKING | DIO_SKIP_HOLES;
		wakeup = false;
	}

	ret = __blockdev_direct_IO(rw, iocb, inode,
			BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
			iter, offset, btrfs_get_blocks_direct, NULL,
			btrfs_submit_direct, flags);
	if (rw & WRITE) {
		/*
		 * Hand back the unused part of the reservation: everything on
		 * a hard error, the unwritten tail on a short write, otherwise
		 * only a metadata release.
		 */
		if (ret < 0 && ret != -EIOCBQUEUED)
			btrfs_delalloc_release_space(inode, count);
		else if (ret >= 0 && (size_t)ret < count)
			btrfs_delalloc_release_space(inode,
						     count - (size_t)ret);
		else
			btrfs_delalloc_release_metadata(inode, 0);
	}
out:
	if (wakeup)
		inode_dio_done(inode);
	if (relock)
		mutex_lock(&inode->i_mutex);

	return ret;
}
8013
8014#define BTRFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC)
8015
8016static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
8017 __u64 start, __u64 len)
8018{
8019 int ret;
8020
8021 ret = fiemap_check_flags(fieinfo, BTRFS_FIEMAP_FLAGS);
8022 if (ret)
8023 return ret;
8024
8025 return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap);
8026}
8027
8028int btrfs_readpage(struct file *file, struct page *page)
8029{
8030 struct extent_io_tree *tree;
8031 tree = &BTRFS_I(page->mapping->host)->io_tree;
8032 return extent_read_full_page(tree, page, btrfs_get_extent, 0);
8033}
8034
8035static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
8036{
8037 struct extent_io_tree *tree;
8038
8039
8040 if (current->flags & PF_MEMALLOC) {
8041 redirty_page_for_writepage(wbc, page);
8042 unlock_page(page);
8043 return 0;
8044 }
8045 tree = &BTRFS_I(page->mapping->host)->io_tree;
8046 return extent_write_full_page(tree, page, btrfs_get_extent, wbc);
8047}
8048
8049static int btrfs_writepages(struct address_space *mapping,
8050 struct writeback_control *wbc)
8051{
8052 struct extent_io_tree *tree;
8053
8054 tree = &BTRFS_I(mapping->host)->io_tree;
8055 return extent_writepages(tree, mapping, btrfs_get_extent, wbc);
8056}
8057
8058static int
8059btrfs_readpages(struct file *file, struct address_space *mapping,
8060 struct list_head *pages, unsigned nr_pages)
8061{
8062 struct extent_io_tree *tree;
8063 tree = &BTRFS_I(mapping->host)->io_tree;
8064 return extent_readpages(tree, mapping, pages, nr_pages,
8065 btrfs_get_extent);
8066}
8067static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
8068{
8069 struct extent_io_tree *tree;
8070 struct extent_map_tree *map;
8071 int ret;
8072
8073 tree = &BTRFS_I(page->mapping->host)->io_tree;
8074 map = &BTRFS_I(page->mapping->host)->extent_tree;
8075 ret = try_release_extent_mapping(map, tree, page, gfp_flags);
8076 if (ret == 1) {
8077 ClearPagePrivate(page);
8078 set_page_private(page, 0);
8079 page_cache_release(page);
8080 }
8081 return ret;
8082}
8083
8084static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
8085{
8086 if (PageWriteback(page) || PageDirty(page))
8087 return 0;
8088 return __btrfs_releasepage(page, gfp_flags & GFP_NOFS);
8089}
8090
/*
 * Invalidate a page of the page cache.
 *
 * @page:   page being invalidated
 * @offset: offset within the page; a non-zero value means a partial
 *          invalidation, which only attempts btrfs_releasepage()
 * @length: not used by this function
 *
 * For a full-page invalidation, any ordered extent covering the page is
 * marked truncated and accounted, the extent state bits for the page range
 * are cleared and the page's private state is dropped.  All extent tree
 * locking and bit clearing is skipped when the inode has I_FREEING set.
 */
static void btrfs_invalidatepage(struct page *page, unsigned int offset,
				 unsigned int length)
{
	struct inode *inode = page->mapping->host;
	struct extent_io_tree *tree;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	u64 page_start = page_offset(page);
	u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
	int inode_evicting = inode->i_state & I_FREEING;

	/*
	 * Wait for in-flight writeback before touching the PagePrivate2 bit
	 * and the ordered extent accounting below.
	 */
	wait_on_page_writeback(page);

	tree = &BTRFS_I(inode)->io_tree;
	if (offset) {
		btrfs_releasepage(page, GFP_NOFS);
		return;
	}

	if (!inode_evicting)
		lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
	ordered = btrfs_lookup_ordered_extent(inode, page_start);
	if (ordered) {
		/*
		 * Clear the delalloc/dirty state now.  EXTENT_LOCKED is among
		 * the cleared bits, so the range is unlocked here and locked
		 * again further down.
		 */
		if (!inode_evicting)
			clear_extent_bit(tree, page_start, page_end,
					 EXTENT_DIRTY | EXTENT_DELALLOC |
					 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
					 EXTENT_DEFRAG, 1, 0, &cached_state,
					 GFP_NOFS);
		/*
		 * Only the path that actually clears PagePrivate2 does the
		 * ordered extent accounting for this page.
		 */
		if (TestClearPagePrivate2(page)) {
			/* Shadows the outer io_tree pointer inside this block. */
			struct btrfs_ordered_inode_tree *tree;
			u64 new_len;

			tree = &BTRFS_I(inode)->ordered_tree;

			/* Shrink truncated_len so it ends at this page's start. */
			spin_lock_irq(&tree->lock);
			set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
			new_len = page_start - ordered->file_offset;
			if (new_len < ordered->truncated_len)
				ordered->truncated_len = new_len;
			spin_unlock_irq(&tree->lock);

			if (btrfs_dec_test_ordered_pending(inode, &ordered,
							   page_start,
							   PAGE_CACHE_SIZE, 1))
				btrfs_finish_ordered_io(ordered);
		}
		btrfs_put_ordered_extent(ordered);
		if (!inode_evicting) {
			cached_state = NULL;
			lock_extent_bits(tree, page_start, page_end, 0,
					 &cached_state);
		}
	}

	if (!inode_evicting) {
		clear_extent_bit(tree, page_start, page_end,
				 EXTENT_LOCKED | EXTENT_DIRTY |
				 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
				 EXTENT_DEFRAG, 1, 1,
				 &cached_state, GFP_NOFS);

		__btrfs_releasepage(page, GFP_NOFS);
	}

	/* Drop whatever private state is still attached to the page. */
	ClearPageChecked(page);
	if (PagePrivate(page)) {
		ClearPagePrivate(page);
		set_page_private(page, 0);
		page_cache_release(page);
	}
}
8178
8179
8180
8181
8182
8183
8184
8185
8186
8187
8188
8189
8190
8191
8192
8193
/*
 * Page fault handler called when a mapped page is about to become writable.
 *
 * Reserves delalloc space for one page, updates the file times, waits for
 * any ordered extent covering the page, marks the page range delalloc,
 * zeroes the part of the page beyond i_size and dirties the page.
 *
 * Returns VM_FAULT_LOCKED with the page locked on success.  Returns
 * VM_FAULT_OOM or VM_FAULT_SIGBUS on failure, and VM_FAULT_NOPAGE when the
 * page no longer belongs to this mapping or lies at or beyond i_size.
 */
int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page = vmf->page;
	struct inode *inode = file_inode(vma->vm_file);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	char *kaddr;
	unsigned long zero_start;
	loff_t size;
	int ret;
	int reserved = 0;
	u64 page_start;
	u64 page_end;

	sb_start_pagefault(inode->i_sb);
	ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
	if (!ret) {
		ret = file_update_time(vma->vm_file);
		reserved = 1;
	}
	if (ret) {
		if (ret == -ENOMEM)
			ret = VM_FAULT_OOM;
		else /* any other error */
			ret = VM_FAULT_SIGBUS;
		/* Only undo the reservation if it was actually made. */
		if (reserved)
			goto out;
		goto out_noreserve;
	}

	/* Returned if the page turns out to be gone in the check below. */
	ret = VM_FAULT_NOPAGE;
again:
	lock_page(page);
	size = i_size_read(inode);
	page_start = page_offset(page);
	page_end = page_start + PAGE_CACHE_SIZE - 1;

	if ((page->mapping != inode->i_mapping) ||
	    (page_start >= size)) {
		/* The page was removed from this mapping or is past i_size. */
		goto out_unlock;
	}
	wait_on_page_writeback(page);

	lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state);
	set_page_extent_mapped(page);

	/*
	 * If an ordered extent covers the page, drop the locks, start and
	 * wait for it, then retry from the top.
	 */
	ordered = btrfs_lookup_ordered_extent(inode, page_start);
	if (ordered) {
		unlock_extent_cached(io_tree, page_start, page_end,
				     &cached_state, GFP_NOFS);
		unlock_page(page);
		btrfs_start_ordered_extent(inode, ordered, 1);
		btrfs_put_ordered_extent(ordered);
		goto again;
	}

	/*
	 * Clear any delalloc state already set on the range before setting
	 * it again below (presumably so that the space reserved above is not
	 * accounted twice for an already-dirty page - confirm).
	 */
	clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
			  EXTENT_DIRTY | EXTENT_DELALLOC |
			  EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
			  0, 0, &cached_state, GFP_NOFS);

	ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
					&cached_state);
	if (ret) {
		unlock_extent_cached(io_tree, page_start, page_end,
				     &cached_state, GFP_NOFS);
		ret = VM_FAULT_SIGBUS;
		goto out_unlock;
	}
	ret = 0;

	/* If the page straddles i_size, zero everything from i_size on. */
	if (page_start + PAGE_CACHE_SIZE > size)
		zero_start = size & ~PAGE_CACHE_MASK;
	else
		zero_start = PAGE_CACHE_SIZE;

	if (zero_start != PAGE_CACHE_SIZE) {
		kaddr = kmap(page);
		memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start);
		flush_dcache_page(page);
		kunmap(page);
	}
	ClearPageChecked(page);
	set_page_dirty(page);
	SetPageUptodate(page);

	/* Record the current transaction/log ids on the inode. */
	BTRFS_I(inode)->last_trans = root->fs_info->generation;
	BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
	BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit;

	unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS);

out_unlock:
	if (!ret) {
		/* Success: the page stays locked and the reservation is kept. */
		sb_end_pagefault(inode->i_sb);
		return VM_FAULT_LOCKED;
	}
	unlock_page(page);
out:
	btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
out_noreserve:
	sb_end_pagefault(inode->i_sb);
	return ret;
}
8313
/*
 * Remove the file extent items of @inode that lie beyond inode->i_size.
 *
 * The work runs against a temporary block reservation of min_size bytes.
 * Each time btrfs_truncate_inode_items() returns -ENOSPC, the inode is
 * updated, the transaction is ended and a new one is started with a
 * refilled reservation.
 *
 * Returns 0 on success or a negative errno.
 */
static int btrfs_truncate(struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_block_rsv *rsv;
	int ret = 0;
	int err = 0;
	struct btrfs_trans_handle *trans;
	u64 mask = root->sectorsize - 1;
	u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);

	/* Wait for ordered IO from the sector containing i_size onwards. */
	ret = btrfs_wait_ordered_range(inode, inode->i_size & (~mask),
				       (u64)-1);
	if (ret)
		return ret;

	/*
	 * Temporary reservation used while deleting items.  failfast is set;
	 * presumably this makes running out of reserved space surface as
	 * -ENOSPC, which the loop below handles - confirm.
	 */
	rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
	if (!rsv)
		return -ENOMEM;
	rsv->size = min_size;
	rsv->failfast = 1;

	/*
	 * Two units are reserved with the transaction (presumably one for
	 * the truncate itself and one for the inode update - confirm).
	 */
	trans = btrfs_start_transaction(root, 2);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		goto out;
	}

	/* Move min_size bytes from the transaction reservation into rsv. */
	ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv,
				      min_size);
	BUG_ON(ret);

	/* Flag the inode as needing a full sync after this truncate. */
	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
	trans->block_rsv = rsv;

	while (1) {
		ret = btrfs_truncate_inode_items(trans, root, inode,
						 inode->i_size,
						 BTRFS_EXTENT_DATA_KEY);
		/* Anything but -ENOSPC (success included) ends the loop. */
		if (ret != -ENOSPC) {
			err = ret;
			break;
		}

		trans->block_rsv = &root->fs_info->trans_block_rsv;
		ret = btrfs_update_inode(trans, root, inode);
		if (ret) {
			err = ret;
			break;
		}

		btrfs_end_transaction(trans, root);
		btrfs_btree_balance_dirty(root);

		trans = btrfs_start_transaction(root, 2);
		if (IS_ERR(trans)) {
			ret = err = PTR_ERR(trans);
			/* No transaction to finish below. */
			trans = NULL;
			break;
		}

		ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv,
					      rsv, min_size);
		BUG_ON(ret);
		trans->block_rsv = rsv;
	}

	/* On success with links remaining, remove the orphan item. */
	if (ret == 0 && inode->i_nlink > 0) {
		trans->block_rsv = root->orphan_block_rsv;
		ret = btrfs_orphan_del(trans, inode);
		if (ret)
			err = ret;
	}

	if (trans) {
		trans->block_rsv = &root->fs_info->trans_block_rsv;
		ret = btrfs_update_inode(trans, root, inode);
		if (ret && !err)
			err = ret;

		ret = btrfs_end_transaction(trans, root);
		btrfs_btree_balance_dirty(root);
	}

out:
	btrfs_free_block_rsv(root, rsv);

	/* The first recorded error wins; otherwise report the last ret. */
	if (ret && !err)
		err = ret;

	return err;
}
8453
8454
8455
8456
8457int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
8458 struct btrfs_root *new_root,
8459 struct btrfs_root *parent_root,
8460 u64 new_dirid)
8461{
8462 struct inode *inode;
8463 int err;
8464 u64 index = 0;
8465
8466 inode = btrfs_new_inode(trans, new_root, NULL, "..", 2,
8467 new_dirid, new_dirid,
8468 S_IFDIR | (~current_umask() & S_IRWXUGO),
8469 &index);
8470 if (IS_ERR(inode))
8471 return PTR_ERR(inode);
8472 inode->i_op = &btrfs_dir_inode_operations;
8473 inode->i_fop = &btrfs_dir_file_operations;
8474
8475 set_nlink(inode, 1);
8476 btrfs_i_size_write(inode, 0);
8477 unlock_new_inode(inode);
8478
8479 err = btrfs_subvol_inherit_props(trans, new_root, parent_root);
8480 if (err)
8481 btrfs_err(new_root->fs_info,
8482 "error inheriting subvolume %llu properties: %d",
8483 new_root->root_key.objectid, err);
8484
8485 err = btrfs_update_inode(trans, new_root, inode);
8486
8487 iput(inode);
8488 return err;
8489}
8490
8491struct inode *btrfs_alloc_inode(struct super_block *sb)
8492{
8493 struct btrfs_inode *ei;
8494 struct inode *inode;
8495
8496 ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
8497 if (!ei)
8498 return NULL;
8499
8500 ei->root = NULL;
8501 ei->generation = 0;
8502 ei->last_trans = 0;
8503 ei->last_sub_trans = 0;
8504 ei->logged_trans = 0;
8505 ei->delalloc_bytes = 0;
8506 ei->defrag_bytes = 0;
8507 ei->disk_i_size = 0;
8508 ei->flags = 0;
8509 ei->csum_bytes = 0;
8510 ei->index_cnt = (u64)-1;
8511 ei->dir_index = 0;
8512 ei->last_unlink_trans = 0;
8513 ei->last_log_commit = 0;
8514
8515 spin_lock_init(&ei->lock);
8516 ei->outstanding_extents = 0;
8517 ei->reserved_extents = 0;
8518
8519 ei->runtime_flags = 0;
8520 ei->force_compress = BTRFS_COMPRESS_NONE;
8521
8522 ei->delayed_node = NULL;
8523
8524 inode = &ei->vfs_inode;
8525 extent_map_tree_init(&ei->extent_tree);
8526 extent_io_tree_init(&ei->io_tree, &inode->i_data);
8527 extent_io_tree_init(&ei->io_failure_tree, &inode->i_data);
8528 ei->io_tree.track_uptodate = 1;
8529 ei->io_failure_tree.track_uptodate = 1;
8530 atomic_set(&ei->sync_writers, 0);
8531 mutex_init(&ei->log_mutex);
8532 mutex_init(&ei->delalloc_mutex);
8533 btrfs_ordered_inode_tree_init(&ei->ordered_tree);
8534 INIT_LIST_HEAD(&ei->delalloc_inodes);
8535 RB_CLEAR_NODE(&ei->rb_node);
8536
8537 return inode;
8538}
8539
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/*
 * Sanity-test teardown: drop the inode's whole extent cache and return the
 * btrfs inode straight to its slab cache.
 */
void btrfs_test_destroy_inode(struct inode *inode)
{
	struct btrfs_inode *bi = BTRFS_I(inode);

	btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
	kmem_cache_free(btrfs_inode_cachep, bi);
}
#endif
8547
8548static void btrfs_i_callback(struct rcu_head *head)
8549{
8550 struct inode *inode = container_of(head, struct inode, i_rcu);
8551 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
8552}
8553
8554void btrfs_destroy_inode(struct inode *inode)
8555{
8556 struct btrfs_ordered_extent *ordered;
8557 struct btrfs_root *root = BTRFS_I(inode)->root;
8558
8559 WARN_ON(!hlist_empty(&inode->i_dentry));
8560 WARN_ON(inode->i_data.nrpages);
8561 WARN_ON(BTRFS_I(inode)->outstanding_extents);
8562 WARN_ON(BTRFS_I(inode)->reserved_extents);
8563 WARN_ON(BTRFS_I(inode)->delalloc_bytes);
8564 WARN_ON(BTRFS_I(inode)->csum_bytes);
8565 WARN_ON(BTRFS_I(inode)->defrag_bytes);
8566
8567
8568
8569
8570
8571
8572 if (!root)
8573 goto free;
8574
8575 if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
8576 &BTRFS_I(inode)->runtime_flags)) {
8577 btrfs_info(root->fs_info, "inode %llu still on the orphan list",
8578 btrfs_ino(inode));
8579 atomic_dec(&root->orphan_inodes);
8580 }
8581
8582 while (1) {
8583 ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
8584 if (!ordered)
8585 break;
8586 else {
8587 btrfs_err(root->fs_info, "found ordered extent %llu %llu on inode cleanup",
8588 ordered->file_offset, ordered->len);
8589 btrfs_remove_ordered_extent(inode, ordered);
8590 btrfs_put_ordered_extent(ordered);
8591 btrfs_put_ordered_extent(ordered);
8592 }
8593 }
8594 inode_tree_del(inode);
8595 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
8596free:
8597 call_rcu(&inode->i_rcu, btrfs_i_callback);
8598}
8599
8600int btrfs_drop_inode(struct inode *inode)
8601{
8602 struct btrfs_root *root = BTRFS_I(inode)->root;
8603
8604 if (root == NULL)
8605 return 1;
8606
8607
8608 if (btrfs_root_refs(&root->root_item) == 0)
8609 return 1;
8610 else
8611 return generic_drop_inode(inode);
8612}
8613
8614static void init_once(void *foo)
8615{
8616 struct btrfs_inode *ei = (struct btrfs_inode *) foo;
8617
8618 inode_init_once(&ei->vfs_inode);
8619}
8620
8621void btrfs_destroy_cachep(void)
8622{
8623
8624
8625
8626
8627 rcu_barrier();
8628 if (btrfs_inode_cachep)
8629 kmem_cache_destroy(btrfs_inode_cachep);
8630 if (btrfs_trans_handle_cachep)
8631 kmem_cache_destroy(btrfs_trans_handle_cachep);
8632 if (btrfs_transaction_cachep)
8633 kmem_cache_destroy(btrfs_transaction_cachep);
8634 if (btrfs_path_cachep)
8635 kmem_cache_destroy(btrfs_path_cachep);
8636 if (btrfs_free_space_cachep)
8637 kmem_cache_destroy(btrfs_free_space_cachep);
8638 if (btrfs_delalloc_work_cachep)
8639 kmem_cache_destroy(btrfs_delalloc_work_cachep);
8640}
8641
8642int btrfs_init_cachep(void)
8643{
8644 btrfs_inode_cachep = kmem_cache_create("btrfs_inode",
8645 sizeof(struct btrfs_inode), 0,
8646 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, init_once);
8647 if (!btrfs_inode_cachep)
8648 goto fail;
8649
8650 btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle",
8651 sizeof(struct btrfs_trans_handle), 0,
8652 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
8653 if (!btrfs_trans_handle_cachep)
8654 goto fail;
8655
8656 btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction",
8657 sizeof(struct btrfs_transaction), 0,
8658 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
8659 if (!btrfs_transaction_cachep)
8660 goto fail;
8661
8662 btrfs_path_cachep = kmem_cache_create("btrfs_path",
8663 sizeof(struct btrfs_path), 0,
8664 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
8665 if (!btrfs_path_cachep)
8666 goto fail;
8667
8668 btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space",
8669 sizeof(struct btrfs_free_space), 0,
8670 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
8671 if (!btrfs_free_space_cachep)
8672 goto fail;
8673
8674 btrfs_delalloc_work_cachep = kmem_cache_create("btrfs_delalloc_work",
8675 sizeof(struct btrfs_delalloc_work), 0,
8676 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
8677 NULL);
8678 if (!btrfs_delalloc_work_cachep)
8679 goto fail;
8680
8681 return 0;
8682fail:
8683 btrfs_destroy_cachep();
8684 return -ENOMEM;
8685}
8686
8687static int btrfs_getattr(struct vfsmount *mnt,
8688 struct dentry *dentry, struct kstat *stat)
8689{
8690 u64 delalloc_bytes;
8691 struct inode *inode = dentry->d_inode;
8692 u32 blocksize = inode->i_sb->s_blocksize;
8693
8694 generic_fillattr(inode, stat);
8695 stat->dev = BTRFS_I(inode)->root->anon_dev;
8696 stat->blksize = PAGE_CACHE_SIZE;
8697
8698 spin_lock(&BTRFS_I(inode)->lock);
8699 delalloc_bytes = BTRFS_I(inode)->delalloc_bytes;
8700 spin_unlock(&BTRFS_I(inode)->lock);
8701 stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) +
8702 ALIGN(delalloc_bytes, blocksize)) >> 9;
8703 return 0;
8704}
8705
8706static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
8707 struct inode *new_dir, struct dentry *new_dentry)
8708{
8709 struct btrfs_trans_handle *trans;
8710 struct btrfs_root *root = BTRFS_I(old_dir)->root;
8711 struct btrfs_root *dest = BTRFS_I(new_dir)->root;
8712 struct inode *new_inode = new_dentry->d_inode;
8713 struct inode *old_inode = old_dentry->d_inode;
8714 struct timespec ctime = CURRENT_TIME;
8715 u64 index = 0;
8716 u64 root_objectid;
8717 int ret;
8718 u64 old_ino = btrfs_ino(old_inode);
8719
8720 if (btrfs_ino(new_dir) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
8721 return -EPERM;
8722
8723
8724 if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
8725 return -EXDEV;
8726
8727 if (old_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID ||
8728 (new_inode && btrfs_ino(new_inode) == BTRFS_FIRST_FREE_OBJECTID))
8729 return -ENOTEMPTY;
8730
8731 if (S_ISDIR(old_inode->i_mode) && new_inode &&
8732 new_inode->i_size > BTRFS_EMPTY_DIR_SIZE)
8733 return -ENOTEMPTY;
8734
8735
8736
8737 ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino,
8738 new_dentry->d_name.name,
8739 new_dentry->d_name.len);
8740
8741 if (ret) {
8742 if (ret == -EEXIST) {
8743
8744
8745 if (WARN_ON(!new_inode)) {
8746 return ret;
8747 }
8748 } else {
8749
8750 return ret;
8751 }
8752 }
8753 ret = 0;
8754
8755
8756
8757
8758
8759 if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size)
8760 filemap_flush(old_inode->i_mapping);
8761
8762
8763 if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
8764 down_read(&root->fs_info->subvol_sem);
8765
8766
8767
8768
8769
8770
8771
8772
8773 trans = btrfs_start_transaction(root, 11);
8774 if (IS_ERR(trans)) {
8775 ret = PTR_ERR(trans);
8776 goto out_notrans;
8777 }
8778
8779 if (dest != root)
8780 btrfs_record_root_in_trans(trans, dest);
8781
8782 ret = btrfs_set_inode_index(new_dir, &index);
8783 if (ret)
8784 goto out_fail;
8785
8786 BTRFS_I(old_inode)->dir_index = 0ULL;
8787 if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
8788
8789 btrfs_set_log_full_commit(root->fs_info, trans);
8790 } else {
8791 ret = btrfs_insert_inode_ref(trans, dest,
8792 new_dentry->d_name.name,
8793 new_dentry->d_name.len,
8794 old_ino,
8795 btrfs_ino(new_dir), index);
8796 if (ret)
8797 goto out_fail;
8798
8799
8800
8801
8802
8803
8804
8805 btrfs_pin_log_trans(root);
8806 }
8807
8808 inode_inc_iversion(old_dir);
8809 inode_inc_iversion(new_dir);
8810 inode_inc_iversion(old_inode);
8811 old_dir->i_ctime = old_dir->i_mtime = ctime;
8812 new_dir->i_ctime = new_dir->i_mtime = ctime;
8813 old_inode->i_ctime = ctime;
8814
8815 if (old_dentry->d_parent != new_dentry->d_parent)
8816 btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
8817
8818 if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
8819 root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
8820 ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid,
8821 old_dentry->d_name.name,
8822 old_dentry->d_name.len);
8823 } else {
8824 ret = __btrfs_unlink_inode(trans, root, old_dir,
8825 old_dentry->d_inode,
8826 old_dentry->d_name.name,
8827 old_dentry->d_name.len);
8828 if (!ret)
8829 ret = btrfs_update_inode(trans, root, old_inode);
8830 }
8831 if (ret) {
8832 btrfs_abort_transaction(trans, root, ret);
8833 goto out_fail;
8834 }
8835
8836 if (new_inode) {
8837 inode_inc_iversion(new_inode);
8838 new_inode->i_ctime = CURRENT_TIME;
8839 if (unlikely(btrfs_ino(new_inode) ==
8840 BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
8841 root_objectid = BTRFS_I(new_inode)->location.objectid;
8842 ret = btrfs_unlink_subvol(trans, dest, new_dir,
8843 root_objectid,
8844 new_dentry->d_name.name,
8845 new_dentry->d_name.len);
8846 BUG_ON(new_inode->i_nlink == 0);
8847 } else {
8848 ret = btrfs_unlink_inode(trans, dest, new_dir,
8849 new_dentry->d_inode,
8850 new_dentry->d_name.name,
8851 new_dentry->d_name.len);
8852 }
8853 if (!ret && new_inode->i_nlink == 0)
8854 ret = btrfs_orphan_add(trans, new_dentry->d_inode);
8855 if (ret) {
8856 btrfs_abort_transaction(trans, root, ret);
8857 goto out_fail;
8858 }
8859 }
8860
8861 ret = btrfs_add_link(trans, new_dir, old_inode,
8862 new_dentry->d_name.name,
8863 new_dentry->d_name.len, 0, index);
8864 if (ret) {
8865 btrfs_abort_transaction(trans, root, ret);
8866 goto out_fail;
8867 }
8868
8869 if (old_inode->i_nlink == 1)
8870 BTRFS_I(old_inode)->dir_index = index;
8871
8872 if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
8873 struct dentry *parent = new_dentry->d_parent;
8874 btrfs_log_new_name(trans, old_inode, old_dir, parent);
8875 btrfs_end_log_trans(root);
8876 }
8877out_fail:
8878 btrfs_end_transaction(trans, root);
8879out_notrans:
8880 if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
8881 up_read(&root->fs_info->subvol_sem);
8882
8883 return ret;
8884}
8885
8886static int btrfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
8887 struct inode *new_dir, struct dentry *new_dentry,
8888 unsigned int flags)
8889{
8890 if (flags & ~RENAME_NOREPLACE)
8891 return -EINVAL;
8892
8893 return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry);
8894}
8895
8896static void btrfs_run_delalloc_work(struct btrfs_work *work)
8897{
8898 struct btrfs_delalloc_work *delalloc_work;
8899 struct inode *inode;
8900
8901 delalloc_work = container_of(work, struct btrfs_delalloc_work,
8902 work);
8903 inode = delalloc_work->inode;
8904 if (delalloc_work->wait) {
8905 btrfs_wait_ordered_range(inode, 0, (u64)-1);
8906 } else {
8907 filemap_flush(inode->i_mapping);
8908 if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
8909 &BTRFS_I(inode)->runtime_flags))
8910 filemap_flush(inode->i_mapping);
8911 }
8912
8913 if (delalloc_work->delay_iput)
8914 btrfs_add_delayed_iput(inode);
8915 else
8916 iput(inode);
8917 complete(&delalloc_work->completion);
8918}
8919
8920struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
8921 int wait, int delay_iput)
8922{
8923 struct btrfs_delalloc_work *work;
8924
8925 work = kmem_cache_zalloc(btrfs_delalloc_work_cachep, GFP_NOFS);
8926 if (!work)
8927 return NULL;
8928
8929 init_completion(&work->completion);
8930 INIT_LIST_HEAD(&work->list);
8931 work->inode = inode;
8932 work->wait = wait;
8933 work->delay_iput = delay_iput;
8934 WARN_ON_ONCE(!inode);
8935 btrfs_init_work(&work->work, btrfs_flush_delalloc_helper,
8936 btrfs_run_delalloc_work, NULL, NULL);
8937
8938 return work;
8939}
8940
8941void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work)
8942{
8943 wait_for_completion(&work->completion);
8944 kmem_cache_free(btrfs_delalloc_work_cachep, work);
8945}
8946
8947
8948
8949
8950
/*
 * Queue flush work for the inodes on root->delalloc_inodes.
 *
 * @root:       root whose delalloc inodes are flushed
 * @delay_iput: passed to each work item; selects delayed vs. immediate iput
 * @nr:         maximum number of inodes to queue, or -1 for no limit
 *
 * Returns the number of work items queued, or -ENOMEM if a work item could
 * not be allocated.  All queued work is waited for before returning.
 */
static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput,
				   int nr)
{
	struct btrfs_inode *binode;
	struct inode *inode;
	struct btrfs_delalloc_work *work, *next;
	struct list_head works;
	struct list_head splice;
	int ret = 0;

	INIT_LIST_HEAD(&works);
	INIT_LIST_HEAD(&splice);

	mutex_lock(&root->delalloc_mutex);
	spin_lock(&root->delalloc_lock);
	/* Work from a private copy of the list; entries move back one by one. */
	list_splice_init(&root->delalloc_inodes, &splice);
	while (!list_empty(&splice)) {
		binode = list_entry(splice.next, struct btrfs_inode,
				    delalloc_inodes);

		list_move_tail(&binode->delalloc_inodes,
			       &root->delalloc_inodes);
		inode = igrab(&binode->vfs_inode);
		if (!inode) {
			/* No reference could be taken: skip this inode. */
			cond_resched_lock(&root->delalloc_lock);
			continue;
		}
		/* The spinlock is not held while allocating and queueing. */
		spin_unlock(&root->delalloc_lock);

		work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
		if (!work) {
			/* Drop the reference taken by igrab() above. */
			if (delay_iput)
				btrfs_add_delayed_iput(inode);
			else
				iput(inode);
			ret = -ENOMEM;
			goto out;
		}
		list_add_tail(&work->list, &works);
		btrfs_queue_work(root->fs_info->flush_workers,
				 &work->work);
		/* ret doubles as the count of queued work items. */
		ret++;
		if (nr != -1 && ret >= nr)
			goto out;
		cond_resched();
		spin_lock(&root->delalloc_lock);
	}
	spin_unlock(&root->delalloc_lock);

out:
	/* Reached with the spinlock released on every path. */
	list_for_each_entry_safe(work, next, &works, list) {
		list_del_init(&work->list);
		btrfs_wait_and_free_delalloc_work(work);
	}

	/* Return any inodes that were not processed to the root's list. */
	if (!list_empty_careful(&splice)) {
		spin_lock(&root->delalloc_lock);
		list_splice_tail(&splice, &root->delalloc_inodes);
		spin_unlock(&root->delalloc_lock);
	}
	mutex_unlock(&root->delalloc_mutex);
	return ret;
}
9014
9015int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
9016{
9017 int ret;
9018
9019 if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
9020 return -EROFS;
9021
9022 ret = __start_delalloc_inodes(root, delay_iput, -1);
9023 if (ret > 0)
9024 ret = 0;
9025
9026
9027
9028
9029
9030 atomic_inc(&root->fs_info->async_submit_draining);
9031 while (atomic_read(&root->fs_info->nr_async_submits) ||
9032 atomic_read(&root->fs_info->async_delalloc_pages)) {
9033 wait_event(root->fs_info->async_submit_wait,
9034 (atomic_read(&root->fs_info->nr_async_submits) == 0 &&
9035 atomic_read(&root->fs_info->async_delalloc_pages) == 0));
9036 }
9037 atomic_dec(&root->fs_info->async_submit_draining);
9038 return ret;
9039}
9040
9041int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput,
9042 int nr)
9043{
9044 struct btrfs_root *root;
9045 struct list_head splice;
9046 int ret;
9047
9048 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
9049 return -EROFS;
9050
9051 INIT_LIST_HEAD(&splice);
9052
9053 mutex_lock(&fs_info->delalloc_root_mutex);
9054 spin_lock(&fs_info->delalloc_root_lock);
9055 list_splice_init(&fs_info->delalloc_roots, &splice);
9056 while (!list_empty(&splice) && nr) {
9057 root = list_first_entry(&splice, struct btrfs_root,
9058 delalloc_root);
9059 root = btrfs_grab_fs_root(root);
9060 BUG_ON(!root);
9061 list_move_tail(&root->delalloc_root,
9062 &fs_info->delalloc_roots);
9063 spin_unlock(&fs_info->delalloc_root_lock);
9064
9065 ret = __start_delalloc_inodes(root, delay_iput, nr);
9066 btrfs_put_fs_root(root);
9067 if (ret < 0)
9068 goto out;
9069
9070 if (nr != -1) {
9071 nr -= ret;
9072 WARN_ON(nr < 0);
9073 }
9074 spin_lock(&fs_info->delalloc_root_lock);
9075 }
9076 spin_unlock(&fs_info->delalloc_root_lock);
9077
9078 ret = 0;
9079 atomic_inc(&fs_info->async_submit_draining);
9080 while (atomic_read(&fs_info->nr_async_submits) ||
9081 atomic_read(&fs_info->async_delalloc_pages)) {
9082 wait_event(fs_info->async_submit_wait,
9083 (atomic_read(&fs_info->nr_async_submits) == 0 &&
9084 atomic_read(&fs_info->async_delalloc_pages) == 0));
9085 }
9086 atomic_dec(&fs_info->async_submit_draining);
9087out:
9088 if (!list_empty_careful(&splice)) {
9089 spin_lock(&fs_info->delalloc_root_lock);
9090 list_splice_tail(&splice, &fs_info->delalloc_roots);
9091 spin_unlock(&fs_info->delalloc_root_lock);
9092 }
9093 mutex_unlock(&fs_info->delalloc_root_mutex);
9094 return ret;
9095}
9096
9097static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
9098 const char *symname)
9099{
9100 struct btrfs_trans_handle *trans;
9101 struct btrfs_root *root = BTRFS_I(dir)->root;
9102 struct btrfs_path *path;
9103 struct btrfs_key key;
9104 struct inode *inode = NULL;
9105 int err;
9106 int drop_inode = 0;
9107 u64 objectid;
9108 u64 index = 0;
9109 int name_len;
9110 int datasize;
9111 unsigned long ptr;
9112 struct btrfs_file_extent_item *ei;
9113 struct extent_buffer *leaf;
9114
9115 name_len = strlen(symname);
9116 if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
9117 return -ENAMETOOLONG;
9118
9119
9120
9121
9122
9123
9124 trans = btrfs_start_transaction(root, 5);
9125 if (IS_ERR(trans))
9126 return PTR_ERR(trans);
9127
9128 err = btrfs_find_free_ino(root, &objectid);
9129 if (err)
9130 goto out_unlock;
9131
9132 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
9133 dentry->d_name.len, btrfs_ino(dir), objectid,
9134 S_IFLNK|S_IRWXUGO, &index);
9135 if (IS_ERR(inode)) {
9136 err = PTR_ERR(inode);
9137 goto out_unlock;
9138 }
9139
9140
9141
9142
9143
9144
9145
9146 inode->i_fop = &btrfs_file_operations;
9147 inode->i_op = &btrfs_file_inode_operations;
9148 inode->i_mapping->a_ops = &btrfs_aops;
9149 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
9150 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
9151
9152 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
9153 if (err)
9154 goto out_unlock_inode;
9155
9156 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
9157 if (err)
9158 goto out_unlock_inode;
9159
9160 path = btrfs_alloc_path();
9161 if (!path) {
9162 err = -ENOMEM;
9163 goto out_unlock_inode;
9164 }
9165 key.objectid = btrfs_ino(inode);
9166 key.offset = 0;
9167 key.type = BTRFS_EXTENT_DATA_KEY;
9168 datasize = btrfs_file_extent_calc_inline_size(name_len);
9169 err = btrfs_insert_empty_item(trans, root, path, &key,
9170 datasize);
9171 if (err) {
9172 btrfs_free_path(path);
9173 goto out_unlock_inode;
9174 }
9175 leaf = path->nodes[0];
9176 ei = btrfs_item_ptr(leaf, path->slots[0],
9177 struct btrfs_file_extent_item);
9178 btrfs_set_file_extent_generation(leaf, ei, trans->transid);
9179 btrfs_set_file_extent_type(leaf, ei,
9180 BTRFS_FILE_EXTENT_INLINE);
9181 btrfs_set_file_extent_encryption(leaf, ei, 0);
9182 btrfs_set_file_extent_compression(leaf, ei, 0);
9183 btrfs_set_file_extent_other_encoding(leaf, ei, 0);
9184 btrfs_set_file_extent_ram_bytes(leaf, ei, name_len);
9185
9186 ptr = btrfs_file_extent_inline_start(ei);
9187 write_extent_buffer(leaf, symname, ptr, name_len);
9188 btrfs_mark_buffer_dirty(leaf);
9189 btrfs_free_path(path);
9190
9191 inode->i_op = &btrfs_symlink_inode_operations;
9192 inode->i_mapping->a_ops = &btrfs_symlink_aops;
9193 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
9194 inode_set_bytes(inode, name_len);
9195 btrfs_i_size_write(inode, name_len);
9196 err = btrfs_update_inode(trans, root, inode);
9197 if (err) {
9198 drop_inode = 1;
9199 goto out_unlock_inode;
9200 }
9201
9202 unlock_new_inode(inode);
9203 d_instantiate(dentry, inode);
9204
9205out_unlock:
9206 btrfs_end_transaction(trans, root);
9207 if (drop_inode) {
9208 inode_dec_link_count(inode);
9209 iput(inode);
9210 }
9211 btrfs_btree_balance_dirty(root);
9212 return err;
9213
9214out_unlock_inode:
9215 drop_inode = 1;
9216 unlock_new_inode(inode);
9217 goto out_unlock;
9218}
9219
9220static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
9221 u64 start, u64 num_bytes, u64 min_size,
9222 loff_t actual_len, u64 *alloc_hint,
9223 struct btrfs_trans_handle *trans)
9224{
9225 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
9226 struct extent_map *em;
9227 struct btrfs_root *root = BTRFS_I(inode)->root;
9228 struct btrfs_key ins;
9229 u64 cur_offset = start;
9230 u64 i_size;
9231 u64 cur_bytes;
9232 int ret = 0;
9233 bool own_trans = true;
9234
9235 if (trans)
9236 own_trans = false;
9237 while (num_bytes > 0) {
9238 if (own_trans) {
9239 trans = btrfs_start_transaction(root, 3);
9240 if (IS_ERR(trans)) {
9241 ret = PTR_ERR(trans);
9242 break;
9243 }
9244 }
9245
9246 cur_bytes = min(num_bytes, 256ULL * 1024 * 1024);
9247 cur_bytes = max(cur_bytes, min_size);
9248 ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0,
9249 *alloc_hint, &ins, 1, 0);
9250 if (ret) {
9251 if (own_trans)
9252 btrfs_end_transaction(trans, root);
9253 break;
9254 }
9255
9256 ret = insert_reserved_file_extent(trans, inode,
9257 cur_offset, ins.objectid,
9258 ins.offset, ins.offset,
9259 ins.offset, 0, 0, 0,
9260 BTRFS_FILE_EXTENT_PREALLOC);
9261 if (ret) {
9262 btrfs_free_reserved_extent(root, ins.objectid,
9263 ins.offset, 0);
9264 btrfs_abort_transaction(trans, root, ret);
9265 if (own_trans)
9266 btrfs_end_transaction(trans, root);
9267 break;
9268 }
9269 btrfs_drop_extent_cache(inode, cur_offset,
9270 cur_offset + ins.offset -1, 0);
9271
9272 em = alloc_extent_map();
9273 if (!em) {
9274 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
9275 &BTRFS_I(inode)->runtime_flags);
9276 goto next;
9277 }
9278
9279 em->start = cur_offset;
9280 em->orig_start = cur_offset;
9281 em->len = ins.offset;
9282 em->block_start = ins.objectid;
9283 em->block_len = ins.offset;
9284 em->orig_block_len = ins.offset;
9285 em->ram_bytes = ins.offset;
9286 em->bdev = root->fs_info->fs_devices->latest_bdev;
9287 set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
9288 em->generation = trans->transid;
9289
9290 while (1) {
9291 write_lock(&em_tree->lock);
9292 ret = add_extent_mapping(em_tree, em, 1);
9293 write_unlock(&em_tree->lock);
9294 if (ret != -EEXIST)
9295 break;
9296 btrfs_drop_extent_cache(inode, cur_offset,
9297 cur_offset + ins.offset - 1,
9298 0);
9299 }
9300 free_extent_map(em);
9301next:
9302 num_bytes -= ins.offset;
9303 cur_offset += ins.offset;
9304 *alloc_hint = ins.objectid + ins.offset;
9305
9306 inode_inc_iversion(inode);
9307 inode->i_ctime = CURRENT_TIME;
9308 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
9309 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
9310 (actual_len > inode->i_size) &&
9311 (cur_offset > inode->i_size)) {
9312 if (cur_offset > actual_len)
9313 i_size = actual_len;
9314 else
9315 i_size = cur_offset;
9316 i_size_write(inode, i_size);
9317 btrfs_ordered_update_i_size(inode, i_size, NULL);
9318 }
9319
9320 ret = btrfs_update_inode(trans, root, inode);
9321
9322 if (ret) {
9323 btrfs_abort_transaction(trans, root, ret);
9324 if (own_trans)
9325 btrfs_end_transaction(trans, root);
9326 break;
9327 }
9328
9329 if (own_trans)
9330 btrfs_end_transaction(trans, root);
9331 }
9332 return ret;
9333}
9334
9335int btrfs_prealloc_file_range(struct inode *inode, int mode,
9336 u64 start, u64 num_bytes, u64 min_size,
9337 loff_t actual_len, u64 *alloc_hint)
9338{
9339 return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
9340 min_size, actual_len, alloc_hint,
9341 NULL);
9342}
9343
9344int btrfs_prealloc_file_range_trans(struct inode *inode,
9345 struct btrfs_trans_handle *trans, int mode,
9346 u64 start, u64 num_bytes, u64 min_size,
9347 loff_t actual_len, u64 *alloc_hint)
9348{
9349 return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
9350 min_size, actual_len, alloc_hint, trans);
9351}
9352
/*
 * ->set_page_dirty callback of btrfs_aops: delegates directly to
 * __set_page_dirty_nobuffers() and returns its result.
 */
static int btrfs_set_page_dirty(struct page *page)
{
	return __set_page_dirty_nobuffers(page);
}
9357
9358static int btrfs_permission(struct inode *inode, int mask)
9359{
9360 struct btrfs_root *root = BTRFS_I(inode)->root;
9361 umode_t mode = inode->i_mode;
9362
9363 if (mask & MAY_WRITE &&
9364 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
9365 if (btrfs_root_readonly(root))
9366 return -EROFS;
9367 if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY)
9368 return -EACCES;
9369 }
9370 return generic_permission(inode, mask);
9371}
9372
9373static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
9374{
9375 struct btrfs_trans_handle *trans;
9376 struct btrfs_root *root = BTRFS_I(dir)->root;
9377 struct inode *inode = NULL;
9378 u64 objectid;
9379 u64 index;
9380 int ret = 0;
9381
9382
9383
9384
9385 trans = btrfs_start_transaction(root, 5);
9386 if (IS_ERR(trans))
9387 return PTR_ERR(trans);
9388
9389 ret = btrfs_find_free_ino(root, &objectid);
9390 if (ret)
9391 goto out;
9392
9393 inode = btrfs_new_inode(trans, root, dir, NULL, 0,
9394 btrfs_ino(dir), objectid, mode, &index);
9395 if (IS_ERR(inode)) {
9396 ret = PTR_ERR(inode);
9397 inode = NULL;
9398 goto out;
9399 }
9400
9401 inode->i_fop = &btrfs_file_operations;
9402 inode->i_op = &btrfs_file_inode_operations;
9403
9404 inode->i_mapping->a_ops = &btrfs_aops;
9405 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
9406 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
9407
9408 ret = btrfs_init_inode_security(trans, inode, dir, NULL);
9409 if (ret)
9410 goto out_inode;
9411
9412 ret = btrfs_update_inode(trans, root, inode);
9413 if (ret)
9414 goto out_inode;
9415 ret = btrfs_orphan_add(trans, inode);
9416 if (ret)
9417 goto out_inode;
9418
9419
9420
9421
9422
9423
9424
9425
9426 set_nlink(inode, 1);
9427 unlock_new_inode(inode);
9428 d_tmpfile(dentry, inode);
9429 mark_inode_dirty(inode);
9430
9431out:
9432 btrfs_end_transaction(trans, root);
9433 if (ret)
9434 iput(inode);
9435 btrfs_balance_delayed_items(root);
9436 btrfs_btree_balance_dirty(root);
9437 return ret;
9438
9439out_inode:
9440 unlock_new_inode(inode);
9441 goto out;
9442
9443}
9444
9445static const struct inode_operations btrfs_dir_inode_operations = {
9446 .getattr = btrfs_getattr,
9447 .lookup = btrfs_lookup,
9448 .create = btrfs_create,
9449 .unlink = btrfs_unlink,
9450 .link = btrfs_link,
9451 .mkdir = btrfs_mkdir,
9452 .rmdir = btrfs_rmdir,
9453 .rename2 = btrfs_rename2,
9454 .symlink = btrfs_symlink,
9455 .setattr = btrfs_setattr,
9456 .mknod = btrfs_mknod,
9457 .setxattr = btrfs_setxattr,
9458 .getxattr = btrfs_getxattr,
9459 .listxattr = btrfs_listxattr,
9460 .removexattr = btrfs_removexattr,
9461 .permission = btrfs_permission,
9462 .get_acl = btrfs_get_acl,
9463 .set_acl = btrfs_set_acl,
9464 .update_time = btrfs_update_time,
9465 .tmpfile = btrfs_tmpfile,
9466};
9467static const struct inode_operations btrfs_dir_ro_inode_operations = {
9468 .lookup = btrfs_lookup,
9469 .permission = btrfs_permission,
9470 .get_acl = btrfs_get_acl,
9471 .set_acl = btrfs_set_acl,
9472 .update_time = btrfs_update_time,
9473};
9474
9475static const struct file_operations btrfs_dir_file_operations = {
9476 .llseek = generic_file_llseek,
9477 .read = generic_read_dir,
9478 .iterate = btrfs_real_readdir,
9479 .unlocked_ioctl = btrfs_ioctl,
9480#ifdef CONFIG_COMPAT
9481 .compat_ioctl = btrfs_ioctl,
9482#endif
9483 .release = btrfs_release_file,
9484 .fsync = btrfs_sync_file,
9485};
9486
9487static struct extent_io_ops btrfs_extent_io_ops = {
9488 .fill_delalloc = run_delalloc_range,
9489 .submit_bio_hook = btrfs_submit_bio_hook,
9490 .merge_bio_hook = btrfs_merge_bio_hook,
9491 .readpage_end_io_hook = btrfs_readpage_end_io_hook,
9492 .writepage_end_io_hook = btrfs_writepage_end_io_hook,
9493 .writepage_start_hook = btrfs_writepage_start_hook,
9494 .set_bit_hook = btrfs_set_bit_hook,
9495 .clear_bit_hook = btrfs_clear_bit_hook,
9496 .merge_extent_hook = btrfs_merge_extent_hook,
9497 .split_extent_hook = btrfs_split_extent_hook,
9498};
9499
9500
9501
9502
9503
9504
9505
9506
9507
9508
9509
9510
9511
9512static const struct address_space_operations btrfs_aops = {
9513 .readpage = btrfs_readpage,
9514 .writepage = btrfs_writepage,
9515 .writepages = btrfs_writepages,
9516 .readpages = btrfs_readpages,
9517 .direct_IO = btrfs_direct_IO,
9518 .invalidatepage = btrfs_invalidatepage,
9519 .releasepage = btrfs_releasepage,
9520 .set_page_dirty = btrfs_set_page_dirty,
9521 .error_remove_page = generic_error_remove_page,
9522};
9523
9524static const struct address_space_operations btrfs_symlink_aops = {
9525 .readpage = btrfs_readpage,
9526 .writepage = btrfs_writepage,
9527 .invalidatepage = btrfs_invalidatepage,
9528 .releasepage = btrfs_releasepage,
9529};
9530
9531static const struct inode_operations btrfs_file_inode_operations = {
9532 .getattr = btrfs_getattr,
9533 .setattr = btrfs_setattr,
9534 .setxattr = btrfs_setxattr,
9535 .getxattr = btrfs_getxattr,
9536 .listxattr = btrfs_listxattr,
9537 .removexattr = btrfs_removexattr,
9538 .permission = btrfs_permission,
9539 .fiemap = btrfs_fiemap,
9540 .get_acl = btrfs_get_acl,
9541 .set_acl = btrfs_set_acl,
9542 .update_time = btrfs_update_time,
9543};
9544static const struct inode_operations btrfs_special_inode_operations = {
9545 .getattr = btrfs_getattr,
9546 .setattr = btrfs_setattr,
9547 .permission = btrfs_permission,
9548 .setxattr = btrfs_setxattr,
9549 .getxattr = btrfs_getxattr,
9550 .listxattr = btrfs_listxattr,
9551 .removexattr = btrfs_removexattr,
9552 .get_acl = btrfs_get_acl,
9553 .set_acl = btrfs_set_acl,
9554 .update_time = btrfs_update_time,
9555};
9556static const struct inode_operations btrfs_symlink_inode_operations = {
9557 .readlink = generic_readlink,
9558 .follow_link = page_follow_link_light,
9559 .put_link = page_put_link,
9560 .getattr = btrfs_getattr,
9561 .setattr = btrfs_setattr,
9562 .permission = btrfs_permission,
9563 .setxattr = btrfs_setxattr,
9564 .getxattr = btrfs_getxattr,
9565 .listxattr = btrfs_listxattr,
9566 .removexattr = btrfs_removexattr,
9567 .update_time = btrfs_update_time,
9568};
9569
9570const struct dentry_operations btrfs_dentry_operations = {
9571 .d_delete = btrfs_dentry_delete,
9572 .d_release = btrfs_dentry_release,
9573};
9574