/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/mpage.h>
#include <linux/falloc.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/compat.h>
#include <linux/slab.h>
#include <linux/btrfs.h>
#include <linux/uio.h>
#include <linux/iversion.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "print-tree.h"
#include "tree-log.h"
#include "locking.h"
#include "volumes.h"
#include "qgroup.h"
#include "compression.h"

static struct kmem_cache *btrfs_inode_defrag_cachep;

/*
 * When auto defrag is enabled we queue up these defrag structs to remember
 * which inodes need defragging passes.
 */
struct inode_defrag {
	struct rb_node rb_node;

	/* inode number */
	u64 ino;

	/*
	 * transid where the defrag was added, we search for extents newer
	 * than this transid
	 */
	u64 transid;

	/* root objectid */
	u64 root;

	/* last offset we were able to defrag */
	u64 last_offset;

	/* if we've wrapped around back to zero once already */
	int cycled;
};

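/*
 * Order defrag records by root objectid first and inode number second, so
 * the rbtree of pending defrags can be walked in (root, ino) order.
 */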
static int __compare_inode_defrag(struct inode_defrag *defrag1,
				  struct inode_defrag *defrag2)
{
	if (defrag1->root > defrag2->root)
		return 1;
	else if (defrag1->root < defrag2->root)
		return -1;
	else if (defrag1->ino > defrag2->ino)
		return 1;
	else if (defrag1->ino < defrag2->ino)
		return -1;
	else
		return 0;
}

/*
 * Insert a defrag record into the tree.  The caller must already hold
 * fs_info->defrag_inodes_lock.
 *
 * If a record for this inode already exists, its transid is lowered to the
 * older of the two and the furthest defragged offset is kept; -EEXIST is
 * returned and the caller must free the record it passed in.
 */
static int __btrfs_add_inode_defrag(struct btrfs_inode *inode,
				    struct inode_defrag *defrag)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
	struct inode_defrag *entry;
	struct rb_node **p;
	struct rb_node *parent = NULL;
	int ret;

	p = &fs_info->defrag_inodes.rb_node;
	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct inode_defrag, rb_node);

		ret = __compare_inode_defrag(defrag, entry);
		if (ret < 0)
			p = &parent->rb_left;
		else if (ret > 0)
			p = &parent->rb_right;
		else {
			/*
			 * If we're reinserting an entry for an old defrag run,
			 * make sure to lower the transid of our existing
			 * record.
			 */
			if (defrag->transid < entry->transid)
				entry->transid = defrag->transid;
			if (defrag->last_offset > entry->last_offset)
				entry->last_offset = defrag->last_offset;
			return -EEXIST;
		}
	}
	set_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags);
	rb_link_node(&defrag->rb_node, parent, p);
	rb_insert_color(&defrag->rb_node, &fs_info->defrag_inodes);
	return 0;
}

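/*
 * Auto defrag only makes sense while the mount option is set and the
 * filesystem is not in the process of shutting down.
 */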
static inline int __need_auto_defrag(struct btrfs_fs_info *fs_info)
{
	if (!btrfs_test_opt(fs_info, AUTO_DEFRAG))
		return 0;

	if (btrfs_fs_closing(fs_info))
		return 0;

	return 1;
}

/*
 * Insert a defrag record for this inode if auto defrag is enabled.
 */
int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
			   struct btrfs_inode *inode)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
	struct btrfs_root *root = inode->root;
	struct inode_defrag *defrag;
	u64 transid;
	int ret;

	if (!__need_auto_defrag(fs_info))
		return 0;

	if (test_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags))
		return 0;

	if (trans)
		transid = trans->transid;
	else
		transid = inode->root->last_trans;

	defrag = kmem_cache_zalloc(btrfs_inode_defrag_cachep, GFP_NOFS);
	if (!defrag)
		return -ENOMEM;

	defrag->ino = btrfs_ino(inode);
	defrag->transid = transid;
	defrag->root = root->root_key.objectid;

	spin_lock(&fs_info->defrag_inodes_lock);
	if (!test_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags)) {
		/*
		 * If we set IN_DEFRAG flag and evict the inode from memory,
		 * and then re-read this inode, this new inode doesn't have
		 * IN_DEFRAG flag. At the case, we may find the existed defrag.
		 */
		ret = __btrfs_add_inode_defrag(inode, defrag);
		if (ret)
			kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
	} else {
		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
	}
	spin_unlock(&fs_info->defrag_inodes_lock);
	return 0;
}

/*
 * Requeue the defrag object. If there is a defrag object that points to
 * the same inode in the tree, we will merge them together (by
 * __btrfs_add_inode_defrag()) and free the one that we want to requeue.
 */
static void btrfs_requeue_inode_defrag(struct btrfs_inode *inode,
				       struct inode_defrag *defrag)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
	int ret;

	if (!__need_auto_defrag(fs_info))
		goto out;

	/*
	 * Here we don't check the IN_DEFRAG flag, because we need to merge
	 * them together.
	 */
	spin_lock(&fs_info->defrag_inodes_lock);
	ret = __btrfs_add_inode_defrag(inode, defrag);
	spin_unlock(&fs_info->defrag_inodes_lock);
	if (ret)
		goto out;
	return;
out:
	kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
}

/*
 * Pick the defragable inode that we want, if it doesn't exist, we will get
 * the next one.
 */
static struct inode_defrag *
btrfs_pick_defrag_inode(struct btrfs_fs_info *fs_info, u64 root, u64 ino)
{
	struct inode_defrag *entry = NULL;
	struct inode_defrag tmp;
	struct rb_node *p;
	struct rb_node *parent = NULL;
	int ret;

	tmp.ino = ino;
	tmp.root = root;

	spin_lock(&fs_info->defrag_inodes_lock);
	p = fs_info->defrag_inodes.rb_node;
	while (p) {
		parent = p;
		entry = rb_entry(parent, struct inode_defrag, rb_node);

		ret = __compare_inode_defrag(&tmp, entry);
		if (ret < 0)
			p = parent->rb_left;
		else if (ret > 0)
			p = parent->rb_right;
		else
			goto out;
	}

	if (parent && __compare_inode_defrag(&tmp, entry) > 0) {
		parent = rb_next(parent);
		if (parent)
			entry = rb_entry(parent, struct inode_defrag, rb_node);
		else
			entry = NULL;
	}
out:
	if (entry)
		rb_erase(parent, &fs_info->defrag_inodes);
	spin_unlock(&fs_info->defrag_inodes_lock);
	return entry;
}

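/*
 * Free every queued defrag record; called while the filesystem is being
 * torn down, when the pending defrag work will never run.
 */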
void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info)
{
	struct inode_defrag *defrag;
	struct rb_node *node;

	spin_lock(&fs_info->defrag_inodes_lock);
	node = rb_first(&fs_info->defrag_inodes);
	while (node) {
		rb_erase(node, &fs_info->defrag_inodes);
		defrag = rb_entry(node, struct inode_defrag, rb_node);
		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);

		cond_resched_lock(&fs_info->defrag_inodes_lock);

		node = rb_first(&fs_info->defrag_inodes);
	}
	spin_unlock(&fs_info->defrag_inodes_lock);
}

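/*
 * How much defrag work btrfs_defrag_file() may do per pass before the inode
 * is requeued (see __btrfs_run_defrag_inode() below).
 */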
#define BTRFS_DEFRAG_BATCH	1024

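/*
 * Run a single defrag pass over one inode from the defrag rbtree: look up
 * the root and inode, defrag up to BTRFS_DEFRAG_BATCH worth of work, and
 * requeue the record if there is more left to do.
 */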
static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
				    struct inode_defrag *defrag)
{
	struct btrfs_root *inode_root;
	struct inode *inode;
	struct btrfs_key key;
	struct btrfs_ioctl_defrag_range_args range;
	int num_defrag;
	int index;
	int ret;

	/* get the inode */
	key.objectid = defrag->root;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	index = srcu_read_lock(&fs_info->subvol_srcu);

	inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
	if (IS_ERR(inode_root)) {
		ret = PTR_ERR(inode_root);
		goto cleanup;
	}

	key.objectid = defrag->ino;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;
	inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
	if (IS_ERR(inode)) {
		ret = PTR_ERR(inode);
		goto cleanup;
	}
	srcu_read_unlock(&fs_info->subvol_srcu, index);

	/* do a chunk of defrag */
	clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
	memset(&range, 0, sizeof(range));
	range.len = (u64)-1;
	range.start = defrag->last_offset;

	sb_start_write(fs_info->sb);
	num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
				       BTRFS_DEFRAG_BATCH);
	sb_end_write(fs_info->sb);

	/*
	 * if we filled the whole defrag batch, there
	 * must be more work to do.  Queue this defrag
	 * again
	 */
	if (num_defrag == BTRFS_DEFRAG_BATCH) {
		defrag->last_offset = range.start;
		btrfs_requeue_inode_defrag(BTRFS_I(inode), defrag);
	} else if (defrag->last_offset && !defrag->cycled) {
		/*
		 * we didn't fill our defrag batch, but
		 * we didn't start at zero.  Make sure we loop
		 * around to the start of the file.
		 */
		defrag->last_offset = 0;
		defrag->cycled = 1;
		btrfs_requeue_inode_defrag(BTRFS_I(inode), defrag);
	} else {
		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
	}

	iput(inode);
	return 0;
cleanup:
	srcu_read_unlock(&fs_info->subvol_srcu, index);
	kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
	return ret;
}

/*
 * Run through the list of inodes in the FS that need defragging.
 */
int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
{
	struct inode_defrag *defrag;
	u64 first_ino = 0;
	u64 root_objectid = 0;

	atomic_inc(&fs_info->defrag_running);
	while (1) {
		/* Pause the auto defragger. */
		if (test_bit(BTRFS_FS_STATE_REMOUNTING,
			     &fs_info->fs_state))
			break;

		if (!__need_auto_defrag(fs_info))
			break;

		/* find an inode to defrag */
		defrag = btrfs_pick_defrag_inode(fs_info, root_objectid,
						 first_ino);
		if (!defrag) {
			if (root_objectid || first_ino) {
				root_objectid = 0;
				first_ino = 0;
				continue;
			} else {
				break;
			}
		}

		first_ino = defrag->ino + 1;
		root_objectid = defrag->root;

		__btrfs_run_defrag_inode(fs_info, defrag);
	}
	atomic_dec(&fs_info->defrag_running);

	/*
	 * During unmount, we use the transaction_wait queue to
	 * wait for the defragger to stop.
	 */
	wake_up(&fs_info->transaction_wait);
	return 0;
}

/*
 * Simple helper to fault in pages and copy.  This should go away
 * and be replaced with calls into generic code.
 */
static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
					 struct page **prepared_pages,
					 struct iov_iter *i)
{
	size_t copied = 0;
	size_t total_copied = 0;
	int pg = 0;
	int offset = pos & (PAGE_SIZE - 1);

	while (write_bytes > 0) {
		size_t count = min_t(size_t,
				     PAGE_SIZE - offset, write_bytes);
		struct page *page = prepared_pages[pg];

		/* Copy data from userspace to the current page */
		copied = iov_iter_copy_from_user_atomic(page, i, offset, count);

		/* Flush processor's dcache for this page */
		flush_dcache_page(page);

		/*
		 * if we get a partial write, we can end up with
		 * partially up to date pages.  These add
		 * a lot of complexity, so make sure they don't
		 * happen by forcing this copy to be retried.
		 *
		 * The rest of the btrfs_file_write code will fall
		 * back to page at a time copies after we return 0.
		 */
		if (!PageUptodate(page) && copied < count)
			copied = 0;

		iov_iter_advance(i, copied);
		write_bytes -= copied;
		total_copied += copied;

		/* Return to btrfs_file_write_iter to fault page */
		if (unlikely(copied == 0))
			break;

		if (copied < PAGE_SIZE - offset) {
			offset += copied;
		} else {
			pg++;
			offset = 0;
		}
	}
	return total_copied;
}

/*
 * Unlock pages after btrfs_file_write is done with them.
 */
static void btrfs_drop_pages(struct page **pages, size_t num_pages)
{
	size_t i;
	for (i = 0; i < num_pages; i++) {
		/*
		 * Page checked is some magic around finding pages that have
		 * been modified without going through btrfs_set_page_dirty;
		 * clear it here.  There should be no need to mark the pages
		 * accessed, as prepare_pages should have marked them accessed
		 * via find_or_create_page().
		 */
		ClearPageChecked(pages[i]);
		unlock_page(pages[i]);
		put_page(pages[i]);
	}
}

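/*
 * Walk the extent maps in the range [start, start + len) and mark every hole
 * (a range not yet backed by an allocated extent) with the
 * EXTENT_DELALLOC_NEW bit, so writes into new space can be distinguished
 * from overwrites of existing extents.
 */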
static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
					 const u64 start,
					 const u64 len,
					 struct extent_state **cached_state)
{
	u64 search_start = start;
	const u64 end = start + len - 1;

	while (search_start < end) {
		const u64 search_len = end - search_start + 1;
		struct extent_map *em;
		u64 em_len;
		int ret = 0;

		em = btrfs_get_extent(inode, NULL, 0, search_start,
				      search_len, 0);
		if (IS_ERR(em))
			return PTR_ERR(em);

		if (em->block_start != EXTENT_MAP_HOLE)
			goto next;

		em_len = em->len;
		if (em->start < search_start)
			em_len -= search_start - em->start;
		if (em_len > search_len)
			em_len = search_len;

		ret = set_extent_bit(&inode->io_tree, search_start,
				     search_start + em_len - 1,
				     EXTENT_DELALLOC_NEW,
				     NULL, cached_state, GFP_NOFS);
next:
		search_start = extent_map_end(em);
		free_extent_map(em);
		if (ret)
			return ret;
	}
	return 0;
}

/*
 * After btrfs_copy_from_user(), update the inode's delalloc accounting and
 * mark the pages uptodate and dirty.  Must be called with the pages still
 * locked; also updates the in-memory i_size if the write goes past it.
 */
int btrfs_dirty_pages(struct inode *inode, struct page **pages,
		      size_t num_pages, loff_t pos, size_t write_bytes,
		      struct extent_state **cached)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	int err = 0;
	int i;
	u64 num_bytes;
	u64 start_pos;
	u64 end_of_last_block;
	u64 end_pos = pos + write_bytes;
	loff_t isize = i_size_read(inode);
	unsigned int extra_bits = 0;

	start_pos = pos & ~((u64) fs_info->sectorsize - 1);
	num_bytes = round_up(write_bytes + pos - start_pos,
			     fs_info->sectorsize);

	end_of_last_block = start_pos + num_bytes - 1;

	if (!btrfs_is_free_space_inode(BTRFS_I(inode))) {
		if (start_pos >= isize &&
		    !(BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)) {
			/*
			 * There can't be any extents following eof in this
			 * case, just set the delalloc new bit for the range
			 * directly.
			 */
			extra_bits |= EXTENT_DELALLOC_NEW;
		} else {
			err = btrfs_find_new_delalloc_bytes(BTRFS_I(inode),
							    start_pos,
							    num_bytes, cached);
			if (err)
				return err;
		}
	}

	err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
					extra_bits, cached, 0);
	if (err)
		return err;

	for (i = 0; i < num_pages; i++) {
		struct page *p = pages[i];
		SetPageUptodate(p);
		ClearPageChecked(p);
		set_page_dirty(p);
	}

	/*
	 * we've only changed i_size in ram, and we haven't updated
	 * the disk i_size.  There is no need to log the inode
	 * at this time.
	 */
	if (end_pos > isize)
		i_size_write(inode, end_pos);
	return 0;
}

/*
 * This drops all the extents in the cache that intersect the range
 * [start, end].  Extent maps that straddle a range boundary are split so
 * the cache stays consistent with the file extent items on disk.
 */
void btrfs_drop_extent_cache(struct btrfs_inode *inode, u64 start, u64 end,
			     int skip_pinned)
{
	struct extent_map *em;
	struct extent_map *split = NULL;
	struct extent_map *split2 = NULL;
	struct extent_map_tree *em_tree = &inode->extent_tree;
	u64 len = end - start + 1;
	u64 gen;
	int ret;
	int testend = 1;
	unsigned long flags;
	int compressed = 0;
	bool modified;

	WARN_ON(end < start);
	if (end == (u64)-1) {
		len = (u64)-1;
		testend = 0;
	}
	while (1) {
		int no_splits = 0;

		modified = false;
		if (!split)
			split = alloc_extent_map();
		if (!split2)
			split2 = alloc_extent_map();
		if (!split || !split2)
			no_splits = 1;

		write_lock(&em_tree->lock);
		em = lookup_extent_mapping(em_tree, start, len);
		if (!em) {
			write_unlock(&em_tree->lock);
			break;
		}
		flags = em->flags;
		gen = em->generation;
		if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
			if (testend && em->start + em->len >= start + len) {
				free_extent_map(em);
				write_unlock(&em_tree->lock);
				break;
			}
			start = em->start + em->len;
			if (testend)
				len = start + len - (em->start + em->len);
			free_extent_map(em);
			write_unlock(&em_tree->lock);
			continue;
		}
		compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
		clear_bit(EXTENT_FLAG_LOGGING, &flags);
		modified = !list_empty(&em->list);
		if (no_splits)
			goto next;

		if (em->start < start) {
			split->start = em->start;
			split->len = start - em->start;

			if (em->block_start < EXTENT_MAP_LAST_BYTE) {
				split->orig_start = em->orig_start;
				split->block_start = em->block_start;

				if (compressed)
					split->block_len = em->block_len;
				else
					split->block_len = split->len;
				split->orig_block_len = max(split->block_len,
						em->orig_block_len);
				split->ram_bytes = em->ram_bytes;
			} else {
				split->orig_start = split->start;
				split->block_len = 0;
				split->block_start = em->block_start;
				split->orig_block_len = 0;
				split->ram_bytes = split->len;
			}

			split->generation = gen;
			split->bdev = em->bdev;
			split->flags = flags;
			split->compress_type = em->compress_type;
			replace_extent_mapping(em_tree, em, split, modified);
			free_extent_map(split);
			split = split2;
			split2 = NULL;
		}
		if (testend && em->start + em->len > start + len) {
			u64 diff = start + len - em->start;

			split->start = start + len;
			split->len = em->start + em->len - (start + len);
			split->bdev = em->bdev;
			split->flags = flags;
			split->compress_type = em->compress_type;
			split->generation = gen;

			if (em->block_start < EXTENT_MAP_LAST_BYTE) {
				split->orig_block_len = max(em->block_len,
						    em->orig_block_len);

				split->ram_bytes = em->ram_bytes;
				if (compressed) {
					split->block_len = em->block_len;
					split->block_start = em->block_start;
					split->orig_start = em->orig_start;
				} else {
					split->block_len = split->len;
					split->block_start = em->block_start
							     + diff;
					split->orig_start = em->orig_start;
				}
			} else {
				split->ram_bytes = split->len;
				split->orig_start = split->start;
				split->block_len = 0;
				split->block_start = em->block_start;
				split->orig_block_len = 0;
			}

			if (extent_map_in_tree(em)) {
				replace_extent_mapping(em_tree, em, split,
						       modified);
			} else {
				ret = add_extent_mapping(em_tree, split,
							 modified);
				ASSERT(ret == 0); /* Logic error */
			}
			free_extent_map(split);
			split = NULL;
		}
next:
		if (extent_map_in_tree(em))
			remove_extent_mapping(em_tree, em);
		write_unlock(&em_tree->lock);

		/* once for us */
		free_extent_map(em);
		/* once for the tree */
		free_extent_map(em);
	}
	if (split)
		free_extent_map(split);
	if (split2)
		free_extent_map(split2);
}

/*
 * This is very complex, but the basic idea is to drop all extents
 * in the range start - end.
 *
 * If an extent intersects the range but is not entirely inside the range
 * it is either truncated or split.  Anything entirely inside the range
 * is deleted from the tree.
 */
int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root, struct inode *inode,
			 struct btrfs_path *path, u64 start, u64 end,
			 u64 *drop_end, int drop_cache,
			 int replace_extent,
			 u32 extent_item_size,
			 int *key_inserted)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	struct btrfs_key new_key;
	u64 ino = btrfs_ino(BTRFS_I(inode));
	u64 search_start = start;
	u64 disk_bytenr = 0;
	u64 num_bytes = 0;
	u64 extent_offset = 0;
	u64 extent_end = 0;
	u64 last_end = start;
	int del_nr = 0;
	int del_slot = 0;
	int extent_type;
	int recow;
	int ret;
	int modify_tree = -1;
	int update_refs;
	int found = 0;
	int leafs_visited = 0;

	if (drop_cache)
		btrfs_drop_extent_cache(BTRFS_I(inode), start, end - 1, 0);

	if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent)
		modify_tree = 0;

	update_refs = (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
		       root == fs_info->tree_root);
	while (1) {
		recow = 0;
		ret = btrfs_lookup_file_extent(trans, root, path, ino,
					       search_start, modify_tree);
		if (ret < 0)
			break;
		if (ret > 0 && path->slots[0] > 0 && search_start == start) {
			leaf = path->nodes[0];
			btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
			if (key.objectid == ino &&
			    key.type == BTRFS_EXTENT_DATA_KEY)
				path->slots[0]--;
		}
		ret = 0;
		leafs_visited++;
next_slot:
		leaf = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			BUG_ON(del_nr > 0);
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				break;
			if (ret > 0) {
				ret = 0;
				break;
			}
			leafs_visited++;
			leaf = path->nodes[0];
			recow = 1;
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

		if (key.objectid > ino)
			break;
		if (WARN_ON_ONCE(key.objectid < ino) ||
		    key.type < BTRFS_EXTENT_DATA_KEY) {
			ASSERT(del_nr == 0);
			path->slots[0]++;
			goto next_slot;
		}
		if (key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
			break;

		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		extent_type = btrfs_file_extent_type(leaf, fi);

		if (extent_type == BTRFS_FILE_EXTENT_REG ||
		    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
			disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
			num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
			extent_offset = btrfs_file_extent_offset(leaf, fi);
			extent_end = key.offset +
				btrfs_file_extent_num_bytes(leaf, fi);
		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
			extent_end = key.offset +
				btrfs_file_extent_inline_len(leaf,
						     path->slots[0], fi);
		} else {
			/* can't happen */
			BUG();
		}

		/*
		 * Don't skip extent items representing 0 byte lengths. They
		 * used to be created (bug) if while punching holes we hit
		 * -ENOSPC condition. So if we find one here, just ensure we
		 * delete it, otherwise we would insert a new file extent item
		 * with the same key (offset) as that 0 bytes length file
		 * extent item in the call to setup_items_for_insert() later
		 * in this function.
		 */
		if (extent_end == key.offset && extent_end >= search_start) {
			last_end = extent_end;
			goto delete_extent_item;
		}

		if (extent_end <= search_start) {
			path->slots[0]++;
			goto next_slot;
		}

		found = 1;
		search_start = max(key.offset, start);
		if (recow || !modify_tree) {
			modify_tree = -1;
			btrfs_release_path(path);
			continue;
		}

		/*
		 *     | - range to drop - |
		 *  | -------- extent -------- |
		 */
		if (start > key.offset && end < extent_end) {
			BUG_ON(del_nr > 0);
			if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
				ret = -EOPNOTSUPP;
				break;
			}

			memcpy(&new_key, &key, sizeof(new_key));
			new_key.offset = start;
			ret = btrfs_duplicate_item(trans, root, path,
						   &new_key);
			if (ret == -EAGAIN) {
				btrfs_release_path(path);
				continue;
			}
			if (ret < 0)
				break;

			leaf = path->nodes[0];
			fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
					    struct btrfs_file_extent_item);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							start - key.offset);

			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);

			extent_offset += start - key.offset;
			btrfs_set_file_extent_offset(leaf, fi, extent_offset);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							extent_end - start);
			btrfs_mark_buffer_dirty(leaf);

			if (update_refs && disk_bytenr > 0) {
				ret = btrfs_inc_extent_ref(trans, root,
						disk_bytenr, num_bytes, 0,
						root->root_key.objectid,
						new_key.objectid,
						start - extent_offset);
				BUG_ON(ret); /* -ENOMEM */
			}
			key.offset = start;
		}

		/*
		 * From here on out we will have actually dropped something, so
		 * last_end can be updated.
		 */
		last_end = extent_end;

		/*
		 *  | ---- range to drop ----- |
		 *      | -------- extent -------- |
		 */
		if (start <= key.offset && end < extent_end) {
			if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
				ret = -EOPNOTSUPP;
				break;
			}

			memcpy(&new_key, &key, sizeof(new_key));
			new_key.offset = end;
			btrfs_set_item_key_safe(fs_info, path, &new_key);

			extent_offset += end - key.offset;
			btrfs_set_file_extent_offset(leaf, fi, extent_offset);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							extent_end - end);
			btrfs_mark_buffer_dirty(leaf);
			if (update_refs && disk_bytenr > 0)
				inode_sub_bytes(inode, end - key.offset);
			break;
		}

		search_start = extent_end;
		/*
		 *       | ---- range to drop ----- |
		 *  | -------- extent -------- |
		 */
		if (start > key.offset && end >= extent_end) {
			BUG_ON(del_nr > 0);
			if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
				ret = -EOPNOTSUPP;
				break;
			}

			btrfs_set_file_extent_num_bytes(leaf, fi,
							start - key.offset);
			btrfs_mark_buffer_dirty(leaf);
			if (update_refs && disk_bytenr > 0)
				inode_sub_bytes(inode, extent_end - start);
			if (end == extent_end)
				break;

			path->slots[0]++;
			goto next_slot;
		}

		/*
		 *  | ---- range to drop ----- |
		 *    | ------ extent ------ |
		 */
		if (start <= key.offset && end >= extent_end) {
delete_extent_item:
			if (del_nr == 0) {
				del_slot = path->slots[0];
				del_nr = 1;
			} else {
				BUG_ON(del_slot + del_nr != path->slots[0]);
				del_nr++;
			}

			if (update_refs &&
			    extent_type == BTRFS_FILE_EXTENT_INLINE) {
				inode_sub_bytes(inode,
						extent_end - key.offset);
				extent_end = ALIGN(extent_end,
						   fs_info->sectorsize);
			} else if (update_refs && disk_bytenr > 0) {
				ret = btrfs_free_extent(trans, root,
						disk_bytenr, num_bytes, 0,
						root->root_key.objectid,
						key.objectid, key.offset -
						extent_offset);
				BUG_ON(ret); /* -ENOMEM */
				inode_sub_bytes(inode,
						extent_end - key.offset);
			}

			if (end == extent_end)
				break;

			if (path->slots[0] + 1 < btrfs_header_nritems(leaf)) {
				path->slots[0]++;
				goto next_slot;
			}

			ret = btrfs_del_items(trans, root, path, del_slot,
					      del_nr);
			if (ret) {
				btrfs_abort_transaction(trans, ret);
				break;
			}

			del_nr = 0;
			del_slot = 0;

			btrfs_release_path(path);
			continue;
		}

		BUG_ON(1);
	}

	if (!ret && del_nr > 0) {
		/*
		 * Set path->slots[0] to first slot, so that after the delete
		 * if items are move off from our leaf to its immediate left or
		 * right neighbor leafs, we end up with a correct leaf.
		 */
		path->slots[0] = del_slot;
		ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
		if (ret)
			btrfs_abort_transaction(trans, ret);
	}

	leaf = path->nodes[0];
	/*
	 * If btrfs_del_items() was called, it might have deleted a leaf, in
	 * which case it unlocked our path, so check path->locks[0] matches a
	 * write lock.
	 */
	if (!ret && replace_extent && leafs_visited == 1 &&
	    (path->locks[0] == BTRFS_WRITE_LOCK_BLOCKING ||
	     path->locks[0] == BTRFS_WRITE_LOCK) &&
	    btrfs_leaf_free_space(fs_info, leaf) >=
	    sizeof(struct btrfs_item) + extent_item_size) {

		key.objectid = ino;
		key.type = BTRFS_EXTENT_DATA_KEY;
		key.offset = start;
		if (!del_nr && path->slots[0] < btrfs_header_nritems(leaf)) {
			struct btrfs_key slot_key;

			btrfs_item_key_to_cpu(leaf, &slot_key, path->slots[0]);
			if (btrfs_comp_cpu_keys(&key, &slot_key) > 0)
				path->slots[0]++;
		}
		setup_items_for_insert(root, path, &key,
				       &extent_item_size,
				       extent_item_size,
				       sizeof(struct btrfs_item) +
				       extent_item_size, 1);
		*key_inserted = 1;
	}

	if (!replace_extent || !(*key_inserted))
		btrfs_release_path(path);
	if (drop_end)
		*drop_end = found ? min(end, last_end) : end;
	return ret;
}

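/*
 * Convenience wrapper around __btrfs_drop_extents() that allocates and frees
 * the path itself and does not use the replace-extent optimization.
 */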
int btrfs_drop_extents(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct inode *inode, u64 start,
		       u64 end, int drop_cache)
{
	struct btrfs_path *path;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL,
				   drop_cache, 0, 0, NULL);
	btrfs_free_path(path);
	return ret;
}

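/*
 * Check whether the file extent item at @slot refers to the same physical
 * extent (same bytenr and original offset, no compression or encryption), so
 * it can be merged with a neighbouring extent; on success the item's range
 * is returned through *start and *end.
 */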
static int extent_mergeable(struct extent_buffer *leaf, int slot,
			    u64 objectid, u64 bytenr, u64 orig_offset,
			    u64 *start, u64 *end)
{
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 extent_end;

	if (slot < 0 || slot >= btrfs_header_nritems(leaf))
		return 0;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
		return 0;

	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
	if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG ||
	    btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr ||
	    btrfs_file_extent_offset(leaf, fi) != key.offset - orig_offset ||
	    btrfs_file_extent_compression(leaf, fi) ||
	    btrfs_file_extent_encryption(leaf, fi) ||
	    btrfs_file_extent_other_encoding(leaf, fi))
		return 0;

	extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
	if ((*start && *start != key.offset) || (*end && *end != extent_end))
		return 0;

	*start = key.offset;
	*end = extent_end;
	return 1;
}

/*
 * Mark extent in the range start - end as written.
 *
 * This changes extent type from 'pre-allocated' to 'regular'. If only
 * part of extent is marked as written, the extent will be split into
 * two or three.
 */
int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
			      struct btrfs_inode *inode, u64 start, u64 end)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
	struct btrfs_root *root = inode->root;
	struct extent_buffer *leaf;
	struct btrfs_path *path;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	struct btrfs_key new_key;
	u64 bytenr;
	u64 num_bytes;
	u64 extent_end;
	u64 orig_offset;
	u64 other_start;
	u64 other_end;
	u64 split;
	int del_nr = 0;
	int del_slot = 0;
	int recow;
	int ret;
	u64 ino = btrfs_ino(inode);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
again:
	recow = 0;
	split = start;
	key.objectid = ino;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = split;

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0)
		goto out;
	if (ret > 0 && path->slots[0] > 0)
		path->slots[0]--;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	if (key.objectid != ino ||
	    key.type != BTRFS_EXTENT_DATA_KEY) {
		ret = -EINVAL;
		btrfs_abort_transaction(trans, ret);
		goto out;
	}
	fi = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);
	if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_PREALLOC) {
		ret = -EINVAL;
		btrfs_abort_transaction(trans, ret);
		goto out;
	}
	extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
	if (key.offset > start || extent_end < end) {
		ret = -EINVAL;
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
	num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
	orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi);
	memcpy(&new_key, &key, sizeof(new_key));

	if (start == key.offset && end < extent_end) {
		other_start = 0;
		other_end = start;
		if (extent_mergeable(leaf, path->slots[0] - 1,
				     ino, bytenr, orig_offset,
				     &other_start, &other_end)) {
			new_key.offset = end;
			btrfs_set_item_key_safe(fs_info, path, &new_key);
			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);
			btrfs_set_file_extent_generation(leaf, fi,
							 trans->transid);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							extent_end - end);
			btrfs_set_file_extent_offset(leaf, fi,
						     end - orig_offset);
			fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
					    struct btrfs_file_extent_item);
			btrfs_set_file_extent_generation(leaf, fi,
							 trans->transid);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							end - other_start);
			btrfs_mark_buffer_dirty(leaf);
			goto out;
		}
	}

	if (start > key.offset && end == extent_end) {
		other_start = end;
		other_end = 0;
		if (extent_mergeable(leaf, path->slots[0] + 1,
				     ino, bytenr, orig_offset,
				     &other_start, &other_end)) {
			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							start - key.offset);
			btrfs_set_file_extent_generation(leaf, fi,
							 trans->transid);
			path->slots[0]++;
			new_key.offset = start;
			btrfs_set_item_key_safe(fs_info, path, &new_key);

			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);
			btrfs_set_file_extent_generation(leaf, fi,
							 trans->transid);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							other_end - start);
			btrfs_set_file_extent_offset(leaf, fi,
						     start - orig_offset);
			btrfs_mark_buffer_dirty(leaf);
			goto out;
		}
	}

	while (start > key.offset || end < extent_end) {
		if (key.offset == start)
			split = end;

		new_key.offset = split;
		ret = btrfs_duplicate_item(trans, root, path, &new_key);
		if (ret == -EAGAIN) {
			btrfs_release_path(path);
			goto again;
		}
		if (ret < 0) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}

		leaf = path->nodes[0];
		fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
				    struct btrfs_file_extent_item);
		btrfs_set_file_extent_generation(leaf, fi, trans->transid);
		btrfs_set_file_extent_num_bytes(leaf, fi,
						split - key.offset);

		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);

		btrfs_set_file_extent_generation(leaf, fi, trans->transid);
		btrfs_set_file_extent_offset(leaf, fi, split - orig_offset);
		btrfs_set_file_extent_num_bytes(leaf, fi,
						extent_end - split);
		btrfs_mark_buffer_dirty(leaf);

		ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes,
					   0, root->root_key.objectid,
					   ino, orig_offset);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}

		if (split == start) {
			key.offset = start;
		} else {
			if (start != key.offset) {
				ret = -EINVAL;
				btrfs_abort_transaction(trans, ret);
				goto out;
			}
			path->slots[0]--;
			extent_end = end;
		}
		recow = 1;
	}

	other_start = end;
	other_end = 0;
	if (extent_mergeable(leaf, path->slots[0] + 1,
			     ino, bytenr, orig_offset,
			     &other_start, &other_end)) {
		if (recow) {
			btrfs_release_path(path);
			goto again;
		}
		extent_end = other_end;
		del_slot = path->slots[0] + 1;
		del_nr++;
		ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
					0, root->root_key.objectid,
					ino, orig_offset);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
	}
	other_start = 0;
	other_end = start;
	if (extent_mergeable(leaf, path->slots[0] - 1,
			     ino, bytenr, orig_offset,
			     &other_start, &other_end)) {
		if (recow) {
			btrfs_release_path(path);
			goto again;
		}
		key.offset = other_start;
		del_slot = path->slots[0];
		del_nr++;
		ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
					0, root->root_key.objectid,
					ino, orig_offset);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
	}
	if (del_nr == 0) {
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		btrfs_set_file_extent_type(leaf, fi,
					   BTRFS_FILE_EXTENT_REG);
		btrfs_set_file_extent_generation(leaf, fi, trans->transid);
		btrfs_mark_buffer_dirty(leaf);
	} else {
		fi = btrfs_item_ptr(leaf, del_slot - 1,
				    struct btrfs_file_extent_item);
		btrfs_set_file_extent_type(leaf, fi,
					   BTRFS_FILE_EXTENT_REG);
		btrfs_set_file_extent_generation(leaf, fi, trans->transid);
		btrfs_set_file_extent_num_bytes(leaf, fi,
						extent_end - key.offset);
		btrfs_mark_buffer_dirty(leaf);

		ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
		if (ret < 0) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
	}
out:
	btrfs_free_path(path);
	return 0;
}

/*
 * on error we return an unlocked page and the error value
 * on success we return a locked page and 0
 */
static int prepare_uptodate_page(struct inode *inode,
				 struct page *page, u64 pos,
				 bool force_uptodate)
{
	int ret = 0;

	if (((pos & (PAGE_SIZE - 1)) || force_uptodate) &&
	    !PageUptodate(page)) {
		ret = btrfs_readpage(NULL, page);
		if (ret)
			return ret;
		lock_page(page);
		if (!PageUptodate(page)) {
			unlock_page(page);
			return -EIO;
		}
		if (page->mapping != inode->i_mapping) {
			unlock_page(page);
			return -EAGAIN;
		}
	}
	return 0;
}

/*
 * this just gets pages into the page cache and locks them down.
 */
static noinline int prepare_pages(struct inode *inode, struct page **pages,
				  size_t num_pages, loff_t pos,
				  size_t write_bytes, bool force_uptodate)
{
	int i;
	unsigned long index = pos >> PAGE_SHIFT;
	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
	int err = 0;
	int faili;

	for (i = 0; i < num_pages; i++) {
again:
		pages[i] = find_or_create_page(inode->i_mapping, index + i,
					       mask | __GFP_WRITE);
		if (!pages[i]) {
			faili = i - 1;
			err = -ENOMEM;
			goto fail;
		}

		if (i == 0)
			err = prepare_uptodate_page(inode, pages[i], pos,
						    force_uptodate);
		if (!err && i == num_pages - 1)
			err = prepare_uptodate_page(inode, pages[i],
						    pos + write_bytes, false);
		if (err) {
			put_page(pages[i]);
			if (err == -EAGAIN) {
				err = 0;
				goto again;
			}
			faili = i - 1;
			goto fail;
		}
		wait_on_page_writeback(pages[i]);
	}

	return 0;
fail:
	while (faili >= 0) {
		unlock_page(pages[faili]);
		put_page(pages[faili]);
		faili--;
	}
	return err;
}

/*
 * This function locks the extent and properly waits for data=ordered extents
 * to finish before allowing the pages to be modified if need.
 *
 * The return value:
 * 1 - the extent is locked
 * 0 - the extent is not locked, and everything is OK
 * -EAGAIN - need re-prepare the pages
 * the other < 0 number - Something wrong happens
 */
static noinline int
lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
				size_t num_pages, loff_t pos,
				size_t write_bytes,
				u64 *lockstart, u64 *lockend,
				struct extent_state **cached_state)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
	u64 start_pos;
	u64 last_pos;
	int i;
	int ret = 0;

	start_pos = round_down(pos, fs_info->sectorsize);
	last_pos = start_pos
		+ round_up(pos + write_bytes - start_pos,
			   fs_info->sectorsize) - 1;

	if (start_pos < inode->vfs_inode.i_size) {
		struct btrfs_ordered_extent *ordered;

		lock_extent_bits(&inode->io_tree, start_pos, last_pos,
				 cached_state);
		ordered = btrfs_lookup_ordered_range(inode, start_pos,
						     last_pos - start_pos + 1);
		if (ordered &&
		    ordered->file_offset + ordered->len > start_pos &&
		    ordered->file_offset <= last_pos) {
			unlock_extent_cached(&inode->io_tree, start_pos,
					     last_pos, cached_state);
			for (i = 0; i < num_pages; i++) {
				unlock_page(pages[i]);
				put_page(pages[i]);
			}
			btrfs_start_ordered_extent(&inode->vfs_inode,
						   ordered, 1);
			btrfs_put_ordered_extent(ordered);
			return -EAGAIN;
		}
		if (ordered)
			btrfs_put_ordered_extent(ordered);
		clear_extent_bit(&inode->io_tree, start_pos, last_pos,
				 EXTENT_DIRTY | EXTENT_DELALLOC |
				 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
				 0, 0, cached_state);
		*lockstart = start_pos;
		*lockend = last_pos;
		ret = 1;
	}

	for (i = 0; i < num_pages; i++) {
		if (clear_page_dirty_for_io(pages[i]))
			account_page_redirty(pages[i]);
		set_page_extent_mapped(pages[i]);
		WARN_ON(!PageLocked(pages[i]));
	}

	return ret;
}

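/*
 * Check whether we can do a NOCOW write into [pos, pos + *write_bytes).
 * Returns > 0 if we can, 0 if we can't, or -ENOSPC if snapshot creation is
 * in progress; on success *write_bytes may be shrunk to fit the extent that
 * was found.
 */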
static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
				    size_t *write_bytes)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
	struct btrfs_root *root = inode->root;
	struct btrfs_ordered_extent *ordered;
	u64 lockstart, lockend;
	u64 num_bytes;
	int ret;

	ret = btrfs_start_write_no_snapshotting(root);
	if (!ret)
		return -ENOSPC;

	lockstart = round_down(pos, fs_info->sectorsize);
	lockend = round_up(pos + *write_bytes,
			   fs_info->sectorsize) - 1;

	while (1) {
		lock_extent(&inode->io_tree, lockstart, lockend);
		ordered = btrfs_lookup_ordered_range(inode, lockstart,
						     lockend - lockstart + 1);
		if (!ordered)
			break;
		unlock_extent(&inode->io_tree, lockstart, lockend);
		btrfs_start_ordered_extent(&inode->vfs_inode, ordered, 1);
		btrfs_put_ordered_extent(ordered);
	}

	num_bytes = lockend - lockstart + 1;
	ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes,
			       NULL, NULL, NULL);
	if (ret <= 0) {
		ret = 0;
		btrfs_end_write_no_snapshotting(root);
	} else {
		*write_bytes = min_t(size_t, *write_bytes,
				     num_bytes - pos + lockstart);
	}

	unlock_extent(&inode->io_tree, lockstart, lockend);

	return ret;
}

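/*
 * The main buffered write loop: reserve data and metadata space, prepare and
 * lock the pages, copy the user data into them and mark the range delalloc,
 * retrying in smaller chunks when the copy from userspace faults.
 */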
static noinline ssize_t __btrfs_buffered_write(struct file *file,
					       struct iov_iter *i,
					       loff_t pos)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct page **pages = NULL;
	struct extent_state *cached_state = NULL;
	struct extent_changeset *data_reserved = NULL;
	u64 release_bytes = 0;
	u64 lockstart;
	u64 lockend;
	size_t num_written = 0;
	int nrptrs;
	int ret = 0;
	bool only_release_metadata = false;
	bool force_page_uptodate = false;

	nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE),
			PAGE_SIZE / (sizeof(struct page *)));
	nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied);
	nrptrs = max(nrptrs, 8);
	pages = kmalloc_array(nrptrs, sizeof(struct page *), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	while (iov_iter_count(i) > 0) {
		size_t offset = pos & (PAGE_SIZE - 1);
		size_t sector_offset;
		size_t write_bytes = min(iov_iter_count(i),
					 nrptrs * (size_t)PAGE_SIZE -
					 offset);
		size_t num_pages = DIV_ROUND_UP(write_bytes + offset,
						PAGE_SIZE);
		size_t reserve_bytes;
		size_t dirty_pages;
		size_t copied;
		size_t dirty_sectors;
		size_t num_sectors;
		int extents_locked;

		WARN_ON(num_pages > nrptrs);

		/*
		 * Fault pages before locking them in prepare_pages
		 * to avoid recursive lock
		 */
		if (unlikely(iov_iter_fault_in_readable(i, write_bytes))) {
			ret = -EFAULT;
			break;
		}

		sector_offset = pos & (fs_info->sectorsize - 1);
		reserve_bytes = round_up(write_bytes + sector_offset,
					 fs_info->sectorsize);

		extent_changeset_release(data_reserved);
		ret = btrfs_check_data_free_space(inode, &data_reserved, pos,
						  write_bytes);
		if (ret < 0) {
			if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
						      BTRFS_INODE_PREALLOC)) &&
			    check_can_nocow(BTRFS_I(inode), pos,
					    &write_bytes) > 0) {
				/*
				 * For nodata cow case, no need to reserve
				 * data space.
				 */
				only_release_metadata = true;
				/*
				 * check_can_nocow() may have shrunk
				 * write_bytes, so recompute the number of
				 * pages and the metadata reservation.
				 */
				num_pages = DIV_ROUND_UP(write_bytes + offset,
							 PAGE_SIZE);
				reserve_bytes = round_up(write_bytes +
							 sector_offset,
							 fs_info->sectorsize);
			} else {
				break;
			}
		}

		WARN_ON(reserve_bytes == 0);
		ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
						      reserve_bytes);
		if (ret) {
			if (!only_release_metadata)
				btrfs_free_reserved_data_space(inode,
						data_reserved, pos,
						write_bytes);
			else
				btrfs_end_write_no_snapshotting(root);
			break;
		}

		release_bytes = reserve_bytes;
again:
		/*
		 * This is going to setup the pages array with the number of
		 * pages we want, so we don't really need to worry about the
		 * contents of pages from loop to loop
		 */
		ret = prepare_pages(inode, pages, num_pages,
				    pos, write_bytes,
				    force_page_uptodate);
		if (ret) {
			btrfs_delalloc_release_extents(BTRFS_I(inode),
						       reserve_bytes);
			break;
		}

		extents_locked = lock_and_cleanup_extent_if_need(
				BTRFS_I(inode), pages,
				num_pages, pos, write_bytes, &lockstart,
				&lockend, &cached_state);
		if (extents_locked < 0) {
			if (extents_locked == -EAGAIN)
				goto again;
			btrfs_delalloc_release_extents(BTRFS_I(inode),
						       reserve_bytes);
			ret = extents_locked;
			break;
		}

		copied = btrfs_copy_from_user(pos, write_bytes, pages, i);

		num_sectors = BTRFS_BYTES_TO_BLKS(fs_info, reserve_bytes);
		dirty_sectors = round_up(copied + sector_offset,
					 fs_info->sectorsize);
		dirty_sectors = BTRFS_BYTES_TO_BLKS(fs_info, dirty_sectors);

		/*
		 * if we have trouble faulting in the pages, fall
		 * back to one page at a time
		 */
		if (copied < write_bytes)
			nrptrs = 1;

		if (copied == 0) {
			force_page_uptodate = true;
			dirty_sectors = 0;
			dirty_pages = 0;
		} else {
			force_page_uptodate = false;
			dirty_pages = DIV_ROUND_UP(copied + offset,
						   PAGE_SIZE);
		}

		if (num_sectors > dirty_sectors) {
			/* release everything except the sectors we dirtied */
			release_bytes -= dirty_sectors <<
						fs_info->sb->s_blocksize_bits;
			if (only_release_metadata) {
				btrfs_delalloc_release_metadata(BTRFS_I(inode),
								release_bytes);
			} else {
				u64 __pos;

				__pos = round_down(pos,
						   fs_info->sectorsize) +
					(dirty_pages << PAGE_SHIFT);
				btrfs_delalloc_release_space(inode,
						data_reserved, __pos,
						release_bytes);
			}
		}

		release_bytes = round_up(copied + sector_offset,
					 fs_info->sectorsize);

		if (copied > 0)
			ret = btrfs_dirty_pages(inode, pages, dirty_pages,
						pos, copied, &cached_state);
		if (extents_locked)
			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
					     lockstart, lockend, &cached_state);
		btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
		if (ret) {
			btrfs_drop_pages(pages, num_pages);
			break;
		}

		release_bytes = 0;
		if (only_release_metadata)
			btrfs_end_write_no_snapshotting(root);

		if (only_release_metadata && copied > 0) {
			lockstart = round_down(pos,
					       fs_info->sectorsize);
			lockend = round_up(pos + copied,
					   fs_info->sectorsize) - 1;

			set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
				       lockend, EXTENT_NORESERVE, NULL,
				       NULL, GFP_NOFS);
			only_release_metadata = false;
		}

		btrfs_drop_pages(pages, num_pages);

		cond_resched();

		balance_dirty_pages_ratelimited(inode->i_mapping);
		if (dirty_pages < (fs_info->nodesize >> PAGE_SHIFT) + 1)
			btrfs_btree_balance_dirty(fs_info);

		pos += copied;
		num_written += copied;
	}

	kfree(pages);

	if (release_bytes) {
		if (only_release_metadata) {
			btrfs_end_write_no_snapshotting(root);
			btrfs_delalloc_release_metadata(BTRFS_I(inode),
							release_bytes);
		} else {
			btrfs_delalloc_release_space(inode, data_reserved,
					round_down(pos, fs_info->sectorsize),
					release_bytes);
		}
	}

	extent_changeset_free(data_reserved);
	return num_written ? num_written : ret;
}

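/*
 * Direct IO write path.  If the direct write only partially completes, fall
 * back to a buffered write for the remainder and flush it out, so the whole
 * range ends up on disk as O_DIRECT callers expect.
 */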
static ssize_t __btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	loff_t pos = iocb->ki_pos;
	ssize_t written;
	ssize_t written_buffered;
	loff_t endbyte;
	int err;

	written = generic_file_direct_write(iocb, from);

	if (written < 0 || !iov_iter_count(from))
		return written;

	pos += written;
	written_buffered = __btrfs_buffered_write(file, from, pos);
	if (written_buffered < 0) {
		err = written_buffered;
		goto out;
	}
	/*
	 * Ensure all data is persisted.  We want the next direct IO read to
	 * be able to read what was just written.
	 */
	endbyte = pos + written_buffered - 1;
	err = btrfs_fdatawrite_range(inode, pos, endbyte);
	if (err)
		goto out;
	err = filemap_fdatawait_range(inode->i_mapping, pos, endbyte);
	if (err)
		goto out;
	written += written_buffered;
	iocb->ki_pos = pos + written_buffered;
	invalidate_mapping_pages(file->f_mapping, pos >> PAGE_SHIFT,
				 endbyte >> PAGE_SHIFT);
out:
	return written ? written : err;
}

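/*
 * Update mtime/ctime (and the i_version counter) before a write, unless the
 * inode is flagged NOCMTIME.
 */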
static void update_time_for_write(struct inode *inode)
{
	struct timespec now;

	if (IS_NOCMTIME(inode))
		return;

	now = current_time(inode);
	if (!timespec_equal(&inode->i_mtime, &now))
		inode->i_mtime = now;

	if (!timespec_equal(&inode->i_ctime, &now))
		inode->i_ctime = now;

	if (IS_I_VERSION(inode))
		inode_inc_iversion(inode);
}

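/*
 * Entry point for write(2)/pwritev(2) on btrfs files: perform the generic
 * checks, expand the file if writing past EOF, then hand off to the direct
 * or buffered write path.
 */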
static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
				     struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 start_pos;
	u64 end_pos;
	ssize_t num_written = 0;
	bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
	ssize_t err;
	loff_t pos;
	size_t count = iov_iter_count(from);
	loff_t oldsize;
	int clean_page = 0;

	if (!(iocb->ki_flags & IOCB_DIRECT) &&
	    (iocb->ki_flags & IOCB_NOWAIT))
		return -EOPNOTSUPP;

	if (!inode_trylock(inode)) {
		if (iocb->ki_flags & IOCB_NOWAIT)
			return -EAGAIN;
		inode_lock(inode);
	}

	err = generic_write_checks(iocb, from);
	if (err <= 0) {
		inode_unlock(inode);
		return err;
	}

	pos = iocb->ki_pos;
	if (iocb->ki_flags & IOCB_NOWAIT) {
		/*
		 * We will allocate space in case nodatacow is not set,
		 * so bail
		 */
		if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
					      BTRFS_INODE_PREALLOC)) ||
		    check_can_nocow(BTRFS_I(inode), pos, &count) <= 0) {
			inode_unlock(inode);
			return -EAGAIN;
		}
	}

	current->backing_dev_info = inode_to_bdi(inode);
	err = file_remove_privs(file);
	if (err) {
		inode_unlock(inode);
		goto out;
	}

	/*
	 * If BTRFS flips readonly due to some impossible error, although we
	 * have opened a file as writable, we have to stop this write
	 * operation to ensure consistency.
	 */
	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
		inode_unlock(inode);
		err = -EROFS;
		goto out;
	}

	/*
	 * We reserve space for updating the inode when we reserve space for
	 * the extent we are going to write, so we will enospc out there.  We
	 * don't need to start yet another transaction to update the inode as
	 * we will update the inode when we finish writing whatever data we
	 * write.
	 */
	update_time_for_write(inode);

	start_pos = round_down(pos, fs_info->sectorsize);
	oldsize = i_size_read(inode);
	if (start_pos > oldsize) {
		/* Expand hole size to cover write data, preventing empty gap */
		end_pos = round_up(pos + count,
				   fs_info->sectorsize);
		err = btrfs_cont_expand(inode, oldsize, end_pos);
		if (err) {
			inode_unlock(inode);
			goto out;
		}
		if (start_pos > round_up(oldsize, fs_info->sectorsize))
			clean_page = 1;
	}

	if (sync)
		atomic_inc(&BTRFS_I(inode)->sync_writers);

	if (iocb->ki_flags & IOCB_DIRECT) {
		num_written = __btrfs_direct_write(iocb, from);
	} else {
		num_written = __btrfs_buffered_write(file, from, pos);
		if (num_written > 0)
			iocb->ki_pos = pos + num_written;
		if (clean_page)
			pagecache_isize_extended(inode, oldsize,
						 i_size_read(inode));
	}

	inode_unlock(inode);

	/*
	 * We also have to set last_sub_trans to the current log transid,
	 * otherwise subsequent syncs to a file that's been synced in this
	 * transaction will appear to have already occurred.
	 */
	spin_lock(&BTRFS_I(inode)->lock);
	BTRFS_I(inode)->last_sub_trans = root->log_transid;
	spin_unlock(&BTRFS_I(inode)->lock);
	if (num_written > 0)
		num_written = generic_write_sync(iocb, num_written);

	if (sync)
		atomic_dec(&BTRFS_I(inode)->sync_writers);
out:
	current->backing_dev_info = NULL;
	return num_written ? num_written : err;
}

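/*
 * Called on the last close of a file: drop the per-file private state and
 * flush delayed writes when a truncate-then-rewrite replace is in progress
 * (see the comment on BTRFS_INODE_ORDERED_DATA_CLOSE below).
 */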
int btrfs_release_file(struct inode *inode, struct file *filp)
{
	struct btrfs_file_private *private = filp->private_data;

	if (private && private->trans)
		btrfs_ioctl_trans_end(filp);
	if (private && private->filldir_buf)
		kfree(private->filldir_buf);
	kfree(private);
	filp->private_data = NULL;

	/*
	 * ordered_data_close is set by setattr when we are about to truncate
	 * a file from a non-zero size to a zero size.  This tries to
	 * flush down new bytes that may have been written if the
	 * application were using truncate to replace a file in place.
	 */
	if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
			       &BTRFS_I(inode)->runtime_flags))
		filemap_flush(inode->i_mapping);
	return 0;
}

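/*
 * Kick off writeback (and thus ordered extent creation) for the given range
 * without waiting for completion.
 */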
static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end)
{
	int ret;
	struct blk_plug plug;

	/*
	 * This is only called in fsync, which would do synchronous writes, so
	 * a plug can merge adjacent IOs as much as possible.  Esp. in case of
	 * multiple disks using raid profile, a large IO can be split to
	 * several segments of stripe length (currently 64K).
	 */
	blk_start_plug(&plug);
	atomic_inc(&BTRFS_I(inode)->sync_writers);
	ret = btrfs_fdatawrite_range(inode, start, end);
	atomic_dec(&BTRFS_I(inode)->sync_writers);
	blk_finish_plug(&plug);

	return ret;
}

/*
 * fsync call for both files and directories.  This logs the inode into
 * the tree log instead of forcing full commits whenever possible.
 *
 * It needs to call filemap_fdatawait so that all ordered extent updates are
 * in the metadata btree are up to date for copying to the log.
 *
 * It drops the inode mutex before doing the tree log commit.  This is an
 * important optimization for directories because holding the mutex prevents
 * new operations on the dir while we write to disk.
 */
int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_trans_handle *trans;
	struct btrfs_log_ctx ctx;
	int ret = 0, err;
	bool full_sync = false;
	u64 len;

	/*
	 * The range length can be represented by u64, we have to do the
	 * typecasts to avoid signed overflow if it's [0, LLONG_MAX].
	 */
	len = (u64)end - (u64)start + 1;
	trace_btrfs_sync_file(file, datasync);

	btrfs_init_log_ctx(&ctx, inode);

	/*
	 * We write the dirty pages in the range and wait until they complete
	 * out of the ->i_mutex. If so, we can flush the dirty pages by
	 * multi-task, and make the performance up.  See
	 * btrfs_wait_ordered_range for an explanation of the ASYNC check.
	 */
	ret = start_ordered_ops(inode, start, end);
	if (ret)
		goto out;

	inode_lock(inode);
	atomic_inc(&root->log_batch);
	full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
			     &BTRFS_I(inode)->runtime_flags);
	/*
	 * Make sure the ordered extents for the range exist before logging,
	 * so the log has a consistent view of the file.
	 */
	if (full_sync) {
		/*
		 * For a full sync, we need to make sure any ordered operations
		 * start and finish before we start logging the inode, so that
		 * all extents are persisted and the respective file extent
		 * items are in the fs/subvol btree.
		 */
		ret = btrfs_wait_ordered_range(inode, start, len);
	} else {
		/*
		 * Start any new ordered operations before starting to log the
		 * inode; we will wait for them to finish in btrfs_sync_log().
		 *
		 * Right before acquiring the inode's mutex, we might have new
		 * writes dirtying pages, which won't immediately start the
		 * respective ordered operations - that is done through the
		 * fill_delalloc callbacks invoked from the writepage and
		 * writepages address space operations.  So make sure we start
		 * all ordered operations before starting to log our inode, so
		 * that they are waited for and their extents persisted.
		 */
		ret = start_ordered_ops(inode, start, end);
	}
	if (ret) {
		inode_unlock(inode);
		goto out;
	}
	atomic_inc(&root->log_batch);

	/*
	 * If the last transaction that changed this inode is no newer than
	 * the last committed transaction, there is nothing to log or commit
	 * and we can bail out early.
	 *
	 * When the full sync flag is set we waited above for all ordered
	 * extents to complete, which updates the inode's last_trans through
	 * btrfs_finish_ordered_io(), so last_trans is reliable in that case.
	 * Without the full sync flag we can only bail if there are no
	 * ordered extents left in the range, because otherwise last_trans
	 * might not reflect completions that are still in flight.
	 */
	smp_mb();
	if (btrfs_inode_in_log(BTRFS_I(inode), fs_info->generation) ||
	    (full_sync && BTRFS_I(inode)->last_trans <=
	     fs_info->last_trans_committed) ||
	    (!btrfs_have_ordered_extents_in_range(inode, start, len) &&
	     BTRFS_I(inode)->last_trans
	     <= fs_info->last_trans_committed)) {
		/*
		 * We've had everything committed since the last time we were
		 * modified so clear this flag in case it was set for whatever
		 * reason, it's no longer relevant.
		 */
		clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
			  &BTRFS_I(inode)->runtime_flags);
		/*
		 * An ordered extent might have started before and completed
		 * already with io errors, in which case the inode was not
		 * updated and we end up here.  So check the inode's mapping
		 * for any errors that might have happened since we last
		 * called fsync.
		 */
		ret = filemap_check_wb_err(inode->i_mapping, file->f_wb_err);
		inode_unlock(inode);
		goto out;
	}

	/*
	 * ok we haven't committed the transaction yet, lets do a commit
	 */
	if (file->private_data)
		btrfs_ioctl_trans_end(file);

	/*
	 * We use start here because we will need to wait on the IO to
	 * complete in btrfs_sync_log, which could require joining a
	 * transaction (for example checking cross references in the nocow
	 * path).  If we use join here we could get into a situation where
	 * we're waiting on IO to happen that is blocked waiting on someone
	 * else to check cross references, and we would deadlock.  So a full
	 * transaction start it is.
	 */
	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		inode_unlock(inode);
		goto out;
	}
	trans->sync = true;

	ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx);
	if (ret < 0) {
		/* Fallthrough and commit/free transaction. */
		ret = 1;
	}

	/*
	 * We've logged all the items and now have a consistent
	 * version of the file in the log.  It is possible that
	 * someone will come in and modify the file, but that's
	 * fine because the log is consistent on disk, and we
	 * have references to all of the file's extents.
	 *
	 * It is possible that someone will come in and log the
	 * file again, but that will end up using the synchronization
	 * inside btrfs_sync_log to keep things safe.
	 */
	inode_unlock(inode);

	/*
	 * If any of the ordered extents had an error, just return it to user
	 * space, so that the application knows some writes didn't succeed and
	 * can take proper action (retry for e.g.).  Blindly committing the
	 * transaction in this case would fool userspace into thinking
	 * everything was successful.  And we also want to make sure our log
	 * doesn't contain file extent items pointing to extents that weren't
	 * fully written to.
	 */
	if (ctx.io_err) {
		btrfs_end_transaction(trans);
		ret = ctx.io_err;
		goto out;
	}

	if (ret != BTRFS_NO_LOG_SYNC) {
		if (!ret) {
			ret = btrfs_sync_log(trans, root, &ctx);
			if (!ret) {
				ret = btrfs_end_transaction(trans);
				goto out;
			}
		}
		if (!full_sync) {
			ret = btrfs_wait_ordered_range(inode, start, len);
			if (ret) {
				btrfs_end_transaction(trans);
				goto out;
			}
		}
		ret = btrfs_commit_transaction(trans);
	} else {
		ret = btrfs_end_transaction(trans);
	}
out:
	ASSERT(list_empty(&ctx.list));
	err = file_check_and_advance_wb_err(file);
	if (!ret)
		ret = err;
	return ret > 0 ? -EIO : ret;
}

static const struct vm_operations_struct btrfs_file_vm_ops = {
	.fault		= filemap_fault,
	.map_pages	= filemap_map_pages,
	.page_mkwrite	= btrfs_page_mkwrite,
};

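/*
 * Set up a mapping of the file; a readpage operation is required so page
 * faults can bring pages uptodate.
 */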
static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct address_space *mapping = filp->f_mapping;

	if (!mapping->a_ops->readpage)
		return -ENOEXEC;

	file_accessed(filp);
	vma->vm_ops = &btrfs_file_vm_ops;

	return 0;
}

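/*
 * Check whether the file extent item at @slot is a hole (disk_bytenr == 0)
 * adjacent to the range [start, end), so a new hole can be merged with it.
 */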
static int hole_mergeable(struct btrfs_inode *inode, struct extent_buffer *leaf,
			  int slot, u64 start, u64 end)
{
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;

	if (slot < 0 || slot >= btrfs_header_nritems(leaf))
		return 0;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	if (key.objectid != btrfs_ino(inode) ||
	    key.type != BTRFS_EXTENT_DATA_KEY)
		return 0;

	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);

	if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
		return 0;

	if (btrfs_file_extent_disk_bytenr(leaf, fi))
		return 0;

	if (key.offset == end)
		return 1;
	if (key.offset + btrfs_file_extent_num_bytes(leaf, fi) == start)
		return 1;
	return 0;
}

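/*
 * Insert or extend a hole file extent item covering [offset, end) after a
 * range was dropped; a no-op on filesystems with the NO_HOLES feature.  The
 * extent map cache is updated with a matching hole extent map as well.
 */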
static int fill_holes(struct btrfs_trans_handle *trans,
		struct btrfs_inode *inode,
		struct btrfs_path *path, u64 offset, u64 end)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
	struct btrfs_root *root = inode->root;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	struct extent_map *hole_em;
	struct extent_map_tree *em_tree = &inode->extent_tree;
	struct btrfs_key key;
	int ret;

	if (btrfs_fs_incompat(fs_info, NO_HOLES))
		goto out;

	key.objectid = btrfs_ino(inode);
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = offset;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret <= 0) {
		/*
		 * We should have dropped this offset, so if we find it then
		 * something has gone horribly wrong.
		 */
		if (ret == 0)
			ret = -EINVAL;
		return ret;
	}

	leaf = path->nodes[0];
	if (hole_mergeable(inode, leaf, path->slots[0] - 1, offset, end)) {
		u64 num_bytes;

		path->slots[0]--;
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		num_bytes = btrfs_file_extent_num_bytes(leaf, fi) +
			end - offset;
		btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
		btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
		btrfs_set_file_extent_offset(leaf, fi, 0);
		btrfs_mark_buffer_dirty(leaf);
		goto out;
	}

	if (hole_mergeable(inode, leaf, path->slots[0], offset, end)) {
		u64 num_bytes;

		key.offset = offset;
		btrfs_set_item_key_safe(fs_info, path, &key);
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end -
			offset;
		btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
		btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
		btrfs_set_file_extent_offset(leaf, fi, 0);
		btrfs_mark_buffer_dirty(leaf);
		goto out;
	}
	btrfs_release_path(path);

	ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode),
			offset, 0, 0, end - offset, 0, end - offset, 0, 0, 0);
	if (ret)
		return ret;

out:
	btrfs_release_path(path);

	hole_em = alloc_extent_map();
	if (!hole_em) {
		btrfs_drop_extent_cache(inode, offset, end - 1, 0);
		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
	} else {
		hole_em->start = offset;
		hole_em->len = end - offset;
		hole_em->ram_bytes = hole_em->len;
		hole_em->orig_start = offset;

		hole_em->block_start = EXTENT_MAP_HOLE;
		hole_em->block_len = 0;
		hole_em->orig_block_len = 0;
		hole_em->bdev = fs_info->fs_devices->latest_bdev;
		hole_em->compress_type = BTRFS_COMPRESS_NONE;
		hole_em->generation = trans->transid;

		do {
			btrfs_drop_extent_cache(inode, offset, end - 1, 0);
			write_lock(&em_tree->lock);
			ret = add_extent_mapping(em_tree, hole_em, 1);
			write_unlock(&em_tree->lock);
		} while (ret == -EEXIST);
		free_extent_map(hole_em);
		if (ret)
			set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
				&inode->runtime_flags);
	}

	return 0;
}

2433
2434
2435
2436
2437
2438
2439static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len)
2440{
2441 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
2442 struct extent_map *em;
2443 int ret = 0;
2444
2445 em = btrfs_get_extent(BTRFS_I(inode), NULL, 0,
2446 round_down(*start, fs_info->sectorsize),
2447 round_up(*len, fs_info->sectorsize), 0);
2448 if (IS_ERR(em))
2449 return PTR_ERR(em);
2450
2451
2452 if (em->block_start == EXTENT_MAP_HOLE) {
2453 ret = 1;
2454 *len = em->start + em->len > *start + *len ?
2455 0 : *start + *len - em->start - em->len;
2456 *start = em->start + em->len;
2457 }
2458 free_extent_map(em);
2459 return ret;
2460}
2461
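/*
 * Lock the extent range [lockstart, lockend] for a hole punch or zero range
 * operation, retrying until no ordered extents and no cached pages remain in
 * the range that could race with the operation.
 */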
static int btrfs_punch_hole_lock_range(struct inode *inode,
				       const u64 lockstart,
				       const u64 lockend,
				       struct extent_state **cached_state)
{
	while (1) {
		struct btrfs_ordered_extent *ordered;
		int ret;

		truncate_pagecache_range(inode, lockstart, lockend);

		lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
				 cached_state);
		ordered = btrfs_lookup_first_ordered_extent(inode, lockend);

		/*
		 * We need to make sure we have no ordered extents in this
		 * range and nobody raced in and read a page in this range,
		 * if we did we need to try again.
		 */
		if ((!ordered ||
		    (ordered->file_offset + ordered->len <= lockstart ||
		     ordered->file_offset > lockend)) &&
		     !btrfs_page_exists_in_range(inode, lockstart, lockend)) {
			if (ordered)
				btrfs_put_ordered_extent(ordered);
			break;
		}
		if (ordered)
			btrfs_put_ordered_extent(ordered);
		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
				     lockend, cached_state);
		ret = btrfs_wait_ordered_range(inode, lockstart,
					       lockend - lockstart + 1);
		if (ret)
			return ret;
	}
	return 0;
}

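/*
 * Punch a hole over [offset, offset + len): zero out the unaligned head and
 * tail blocks, drop all extents fully inside the block-aligned part of the
 * range and, when required, insert hole extent items in their place.
 */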
static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_state *cached_state = NULL;
	struct btrfs_path *path;
	struct btrfs_block_rsv *rsv;
	struct btrfs_trans_handle *trans;
	u64 lockstart;
	u64 lockend;
	u64 tail_start;
	u64 tail_len;
	u64 orig_start = offset;
	u64 cur_offset;
	u64 min_size = btrfs_calc_trans_metadata_size(fs_info, 1);
	u64 drop_end;
	int ret = 0;
	int err = 0;
	unsigned int rsv_count;
	bool same_block;
	bool no_holes = btrfs_fs_incompat(fs_info, NO_HOLES);
	u64 ino_size;
	bool truncated_block = false;
	bool updated_inode = false;

	ret = btrfs_wait_ordered_range(inode, offset, len);
	if (ret)
		return ret;

	inode_lock(inode);
	ino_size = round_up(inode->i_size, fs_info->sectorsize);
	ret = find_first_non_hole(inode, &offset, &len);
	if (ret < 0)
		goto out_only_mutex;
	if (ret && !len) {
		/* Already in a large hole */
		ret = 0;
		goto out_only_mutex;
	}

	lockstart = round_up(offset, btrfs_inode_sectorsize(inode));
	lockend = round_down(offset + len,
			     btrfs_inode_sectorsize(inode)) - 1;
	same_block = (BTRFS_BYTES_TO_BLKS(fs_info, offset))
		== (BTRFS_BYTES_TO_BLKS(fs_info, offset + len - 1));
	/*
	 * We needn't truncate any block which is beyond the end of the file
	 * because we are sure there is no data there.
	 */
	/*
	 * Only do this if we are in the same block and we aren't doing the
	 * entire block.
	 */
	if (same_block && len < fs_info->sectorsize) {
		if (offset < ino_size) {
			truncated_block = true;
			ret = btrfs_truncate_block(inode, offset, len, 0);
		} else {
			ret = 0;
		}
		goto out_only_mutex;
	}

	/* zero back part of the first block */
	if (offset < ino_size) {
		truncated_block = true;
		ret = btrfs_truncate_block(inode, offset, 0, 0);
		if (ret) {
			inode_unlock(inode);
			return ret;
		}
	}

	/*
	 * Check the aligned pages after the first unaligned page; if
	 * offset != orig_start, the first unaligned page and several
	 * following pages are already in holes, so the extra check can
	 * be skipped.
	 */
	if (offset == orig_start) {
		/* After truncating the page, check for a hole again */
		len = offset + len - lockstart;
		offset = lockstart;
		ret = find_first_non_hole(inode, &offset, &len);
		if (ret < 0)
			goto out_only_mutex;
		if (ret && !len) {
			ret = 0;
			goto out_only_mutex;
		}
		lockstart = offset;
	}

	/* Check whether the tail unaligned part sits in a hole */
	tail_start = lockend + 1;
	tail_len = offset + len - tail_start;
	if (tail_len) {
		ret = find_first_non_hole(inode, &tail_start, &tail_len);
		if (unlikely(ret < 0))
			goto out_only_mutex;
		if (!ret) {
			/* zero the front end of the last page */
			if (tail_start + tail_len < ino_size) {
				truncated_block = true;
				ret = btrfs_truncate_block(inode,
							tail_start + tail_len,
							0, 1);
				if (ret)
					goto out_only_mutex;
			}
		}
	}

	if (lockend < lockstart) {
		ret = 0;
		goto out_only_mutex;
	}

	/*
	 * If locking the range fails it is already unlocked on return, so
	 * only the inode lock still needs to be dropped; unlocking the inode
	 * here as well would be a double unlock.
	 */
	ret = btrfs_punch_hole_lock_range(inode, lockstart, lockend,
					  &cached_state);
	if (ret)
		goto out_only_mutex;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
	if (!rsv) {
		ret = -ENOMEM;
		goto out_free;
	}
	rsv->size = btrfs_calc_trans_metadata_size(fs_info, 1);
	rsv->failfast = 1;

	/*
	 * 1 - update the inode
	 * 1 - removing the extents in the range
	 * 1 - adding the hole extent if no_holes isn't set
	 */
	rsv_count = no_holes ? 2 : 3;
	trans = btrfs_start_transaction(root, rsv_count);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		goto out_free;
	}

	ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv,
				      min_size, 0);
	BUG_ON(ret);	/* shouldn't happen */
	trans->block_rsv = rsv;

	cur_offset = lockstart;
	len = lockend - cur_offset;
	while (cur_offset < lockend) {
		ret = __btrfs_drop_extents(trans, root, inode, path,
					   cur_offset, lockend + 1,
					   &drop_end, 1, 0, 0, NULL);
		if (ret != -ENOSPC)
			break;

		trans->block_rsv = &fs_info->trans_block_rsv;

		if (cur_offset < drop_end && cur_offset < ino_size) {
			ret = fill_holes(trans, BTRFS_I(inode), path,
					 cur_offset, drop_end);
			if (ret) {
				/*
				 * If we failed then we didn't insert our hole
				 * entries for the area we dropped, so now the
				 * fs is corrupted, so we must abort the
				 * transaction, so that we don't commit this
				 * transaction and leave the fs corrupted.
				 */
				btrfs_abort_transaction(trans, ret);
				err = ret;
				break;
			}
		}

		cur_offset = drop_end;

		ret = btrfs_update_inode(trans, root, inode);
		if (ret) {
			err = ret;
			break;
		}

		btrfs_end_transaction(trans);
		btrfs_btree_balance_dirty(fs_info);

		trans = btrfs_start_transaction(root, rsv_count);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			trans = NULL;
			break;
		}

		ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv,
					      rsv, min_size, 0);
		BUG_ON(ret);	/* shouldn't happen */
		trans->block_rsv = rsv;

		ret = find_first_non_hole(inode, &cur_offset, &len);
		if (unlikely(ret < 0))
			break;
		if (ret && !len) {
			ret = 0;
			break;
		}
	}

	if (ret) {
		err = ret;
		goto out_trans;
	}

	trans->block_rsv = &fs_info->trans_block_rsv;
	/*
	 * If we are using the NO_HOLES feature we might have had already a
	 * hole that overlaps a part of the region [lockstart, lockend] and
	 * ends at (or beyond) lockend. Since we have no file extent items to
	 * represent holes, drop_end can be less than lockend and so we must
	 * make sure we have an extent map representing the existing hole (the
	 * call to __btrfs_drop_extents() might have dropped the existing
	 * extent map representing the existing hole), otherwise the fast
	 * fsync path will not record the existence of the hole region
	 * [existing_hole_start, lockend].
	 */
	if (drop_end <= lockend)
		drop_end = lockend + 1;
	/*
	 * Don't insert a file hole extent item if it's for a range beyond eof
	 * (because it's useless) or if it represents a 0 length hole (because
	 * it doesn't make any sense to do it).
	 */
	if (cur_offset < ino_size && cur_offset < drop_end) {
		ret = fill_holes(trans, BTRFS_I(inode), path,
				 cur_offset, drop_end);
		if (ret) {
			/* Same comment as above. */
			btrfs_abort_transaction(trans, ret);
			err = ret;
			goto out_trans;
		}
	}

out_trans:
	if (!trans)
		goto out_free;

	inode_inc_iversion(inode);
	inode->i_mtime = inode->i_ctime = current_time(inode);

	trans->block_rsv = &fs_info->trans_block_rsv;
	ret = btrfs_update_inode(trans, root, inode);
	updated_inode = true;
	btrfs_end_transaction(trans);
	btrfs_btree_balance_dirty(fs_info);
out_free:
	btrfs_free_path(path);
	btrfs_free_block_rsv(fs_info, rsv);
out:
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			     &cached_state);
out_only_mutex:
	if (!updated_inode && truncated_block && !ret && !err) {
		/*
		 * If we only end up zeroing part of a page, we still need to
		 * update the inode item, so that all the time fields are
		 * updated as well as the new_size getting set to the actual
		 * last byte of the last page.
		 */
		trans = btrfs_start_transaction(root, 1);
		if (IS_ERR(trans)) {
			err = PTR_ERR(trans);
		} else {
			err = btrfs_update_inode(trans, root, inode);
			ret = btrfs_end_transaction(trans);
		}
	}
	inode_unlock(inode);
	if (ret && !err)
		err = ret;
	return err;
}

/* Helper structure to record which range is already reserved */
struct falloc_range {
	struct list_head list;
	u64 start;
	u64 len;
};

/*
 * Helper function to add a falloc range
 *
 * Caller should have locked the larger range of extents containing
 * [start, len).
 */
static int add_falloc_range(struct list_head *head, u64 start, u64 len)
{
	struct falloc_range *prev = NULL;
	struct falloc_range *range = NULL;

	if (list_empty(head))
		goto insert;

	/*
	 * As fallocate iterates by bytenr order, we only need to check
	 * the last range.
	 */
	prev = list_entry(head->prev, struct falloc_range, list);
	if (prev->start + prev->len == start) {
		prev->len += len;
		return 0;
	}
insert:
	range = kmalloc(sizeof(*range), GFP_KERNEL);
	if (!range)
		return -ENOMEM;
	range->start = start;
	range->len = len;
	list_add_tail(&range->list, head);
	return 0;
}

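/*
 * After an allocating fallocate, bump i_size to @end unless
 * FALLOC_FL_KEEP_SIZE was requested or the file already extends that far.
 */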
static int btrfs_fallocate_update_isize(struct inode *inode,
					const u64 end,
					const int mode)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret;
	int ret2;

	if (mode & FALLOC_FL_KEEP_SIZE || end <= i_size_read(inode))
		return 0;

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	inode->i_ctime = current_time(inode);
	i_size_write(inode, end);
	btrfs_ordered_update_i_size(inode, end, NULL);
	ret = btrfs_update_inode(trans, root, inode);
	ret2 = btrfs_end_transaction(trans);

	return ret ? ret : ret2;
}

enum {
	RANGE_BOUNDARY_WRITTEN_EXTENT = 0,
	RANGE_BOUNDARY_PREALLOC_EXTENT = 1,
	RANGE_BOUNDARY_HOLE = 2,
};

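/*
 * Tell whether the sector containing @offset maps to a written extent, a
 * prealloc extent or a hole, so that btrfs_zero_range() can decide between
 * zeroing the existing block and widening its allocation range.
 */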
static int btrfs_zero_range_check_range_boundary(struct inode *inode,
						 u64 offset)
{
	const u64 sectorsize = btrfs_inode_sectorsize(inode);
	struct extent_map *em;
	int ret;

	offset = round_down(offset, sectorsize);
	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
	if (IS_ERR(em))
		return PTR_ERR(em);

	if (em->block_start == EXTENT_MAP_HOLE)
		ret = RANGE_BOUNDARY_HOLE;
	else if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
		ret = RANGE_BOUNDARY_PREALLOC_EXTENT;
	else
		ret = RANGE_BOUNDARY_WRITTEN_EXTENT;

	free_extent_map(em);
	return ret;
}

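/*
 * Implement FALLOC_FL_ZERO_RANGE: make [offset, offset + len) read back as
 * zeroes by zeroing partial blocks in place and turning the block-aligned
 * remainder into prealloc (unwritten) extents.
 */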
static int btrfs_zero_range(struct inode *inode,
			    loff_t offset,
			    loff_t len,
			    const int mode)
{
	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
	struct extent_map *em;
	struct extent_changeset *data_reserved = NULL;
	int ret;
	u64 alloc_hint = 0;
	const u64 sectorsize = btrfs_inode_sectorsize(inode);
	u64 alloc_start = round_down(offset, sectorsize);
	u64 alloc_end = round_up(offset + len, sectorsize);
	u64 bytes_to_reserve = 0;
	bool space_reserved = false;

	inode_dio_wait(inode);

	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0,
			      alloc_start, alloc_end - alloc_start, 0);
	if (IS_ERR(em)) {
		ret = PTR_ERR(em);
		goto out;
	}

	/*
	 * Avoid hole punching and extent allocation for some cases. More cases
	 * could be considered, but these are unlikely common and we keep things
	 * as simple as possible for now. Also, intentionally, if the target
	 * range contains one or more prealloc extents together with regular
	 * extents and holes, we drop all the existing extents and allocate a
	 * new prealloc extent, so that we get a larger contiguous disk extent.
	 */
	if (em->start <= alloc_start &&
	    test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
		const u64 em_end = em->start + em->len;

		if (em_end >= offset + len) {
			/*
			 * The whole range is already a prealloc extent,
			 * do nothing except updating the inode's i_size if
			 * needed.
			 */
			free_extent_map(em);
			ret = btrfs_fallocate_update_isize(inode, offset + len,
							   mode);
			goto out;
		}
		/*
		 * Part of the range is already a prealloc extent, so operate
		 * only on the remaining part of the range.
		 */
		alloc_start = em_end;
		ASSERT(IS_ALIGNED(alloc_start, sectorsize));
		len = offset + len - alloc_start;
		offset = alloc_start;
		alloc_hint = em->block_start + em->len;
	}
	free_extent_map(em);

	if (BTRFS_BYTES_TO_BLKS(fs_info, offset) ==
	    BTRFS_BYTES_TO_BLKS(fs_info, offset + len - 1)) {
		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0,
				      alloc_start, sectorsize, 0);
		if (IS_ERR(em)) {
			ret = PTR_ERR(em);
			goto out;
		}

		if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
			free_extent_map(em);
			ret = btrfs_fallocate_update_isize(inode, offset + len,
							   mode);
			goto out;
		}
		if (len < sectorsize && em->block_start != EXTENT_MAP_HOLE) {
			free_extent_map(em);
			ret = btrfs_truncate_block(inode, offset, len, 0);
			if (!ret)
				ret = btrfs_fallocate_update_isize(inode,
								   offset + len,
								   mode);
			return ret;
		}
		free_extent_map(em);
		alloc_start = round_down(offset, sectorsize);
		alloc_end = alloc_start + sectorsize;
		goto reserve_space;
	}

	alloc_start = round_up(offset, sectorsize);
	alloc_end = round_down(offset + len, sectorsize);

	/*
	 * For unaligned ranges, check the pages at the boundaries, they might
	 * map to an extent, in which case we need to partially zero them, or
	 * they might map to a hole, in which case we need our allocation range
	 * to cover them.
	 */
	if (!IS_ALIGNED(offset, sectorsize)) {
		ret = btrfs_zero_range_check_range_boundary(inode, offset);
		if (ret < 0)
			goto out;
		if (ret == RANGE_BOUNDARY_HOLE) {
			alloc_start = round_down(offset, sectorsize);
			ret = 0;
		} else if (ret == RANGE_BOUNDARY_WRITTEN_EXTENT) {
			ret = btrfs_truncate_block(inode, offset, 0, 0);
			if (ret)
				goto out;
		} else {
			ret = 0;
		}
	}

	if (!IS_ALIGNED(offset + len, sectorsize)) {
		ret = btrfs_zero_range_check_range_boundary(inode,
							    offset + len);
		if (ret < 0)
			goto out;
		if (ret == RANGE_BOUNDARY_HOLE) {
			alloc_end = round_up(offset + len, sectorsize);
			ret = 0;
		} else if (ret == RANGE_BOUNDARY_WRITTEN_EXTENT) {
			ret = btrfs_truncate_block(inode, offset + len, 0, 1);
			if (ret)
				goto out;
		} else {
			ret = 0;
		}
	}

reserve_space:
	if (alloc_start < alloc_end) {
		struct extent_state *cached_state = NULL;
		const u64 lockstart = alloc_start;
		const u64 lockend = alloc_end - 1;

		bytes_to_reserve = alloc_end - alloc_start;
		ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode),
						      bytes_to_reserve);
		if (ret < 0)
			goto out;
		space_reserved = true;
		ret = btrfs_qgroup_reserve_data(inode, &data_reserved,
						alloc_start, bytes_to_reserve);
		if (ret)
			goto out;
		ret = btrfs_punch_hole_lock_range(inode, lockstart, lockend,
						  &cached_state);
		if (ret)
			goto out;
		ret = btrfs_prealloc_file_range(inode, mode, alloc_start,
						alloc_end - alloc_start,
						i_blocksize(inode),
						offset + len, &alloc_hint);
		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
				     lockend, &cached_state);
		/* btrfs_prealloc_file_range releases reserved space on error */
		if (ret) {
			space_reserved = false;
			goto out;
		}
	}
	ret = btrfs_fallocate_update_isize(inode, offset + len, mode);
out:
	if (ret && space_reserved)
		btrfs_free_reserved_data_space(inode, data_reserved,
					       alloc_start, bytes_to_reserve);
	extent_changeset_free(data_reserved);

	return ret;
}

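/*
 * fallocate entry point. Plain mode preallocates unwritten extents for every
 * hole in the range; FALLOC_FL_PUNCH_HOLE and FALLOC_FL_ZERO_RANGE are
 * dispatched to btrfs_punch_hole() and btrfs_zero_range() respectively.
 */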
static long btrfs_fallocate(struct file *file, int mode,
			    loff_t offset, loff_t len)
{
	struct inode *inode = file_inode(file);
	struct extent_state *cached_state = NULL;
	struct extent_changeset *data_reserved = NULL;
	struct falloc_range *range;
	struct falloc_range *tmp;
	struct list_head reserve_list;
	u64 cur_offset;
	u64 last_byte;
	u64 alloc_start;
	u64 alloc_end;
	u64 alloc_hint = 0;
	u64 locked_end;
	u64 actual_end = 0;
	struct extent_map *em;
	int blocksize = btrfs_inode_sectorsize(inode);
	int ret;

	alloc_start = round_down(offset, blocksize);
	alloc_end = round_up(offset + len, blocksize);
	cur_offset = alloc_start;

	/* Make sure we aren't being given some crap mode */
	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
		     FALLOC_FL_ZERO_RANGE))
		return -EOPNOTSUPP;

	if (mode & FALLOC_FL_PUNCH_HOLE)
		return btrfs_punch_hole(inode, offset, len);

	/*
	 * Only trigger disk allocation, don't trigger qgroup reserve
	 *
	 * For qgroup space, it will be checked later.
	 */
	if (!(mode & FALLOC_FL_ZERO_RANGE)) {
		ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode),
						      alloc_end - alloc_start);
		if (ret < 0)
			return ret;
	}

	inode_lock(inode);

	if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) {
		ret = inode_newsize_ok(inode, offset + len);
		if (ret)
			goto out;
	}

	/*
	 * TODO: Move these two operations after we have checked
	 * accurate reserved space, or fallocate can still fail but
	 * with page truncated or size expanded.
	 *
	 * But that's a minor problem and won't do much harm BTW.
	 */
	if (alloc_start > inode->i_size) {
		ret = btrfs_cont_expand(inode, i_size_read(inode),
					alloc_start);
		if (ret)
			goto out;
	} else if (offset + len > inode->i_size) {
		/*
		 * If we are fallocating from the end of the file onward we
		 * need to zero out the end of the block if i_size lands in the
		 * middle of a block.
		 */
		ret = btrfs_truncate_block(inode, inode->i_size, 0, 0);
		if (ret)
			goto out;
	}

	/*
	 * wait for ordered IO before we have any locks.  We'll loop again
	 * below with the locks held.
	 */
	ret = btrfs_wait_ordered_range(inode, alloc_start,
				       alloc_end - alloc_start);
	if (ret)
		goto out;

	if (mode & FALLOC_FL_ZERO_RANGE) {
		ret = btrfs_zero_range(inode, offset, len, mode);
		inode_unlock(inode);
		return ret;
	}

	locked_end = alloc_end - 1;
	while (1) {
		struct btrfs_ordered_extent *ordered;

		/*
		 * The extent lock is ordered inside the running transaction.
		 */
		lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
				 locked_end, &cached_state);
		ordered = btrfs_lookup_first_ordered_extent(inode, locked_end);

		if (ordered &&
		    ordered->file_offset + ordered->len > alloc_start &&
		    ordered->file_offset < alloc_end) {
			btrfs_put_ordered_extent(ordered);
			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
					     alloc_start, locked_end,
					     &cached_state);
			/*
			 * we can't wait on the range with the transaction
			 * running or with the extent lock held
			 */
			ret = btrfs_wait_ordered_range(inode, alloc_start,
						       alloc_end - alloc_start);
			if (ret)
				goto out;
		} else {
			if (ordered)
				btrfs_put_ordered_extent(ordered);
			break;
		}
	}

	/* First, check if we exist for the range */
	INIT_LIST_HEAD(&reserve_list);
	while (cur_offset < alloc_end) {
		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
				      alloc_end - cur_offset, 0);
		if (IS_ERR(em)) {
			ret = PTR_ERR(em);
			break;
		}
		last_byte = min(extent_map_end(em), alloc_end);
		actual_end = min_t(u64, extent_map_end(em), offset + len);
		last_byte = ALIGN(last_byte, blocksize);
		if (em->block_start == EXTENT_MAP_HOLE ||
		    (cur_offset >= inode->i_size &&
		     !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
			ret = add_falloc_range(&reserve_list, cur_offset,
					       last_byte - cur_offset);
			if (ret < 0) {
				free_extent_map(em);
				break;
			}
			ret = btrfs_qgroup_reserve_data(inode, &data_reserved,
					cur_offset, last_byte - cur_offset);
			if (ret < 0) {
				free_extent_map(em);
				break;
			}
		} else {
			/*
			 * Do not need to reserve unwritten extent for this
			 * range, free reserved data space first, otherwise
			 * it'll result in false ENOSPC error.
			 */
			btrfs_free_reserved_data_space(inode, data_reserved,
					cur_offset, last_byte - cur_offset);
		}
		free_extent_map(em);
		cur_offset = last_byte;
	}

	/*
	 * If ret is still 0, means we're OK to fallocate.
	 * Or just cleanup the list and exit.
	 */
	list_for_each_entry_safe(range, tmp, &reserve_list, list) {
		if (!ret)
			ret = btrfs_prealloc_file_range(inode, mode,
					range->start,
					range->len, i_blocksize(inode),
					offset + len, &alloc_hint);
		else
			btrfs_free_reserved_data_space(inode,
					data_reserved, range->start,
					range->len);
		list_del(&range->list);
		kfree(range);
	}
	if (ret < 0)
		goto out_unlock;

	/*
	 * We didn't need to allocate any more space, but we still extended the
	 * size of the file so we need to update i_size and the inode item.
	 */
	ret = btrfs_fallocate_update_isize(inode, actual_end, mode);
out_unlock:
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
			     &cached_state);
out:
	inode_unlock(inode);

	/* Let go of our reservation. */
	if (ret != 0 && !(mode & FALLOC_FL_ZERO_RANGE))
		btrfs_free_reserved_data_space(inode, data_reserved,
				alloc_start, alloc_end - cur_offset);
	extent_changeset_free(data_reserved);
	return ret;
}

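/*
 * Walk the extent maps from *offset to i_size and set *offset to the first
 * position that satisfies the SEEK_DATA/SEEK_HOLE request in @whence
 * (prealloc extents count as holes).
 */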
static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_map *em = NULL;
	struct extent_state *cached_state = NULL;
	u64 lockstart;
	u64 lockend;
	u64 start;
	u64 len;
	int ret = 0;

	if (inode->i_size == 0)
		return -ENXIO;

	/*
	 * *offset can be negative, in this case we start finding DATA/HOLE from
	 * the very start of the file.
	 */
	start = max_t(loff_t, 0, *offset);

	lockstart = round_down(start, fs_info->sectorsize);
	lockend = round_up(i_size_read(inode),
			   fs_info->sectorsize);
	if (lockend <= lockstart)
		lockend = lockstart + fs_info->sectorsize;
	lockend--;
	len = lockend - lockstart + 1;

	lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			 &cached_state);

	while (start < inode->i_size) {
		em = btrfs_get_extent_fiemap(BTRFS_I(inode), NULL, 0,
					     start, len, 0);
		if (IS_ERR(em)) {
			ret = PTR_ERR(em);
			em = NULL;
			break;
		}

		if (whence == SEEK_HOLE &&
		    (em->block_start == EXTENT_MAP_HOLE ||
		     test_bit(EXTENT_FLAG_PREALLOC, &em->flags)))
			break;
		else if (whence == SEEK_DATA &&
			 (em->block_start != EXTENT_MAP_HOLE &&
			  !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)))
			break;

		start = em->start + em->len;
		free_extent_map(em);
		em = NULL;
		cond_resched();
	}
	free_extent_map(em);
	if (!ret) {
		if (whence == SEEK_DATA && start >= inode->i_size)
			ret = -ENXIO;
		else
			*offset = min_t(loff_t, start, inode->i_size);
	}
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			     &cached_state);
	return ret;
}

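/*
 * llseek: SEEK_END and SEEK_CUR go through generic_file_llseek(), while
 * SEEK_DATA and SEEK_HOLE walk the extent maps via find_desired_extent();
 * anything else (SEEK_SET) falls through to vfs_setpos().
 */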
static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	int ret;

	inode_lock(inode);
	switch (whence) {
	case SEEK_END:
	case SEEK_CUR:
		offset = generic_file_llseek(file, offset, whence);
		goto out;
	case SEEK_DATA:
	case SEEK_HOLE:
		if (offset >= i_size_read(inode)) {
			inode_unlock(inode);
			return -ENXIO;
		}

		ret = find_desired_extent(inode, &offset, whence);
		if (ret) {
			inode_unlock(inode);
			return ret;
		}
	}

	offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
out:
	inode_unlock(inode);
	return offset;
}

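/* Mark the file as supporting non-blocking (IOCB_NOWAIT / RWF_NOWAIT) I/O */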
static int btrfs_file_open(struct inode *inode, struct file *filp)
{
	filp->f_mode |= FMODE_NOWAIT;
	return generic_file_open(inode, filp);
}

const struct file_operations btrfs_file_operations = {
	.llseek		= btrfs_file_llseek,
	.read_iter	= generic_file_read_iter,
	.splice_read	= generic_file_splice_read,
	.write_iter	= btrfs_file_write_iter,
	.mmap		= btrfs_file_mmap,
	.open		= btrfs_file_open,
	.release	= btrfs_release_file,
	.fsync		= btrfs_sync_file,
	.fallocate	= btrfs_fallocate,
	.unlocked_ioctl	= btrfs_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= btrfs_compat_ioctl,
#endif
	.clone_file_range = btrfs_clone_file_range,
	.dedupe_file_range = btrfs_dedupe_file_range,
};

void btrfs_auto_defrag_exit(void)
{
	kmem_cache_destroy(btrfs_inode_defrag_cachep);
}

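/* Create the cache for inode_defrag records; called once at module init */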
int __init btrfs_auto_defrag_init(void)
{
	btrfs_inode_defrag_cachep = kmem_cache_create("btrfs_inode_defrag",
					sizeof(struct inode_defrag), 0,
					SLAB_MEM_SPREAD,
					NULL);
	if (!btrfs_inode_defrag_cachep)
		return -ENOMEM;

	return 0;
}

int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end)
{
	int ret;

	/*
	 * So with compression we will find and lock a dirty page and clear the
	 * first one as dirty, setup an async extent, and immediately return
	 * with the entire range locked but with nobody actually marked with
	 * writeback.  So we can't just filemap_write_and_wait_range() and
	 * expect it to work since it will just kick off a thread to do the
	 * actual work.  So we need to call filemap_fdatawrite_range _again_
	 * since it will wait on the page lock, which won't be unlocked until
	 * after the pages have been marked as writeback and so we're good to go
	 * from there.  We have to do this otherwise we'll miss the ordered
	 * extents and that results in badness.  Please Josef, do not think you
	 * know better and pull this out at some point in the future, it is
	 * right and you are wrong.
	 */
	ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
	if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
			     &BTRFS_I(inode)->runtime_flags))
		ret = filemap_fdatawrite_range(inode->i_mapping, start, end);

	return ret;
}
