/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/mpage.h>
#include <linux/falloc.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/statfs.h>
#include <linux/compat.h>
#include <linux/slab.h>
#include <linux/btrfs.h>
#include <linux/uio.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "print-tree.h"
#include "tree-log.h"
#include "locking.h"
#include "volumes.h"
#include "qgroup.h"

static struct kmem_cache *btrfs_inode_defrag_cachep;
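
/*
 * When auto defrag is enabled, these structs are queued up on a per-fs
 * rbtree to remember which inodes still need defragging passes.
 */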
struct inode_defrag {
	struct rb_node rb_node;

	/* inode objectid */
	u64 ino;

	/*
	 * transid where the defrag was added, we search for extents
	 * newer than this
	 */
	u64 transid;

	/* root objectid */
	u64 root;

	/* last offset we were able to defrag */
	u64 last_offset;

	/* if we've wrapped around back to zero once already */
	int cycled;
};

static int __compare_inode_defrag(struct inode_defrag *defrag1,
				  struct inode_defrag *defrag2)
{
	if (defrag1->root > defrag2->root)
		return 1;
	else if (defrag1->root < defrag2->root)
		return -1;
	else if (defrag1->ino > defrag2->ino)
		return 1;
	else if (defrag1->ino < defrag2->ino)
		return -1;
	else
		return 0;
}

/* insert a record for an inode into the defrag tree.  The lock
 * must be held already.
 *
 * If you're inserting a record for an older transid than an
 * existing record, the transid already in the tree is lowered.
 *
 * If an existing record is found, the defrag item you
 * pass in is freed.
 */
static int __btrfs_add_inode_defrag(struct inode *inode,
				    struct inode_defrag *defrag)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct inode_defrag *entry;
	struct rb_node **p;
	struct rb_node *parent = NULL;
	int ret;

	p = &root->fs_info->defrag_inodes.rb_node;
	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct inode_defrag, rb_node);

		ret = __compare_inode_defrag(defrag, entry);
		if (ret < 0)
			p = &parent->rb_left;
		else if (ret > 0)
			p = &parent->rb_right;
		else {
			/*
			 * if we're reinserting an entry for
			 * an old defrag run, make sure to
			 * lower the transid of our existing record
			 */
			if (defrag->transid < entry->transid)
				entry->transid = defrag->transid;
			if (defrag->last_offset > entry->last_offset)
				entry->last_offset = defrag->last_offset;
			return -EEXIST;
		}
	}
	set_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
	rb_link_node(&defrag->rb_node, parent, p);
	rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes);
	return 0;
}
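
/*
 * Auto-defrag applies only while the AUTO_DEFRAG mount option is set and
 * the filesystem is not in the process of being shut down.
 */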
static inline int __need_auto_defrag(struct btrfs_root *root)
{
	if (!btrfs_test_opt(root, AUTO_DEFRAG))
		return 0;

	if (btrfs_fs_closing(root->fs_info))
		return 0;

	return 1;
}

/*
 * insert a defrag record for this inode if auto defrag is
 * enabled
 */
int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
			   struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct inode_defrag *defrag;
	u64 transid;
	int ret;

	if (!__need_auto_defrag(root))
		return 0;

	if (test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags))
		return 0;

	if (trans)
		transid = trans->transid;
	else
		transid = BTRFS_I(inode)->root->last_trans;

	defrag = kmem_cache_zalloc(btrfs_inode_defrag_cachep, GFP_NOFS);
	if (!defrag)
		return -ENOMEM;

	defrag->ino = btrfs_ino(inode);
	defrag->transid = transid;
	defrag->root = root->root_key.objectid;

	spin_lock(&root->fs_info->defrag_inodes_lock);
	if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) {
		/*
		 * If we set the IN_DEFRAG flag and evict the inode from
		 * memory, and then re-read this inode, this new inode
		 * doesn't have the IN_DEFRAG flag. In that case, we may
		 * still find an existing defrag record for it.
		 */
		ret = __btrfs_add_inode_defrag(inode, defrag);
		if (ret)
			kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
	} else {
		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
	}
	spin_unlock(&root->fs_info->defrag_inodes_lock);
	return 0;
}

/*
 * Requeue the defrag object. If there is a defrag object that points to
 * the same inode in the tree, we will merge them together (by
 * __btrfs_add_inode_defrag()) and free the one that we want to requeue.
 */
static void btrfs_requeue_inode_defrag(struct inode *inode,
				       struct inode_defrag *defrag)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret;

	if (!__need_auto_defrag(root))
		goto out;

	/*
	 * Here we don't check the IN_DEFRAG flag, because we need to merge
	 * the records together.
	 */
	spin_lock(&root->fs_info->defrag_inodes_lock);
	ret = __btrfs_add_inode_defrag(inode, defrag);
	spin_unlock(&root->fs_info->defrag_inodes_lock);
	if (ret)
		goto out;
	return;
out:
	kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
}

/*
 * pick the defragable inode that we want, if it doesn't exist, we will get
 * the next one.
 */
static struct inode_defrag *
btrfs_pick_defrag_inode(struct btrfs_fs_info *fs_info, u64 root, u64 ino)
{
	struct inode_defrag *entry = NULL;
	struct inode_defrag tmp;
	struct rb_node *p;
	struct rb_node *parent = NULL;
	int ret;

	tmp.ino = ino;
	tmp.root = root;

	spin_lock(&fs_info->defrag_inodes_lock);
	p = fs_info->defrag_inodes.rb_node;
	while (p) {
		parent = p;
		entry = rb_entry(parent, struct inode_defrag, rb_node);

		ret = __compare_inode_defrag(&tmp, entry);
		if (ret < 0)
			p = parent->rb_left;
		else if (ret > 0)
			p = parent->rb_right;
		else
			goto out;
	}

	if (parent && __compare_inode_defrag(&tmp, entry) > 0) {
		parent = rb_next(parent);
		if (parent)
			entry = rb_entry(parent, struct inode_defrag, rb_node);
		else
			entry = NULL;
	}
out:
	if (entry)
		rb_erase(parent, &fs_info->defrag_inodes);
	spin_unlock(&fs_info->defrag_inodes_lock);
	return entry;
}
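
/*
 * Drop every queued defrag record; used when the defrag tree is torn down,
 * for example on unmount.
 */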
void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info)
{
	struct inode_defrag *defrag;
	struct rb_node *node;

	spin_lock(&fs_info->defrag_inodes_lock);
	node = rb_first(&fs_info->defrag_inodes);
	while (node) {
		rb_erase(node, &fs_info->defrag_inodes);
		defrag = rb_entry(node, struct inode_defrag, rb_node);
		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);

		cond_resched_lock(&fs_info->defrag_inodes_lock);

		node = rb_first(&fs_info->defrag_inodes);
	}
	spin_unlock(&fs_info->defrag_inodes_lock);
}

#define BTRFS_DEFRAG_BATCH	1024
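
/*
 * Run one defrag pass over an inode: defragment up to BTRFS_DEFRAG_BATCH
 * pages and requeue the record if there may be more work left, so a single
 * huge file cannot starve the other queued inodes.
 */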
static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
				    struct inode_defrag *defrag)
{
	struct btrfs_root *inode_root;
	struct inode *inode;
	struct btrfs_key key;
	struct btrfs_ioctl_defrag_range_args range;
	int num_defrag;
	int index;
	int ret;

	/* get the inode */
	key.objectid = defrag->root;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	index = srcu_read_lock(&fs_info->subvol_srcu);

	inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
	if (IS_ERR(inode_root)) {
		ret = PTR_ERR(inode_root);
		goto cleanup;
	}

	key.objectid = defrag->ino;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;
	inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
	if (IS_ERR(inode)) {
		ret = PTR_ERR(inode);
		goto cleanup;
	}
	srcu_read_unlock(&fs_info->subvol_srcu, index);

	/* do a chunk of defrag */
	clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
	memset(&range, 0, sizeof(range));
	range.len = (u64)-1;
	range.start = defrag->last_offset;

	sb_start_write(fs_info->sb);
	num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
				       BTRFS_DEFRAG_BATCH);
	sb_end_write(fs_info->sb);
	/*
	 * if we filled the whole defrag batch, there
	 * must be more work to do.  Queue this defrag
	 * again.
	 */
	if (num_defrag == BTRFS_DEFRAG_BATCH) {
		defrag->last_offset = range.start;
		btrfs_requeue_inode_defrag(inode, defrag);
	} else if (defrag->last_offset && !defrag->cycled) {
		/*
		 * we didn't fill our defrag batch, but
		 * we didn't start at zero.  Make sure we loop
		 * around to the start of the file.
		 */
		defrag->last_offset = 0;
		defrag->cycled = 1;
		btrfs_requeue_inode_defrag(inode, defrag);
	} else {
		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
	}

	iput(inode);
	return 0;
cleanup:
	srcu_read_unlock(&fs_info->subvol_srcu, index);
	kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
	return ret;
}

/*
 * run through the list of inodes in the FS that need
 * defragging
 */
int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
{
	struct inode_defrag *defrag;
	u64 first_ino = 0;
	u64 root_objectid = 0;

	atomic_inc(&fs_info->defrag_running);
	while (1) {
		/* Pause the auto defragger. */
		if (test_bit(BTRFS_FS_STATE_REMOUNTING,
			     &fs_info->fs_state))
			break;

		if (!__need_auto_defrag(fs_info->tree_root))
			break;

		/* find an inode to defrag */
		defrag = btrfs_pick_defrag_inode(fs_info, root_objectid,
						 first_ino);
		if (!defrag) {
			if (root_objectid || first_ino) {
				root_objectid = 0;
				first_ino = 0;
				continue;
			} else {
				break;
			}
		}

		first_ino = defrag->ino + 1;
		root_objectid = defrag->root;

		__btrfs_run_defrag_inode(fs_info, defrag);
	}
	atomic_dec(&fs_info->defrag_running);

	/*
	 * during unmount, we use the transaction_wait queue to
	 * wait for the defragger to stop
	 */
	wake_up(&fs_info->transaction_wait);
	return 0;
}

/* simple helper to fault in pages and copy.  This should go away
 * and be replaced with calls into generic code.
 */
static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
					 size_t write_bytes,
					 struct page **prepared_pages,
					 struct iov_iter *i)
{
	size_t copied = 0;
	size_t total_copied = 0;
	int pg = 0;
	int offset = pos & (PAGE_CACHE_SIZE - 1);

	while (write_bytes > 0) {
		size_t count = min_t(size_t,
				     PAGE_CACHE_SIZE - offset, write_bytes);
		struct page *page = prepared_pages[pg];
		/*
		 * Copy data from userspace to the current page
		 */
		copied = iov_iter_copy_from_user_atomic(page, i, offset, count);

		/* Flush processor's dcache for this page */
		flush_dcache_page(page);

		/*
		 * if we get a partial write, we can end up with
		 * partially up to date pages.  These add
		 * a lot of complexity, so make sure they don't
		 * happen by forcing this copy to be retried.
		 *
		 * The rest of the btrfs_file_write code will fall
		 * back to page at a time copies after we return 0.
		 */
		if (!PageUptodate(page) && copied < count)
			copied = 0;

		iov_iter_advance(i, copied);
		write_bytes -= copied;
		total_copied += copied;

		/* Return to btrfs_file_write_iter to fault page */
		if (unlikely(copied == 0))
			break;

		if (copied < PAGE_CACHE_SIZE - offset) {
			offset += copied;
		} else {
			pg++;
			offset = 0;
		}
	}
	return total_copied;
}

/*
 * unlocks pages after btrfs_file_write is done with them
 */
static void btrfs_drop_pages(struct page **pages, size_t num_pages)
{
	size_t i;
	for (i = 0; i < num_pages; i++) {
		/* page checked is some magic around finding pages that
		 * have been modified without going through btrfs_set_page_dirty
		 * clear it here. There should be no need to mark the pages
		 * accessed as prepare_pages should have marked them accessed
		 * in prepare_pages via find_or_create_page()
		 */
		ClearPageChecked(pages[i]);
		unlock_page(pages[i]);
		page_cache_release(pages[i]);
	}
}

/*
 * after copy_from_user, pages need to be dirtied and we need to make sure
 * there are holes created as needed in the extent tree.
 *
 * this also makes the decision about creating an inline extent vs
 * doing real data extents, marking pages dirty and delalloc as required.
 */
int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
		      struct page **pages, size_t num_pages,
		      loff_t pos, size_t write_bytes,
		      struct extent_state **cached)
{
	int err = 0;
	int i;
	u64 num_bytes;
	u64 start_pos;
	u64 end_of_last_block;
	u64 end_pos = pos + write_bytes;
	loff_t isize = i_size_read(inode);

	start_pos = pos & ~((u64)root->sectorsize - 1);
	num_bytes = ALIGN(write_bytes + pos - start_pos, root->sectorsize);

	end_of_last_block = start_pos + num_bytes - 1;
	err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
					cached);
	if (err)
		return err;

	for (i = 0; i < num_pages; i++) {
		struct page *p = pages[i];
		SetPageUptodate(p);
		ClearPageChecked(p);
		set_page_dirty(p);
	}

	/*
	 * we've only changed i_size in ram, and we haven't updated
	 * the disk i_size.  There is no need to log the inode
	 * at this time.
	 */
	if (end_pos > isize)
		i_size_write(inode, end_pos);
	return 0;
}

/*
 * this drops all the extents in the cache that intersect the range
 * [start, end].  Existing extents are split as required.
 */
void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
			     int skip_pinned)
{
	struct extent_map *em;
	struct extent_map *split = NULL;
	struct extent_map *split2 = NULL;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	u64 len = end - start + 1;
	u64 gen;
	int ret;
	int testend = 1;
	unsigned long flags;
	int compressed = 0;
	bool modified;

	WARN_ON(end < start);
	if (end == (u64)-1) {
		len = (u64)-1;
		testend = 0;
	}
	while (1) {
		int no_splits = 0;

		modified = false;
		if (!split)
			split = alloc_extent_map();
		if (!split2)
			split2 = alloc_extent_map();
		if (!split || !split2)
			no_splits = 1;

		write_lock(&em_tree->lock);
		em = lookup_extent_mapping(em_tree, start, len);
		if (!em) {
			write_unlock(&em_tree->lock);
			break;
		}
		flags = em->flags;
		gen = em->generation;
		if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
			if (testend && em->start + em->len >= start + len) {
				free_extent_map(em);
				write_unlock(&em_tree->lock);
				break;
			}
			start = em->start + em->len;
			if (testend)
				len = start + len - (em->start + em->len);
			free_extent_map(em);
			write_unlock(&em_tree->lock);
			continue;
		}
		compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
		clear_bit(EXTENT_FLAG_LOGGING, &flags);
		modified = !list_empty(&em->list);
		if (no_splits)
			goto next;

		if (em->start < start) {
			split->start = em->start;
			split->len = start - em->start;

			if (em->block_start < EXTENT_MAP_LAST_BYTE) {
				split->orig_start = em->orig_start;
				split->block_start = em->block_start;

				if (compressed)
					split->block_len = em->block_len;
				else
					split->block_len = split->len;
				split->orig_block_len = max(split->block_len,
						em->orig_block_len);
				split->ram_bytes = em->ram_bytes;
			} else {
				split->orig_start = split->start;
				split->block_len = 0;
				split->block_start = em->block_start;
				split->orig_block_len = 0;
				split->ram_bytes = split->len;
			}

			split->generation = gen;
			split->bdev = em->bdev;
			split->flags = flags;
			split->compress_type = em->compress_type;
			replace_extent_mapping(em_tree, em, split, modified);
			free_extent_map(split);
			split = split2;
			split2 = NULL;
		}
		if (testend && em->start + em->len > start + len) {
			u64 diff = start + len - em->start;

			split->start = start + len;
			split->len = em->start + em->len - (start + len);
			split->bdev = em->bdev;
			split->flags = flags;
			split->compress_type = em->compress_type;
			split->generation = gen;

			if (em->block_start < EXTENT_MAP_LAST_BYTE) {
				split->orig_block_len = max(em->block_len,
						    em->orig_block_len);

				split->ram_bytes = em->ram_bytes;
				if (compressed) {
					split->block_len = em->block_len;
					split->block_start = em->block_start;
					split->orig_start = em->orig_start;
				} else {
					split->block_len = split->len;
					split->block_start = em->block_start
							     + diff;
					split->orig_start = em->orig_start;
				}
			} else {
				split->ram_bytes = split->len;
				split->orig_start = split->start;
				split->block_len = 0;
				split->block_start = em->block_start;
				split->orig_block_len = 0;
			}

			if (extent_map_in_tree(em)) {
				replace_extent_mapping(em_tree, em, split,
						       modified);
			} else {
				ret = add_extent_mapping(em_tree, split,
							 modified);
				ASSERT(ret == 0); /* Logic error */
			}
			free_extent_map(split);
			split = NULL;
		}
next:
		if (extent_map_in_tree(em))
			remove_extent_mapping(em_tree, em);
		write_unlock(&em_tree->lock);

		/* once for us */
		free_extent_map(em);
		/* once for the tree */
		free_extent_map(em);
	}
	if (split)
		free_extent_map(split);
	if (split2)
		free_extent_map(split2);
}

/*
 * this is very complex, but the basic idea is to drop all extents
 * in the range start - end.
 *
 * If an extent intersects the range but is not entirely inside the range
 * it is either truncated or split.  Anything entirely inside the range
 * is deleted from the tree.
 */
int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root, struct inode *inode,
			 struct btrfs_path *path, u64 start, u64 end,
			 u64 *drop_end, int drop_cache,
			 int replace_extent,
			 u32 extent_item_size,
			 int *key_inserted)
{
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	struct btrfs_key new_key;
	u64 ino = btrfs_ino(inode);
	u64 search_start = start;
	u64 disk_bytenr = 0;
	u64 num_bytes = 0;
	u64 extent_offset = 0;
	u64 extent_end = 0;
	int del_nr = 0;
	int del_slot = 0;
	int extent_type;
	int recow;
	int ret;
	int modify_tree = -1;
	int update_refs;
	int found = 0;
	int leafs_visited = 0;

	if (drop_cache)
		btrfs_drop_extent_cache(inode, start, end - 1, 0);

	if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent)
		modify_tree = 0;

	update_refs = (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
		       root == root->fs_info->tree_root);
	while (1) {
		recow = 0;
		ret = btrfs_lookup_file_extent(trans, root, path, ino,
					       search_start, modify_tree);
		if (ret < 0)
			break;
		if (ret > 0 && path->slots[0] > 0 && search_start == start) {
			leaf = path->nodes[0];
			btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
			if (key.objectid == ino &&
			    key.type == BTRFS_EXTENT_DATA_KEY)
				path->slots[0]--;
		}
		ret = 0;
		leafs_visited++;
next_slot:
		leaf = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			BUG_ON(del_nr > 0);
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				break;
			if (ret > 0) {
				ret = 0;
				break;
			}
			leafs_visited++;
			leaf = path->nodes[0];
			recow = 1;
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid > ino ||
		    key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
			break;

		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		extent_type = btrfs_file_extent_type(leaf, fi);

		if (extent_type == BTRFS_FILE_EXTENT_REG ||
		    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
			disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
			num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
			extent_offset = btrfs_file_extent_offset(leaf, fi);
			extent_end = key.offset +
				btrfs_file_extent_num_bytes(leaf, fi);
		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
			extent_end = key.offset +
				btrfs_file_extent_inline_len(leaf,
						     path->slots[0], fi);
		} else {
			WARN_ON(1);
			extent_end = search_start;
		}

		/*
		 * Don't skip extent items representing 0 byte lengths. They
		 * used to be created (bug) if while punching holes we hit
		 * -ENOSPC condition. So if we find one here, just ensure we
		 * delete it, otherwise we would insert a new file extent item
		 * with the same key (offset) as that 0 bytes length file
		 * extent item in the call to setup_items_for_insert() later
		 * in this function.
		 */
		if (extent_end == key.offset && extent_end >= search_start)
			goto delete_extent_item;

		if (extent_end <= search_start) {
			path->slots[0]++;
			goto next_slot;
		}

		found = 1;
		search_start = max(key.offset, start);
		if (recow || !modify_tree) {
			modify_tree = -1;
			btrfs_release_path(path);
			continue;
		}

		/*
		 *     | - range to drop - |
		 *  | -------- extent -------- |
		 */
		if (start > key.offset && end < extent_end) {
			BUG_ON(del_nr > 0);
			if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
				ret = -EOPNOTSUPP;
				break;
			}

			memcpy(&new_key, &key, sizeof(new_key));
			new_key.offset = start;
			ret = btrfs_duplicate_item(trans, root, path,
						   &new_key);
			if (ret == -EAGAIN) {
				btrfs_release_path(path);
				continue;
			}
			if (ret < 0)
				break;

			leaf = path->nodes[0];
			fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
					    struct btrfs_file_extent_item);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							start - key.offset);

			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);

			extent_offset += start - key.offset;
			btrfs_set_file_extent_offset(leaf, fi, extent_offset);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							extent_end - start);
			btrfs_mark_buffer_dirty(leaf);

			if (update_refs && disk_bytenr > 0) {
				ret = btrfs_inc_extent_ref(trans, root,
						disk_bytenr, num_bytes, 0,
						root->root_key.objectid,
						new_key.objectid,
						start - extent_offset, 1);
				BUG_ON(ret); /* -ENOMEM */
			}
			key.offset = start;
		}
		/*
		 *  | ---- range to drop ----- |
		 *      | -------- extent -------- |
		 */
		if (start <= key.offset && end < extent_end) {
			if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
				ret = -EOPNOTSUPP;
				break;
			}

			memcpy(&new_key, &key, sizeof(new_key));
			new_key.offset = end;
			btrfs_set_item_key_safe(root->fs_info, path, &new_key);

			extent_offset += end - key.offset;
			btrfs_set_file_extent_offset(leaf, fi, extent_offset);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							extent_end - end);
			btrfs_mark_buffer_dirty(leaf);
			if (update_refs && disk_bytenr > 0)
				inode_sub_bytes(inode, end - key.offset);
			break;
		}

		search_start = extent_end;
		/*
		 *       | ---- range to drop ----- |
		 *  | -------- extent -------- |
		 */
		if (start > key.offset && end >= extent_end) {
			BUG_ON(del_nr > 0);
			if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
				ret = -EOPNOTSUPP;
				break;
			}

			btrfs_set_file_extent_num_bytes(leaf, fi,
							start - key.offset);
			btrfs_mark_buffer_dirty(leaf);
			if (update_refs && disk_bytenr > 0)
				inode_sub_bytes(inode, extent_end - start);
			if (end == extent_end)
				break;

			path->slots[0]++;
			goto next_slot;
		}

		/*
		 *  | ---- range to drop ----- |
		 *    | ------ extent ------ |
		 */
		if (start <= key.offset && end >= extent_end) {
delete_extent_item:
			if (del_nr == 0) {
				del_slot = path->slots[0];
				del_nr = 1;
			} else {
				BUG_ON(del_slot + del_nr != path->slots[0]);
				del_nr++;
			}

			if (update_refs &&
			    extent_type == BTRFS_FILE_EXTENT_INLINE) {
				inode_sub_bytes(inode,
						extent_end - key.offset);
				extent_end = ALIGN(extent_end,
						   root->sectorsize);
			} else if (update_refs && disk_bytenr > 0) {
				ret = btrfs_free_extent(trans, root,
						disk_bytenr, num_bytes, 0,
						root->root_key.objectid,
						key.objectid, key.offset -
						extent_offset, 0);
				BUG_ON(ret); /* -ENOMEM */
				inode_sub_bytes(inode,
						extent_end - key.offset);
			}

			if (end == extent_end)
				break;

			if (path->slots[0] + 1 < btrfs_header_nritems(leaf)) {
				path->slots[0]++;
				goto next_slot;
			}

			ret = btrfs_del_items(trans, root, path, del_slot,
					      del_nr);
			if (ret) {
				btrfs_abort_transaction(trans, root, ret);
				break;
			}

			del_nr = 0;
			del_slot = 0;

			btrfs_release_path(path);
			continue;
		}

		BUG_ON(1);
	}

	if (!ret && del_nr > 0) {
		/*
		 * Set path->slots[0] to first slot, so that after the delete
		 * if items are moved off from our leaf to its immediate left
		 * or right neighbor leafs, we end up with a correct and
		 * adjusted path->slots[0] for our insertion (if
		 * replace_extent != 0).
		 */
		path->slots[0] = del_slot;
		ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
		if (ret)
			btrfs_abort_transaction(trans, root, ret);
	}

	leaf = path->nodes[0];
	/*
	 * If btrfs_del_items() was called, it might have deleted a leaf, in
	 * which case it unlocked our path, so check path->locks[0] matches a
	 * write lock.
	 */
	if (!ret && replace_extent && leafs_visited == 1 &&
	    (path->locks[0] == BTRFS_WRITE_LOCK_BLOCKING ||
	     path->locks[0] == BTRFS_WRITE_LOCK) &&
	    btrfs_leaf_free_space(root, leaf) >=
	    sizeof(struct btrfs_item) + extent_item_size) {

		key.objectid = ino;
		key.type = BTRFS_EXTENT_DATA_KEY;
		key.offset = start;
		if (!del_nr && path->slots[0] < btrfs_header_nritems(leaf)) {
			struct btrfs_key slot_key;

			btrfs_item_key_to_cpu(leaf, &slot_key, path->slots[0]);
			if (btrfs_comp_cpu_keys(&key, &slot_key) > 0)
				path->slots[0]++;
		}
		setup_items_for_insert(root, path, &key,
				       &extent_item_size,
				       extent_item_size,
				       sizeof(struct btrfs_item) +
				       extent_item_size, 1);
		*key_inserted = 1;
	}

	if (!replace_extent || !(*key_inserted))
		btrfs_release_path(path);
	if (drop_end)
		*drop_end = found ? min(end, extent_end) : end;
	return ret;
}
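
/*
 * Thin wrapper around __btrfs_drop_extents() for callers that only need to
 * remove the extents in [start, end) and don't care about drop_end or the
 * replace-extent optimization.
 */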
int btrfs_drop_extents(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct inode *inode, u64 start,
		       u64 end, int drop_cache)
{
	struct btrfs_path *path;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL,
				   drop_cache, 0, 0, NULL);
	btrfs_free_path(path);
	return ret;
}
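
/*
 * Check whether the file extent item at @slot refers to the same physical
 * extent (same bytenr and orig_offset, plain REG type, no compression or
 * encoding), so that it can be merged with a neighbour; on success the
 * [start, end) of that item is returned through @start/@end.
 */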
static int extent_mergeable(struct extent_buffer *leaf, int slot,
			    u64 objectid, u64 bytenr, u64 orig_offset,
			    u64 *start, u64 *end)
{
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 extent_end;

	if (slot < 0 || slot >= btrfs_header_nritems(leaf))
		return 0;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
		return 0;

	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
	if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG ||
	    btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr ||
	    btrfs_file_extent_offset(leaf, fi) != key.offset - orig_offset ||
	    btrfs_file_extent_compression(leaf, fi) ||
	    btrfs_file_extent_encryption(leaf, fi) ||
	    btrfs_file_extent_other_encoding(leaf, fi))
		return 0;

	extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
	if ((*start && *start != key.offset) || (*end && *end != extent_end))
		return 0;

	*start = key.offset;
	*end = extent_end;
	return 1;
}

/*
 * Mark extent in the range start - end as written.
 *
 * This changes extent type from 'pre-allocated' to 'regular'. If only
 * part of extent is marked as written, the extent will be split into
 * two or three.
 */
int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
			      struct inode *inode, u64 start, u64 end)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_buffer *leaf;
	struct btrfs_path *path;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	struct btrfs_key new_key;
	u64 bytenr;
	u64 num_bytes;
	u64 extent_end;
	u64 orig_offset;
	u64 other_start;
	u64 other_end;
	u64 split;
	int del_nr = 0;
	int del_slot = 0;
	int recow;
	int ret;
	u64 ino = btrfs_ino(inode);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
again:
	recow = 0;
	split = start;
	key.objectid = ino;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = split;

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0)
		goto out;
	if (ret > 0 && path->slots[0] > 0)
		path->slots[0]--;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	BUG_ON(key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY);
	fi = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);
	BUG_ON(btrfs_file_extent_type(leaf, fi) !=
	       BTRFS_FILE_EXTENT_PREALLOC);
	extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
	BUG_ON(key.offset > start || extent_end < end);

	bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
	num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
	orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi);
	memcpy(&new_key, &key, sizeof(new_key));

	if (start == key.offset && end < extent_end) {
		other_start = 0;
		other_end = start;
		if (extent_mergeable(leaf, path->slots[0] - 1,
				     ino, bytenr, orig_offset,
				     &other_start, &other_end)) {
			new_key.offset = end;
			btrfs_set_item_key_safe(root->fs_info, path, &new_key);
			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);
			btrfs_set_file_extent_generation(leaf, fi,
							 trans->transid);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							extent_end - end);
			btrfs_set_file_extent_offset(leaf, fi,
						     end - orig_offset);
			fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
					    struct btrfs_file_extent_item);
			btrfs_set_file_extent_generation(leaf, fi,
							 trans->transid);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							end - other_start);
			btrfs_mark_buffer_dirty(leaf);
			goto out;
		}
	}

	if (start > key.offset && end == extent_end) {
		other_start = end;
		other_end = 0;
		if (extent_mergeable(leaf, path->slots[0] + 1,
				     ino, bytenr, orig_offset,
				     &other_start, &other_end)) {
			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							start - key.offset);
			btrfs_set_file_extent_generation(leaf, fi,
							 trans->transid);
			path->slots[0]++;
			new_key.offset = start;
			btrfs_set_item_key_safe(root->fs_info, path, &new_key);

			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);
			btrfs_set_file_extent_generation(leaf, fi,
							 trans->transid);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							other_end - start);
			btrfs_set_file_extent_offset(leaf, fi,
						     start - orig_offset);
			btrfs_mark_buffer_dirty(leaf);
			goto out;
		}
	}

	while (start > key.offset || end < extent_end) {
		if (key.offset == start)
			split = end;

		new_key.offset = split;
		ret = btrfs_duplicate_item(trans, root, path, &new_key);
		if (ret == -EAGAIN) {
			btrfs_release_path(path);
			goto again;
		}
		if (ret < 0) {
			btrfs_abort_transaction(trans, root, ret);
			goto out;
		}

		leaf = path->nodes[0];
		fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
				    struct btrfs_file_extent_item);
		btrfs_set_file_extent_generation(leaf, fi, trans->transid);
		btrfs_set_file_extent_num_bytes(leaf, fi,
						split - key.offset);

		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);

		btrfs_set_file_extent_generation(leaf, fi, trans->transid);
		btrfs_set_file_extent_offset(leaf, fi, split - orig_offset);
		btrfs_set_file_extent_num_bytes(leaf, fi,
						extent_end - split);
		btrfs_mark_buffer_dirty(leaf);

		ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
					   root->root_key.objectid,
					   ino, orig_offset, 1);
		BUG_ON(ret); /* -ENOMEM */

		if (split == start) {
			key.offset = start;
		} else {
			BUG_ON(start != key.offset);
			path->slots[0]--;
			extent_end = end;
		}
		recow = 1;
	}

	other_start = end;
	other_end = 0;
	if (extent_mergeable(leaf, path->slots[0] + 1,
			     ino, bytenr, orig_offset,
			     &other_start, &other_end)) {
		if (recow) {
			btrfs_release_path(path);
			goto again;
		}
		extent_end = other_end;
		del_slot = path->slots[0] + 1;
		del_nr++;
		ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
					0, root->root_key.objectid,
					ino, orig_offset, 0);
		BUG_ON(ret); /* -ENOMEM */
	}
	other_start = 0;
	other_end = start;
	if (extent_mergeable(leaf, path->slots[0] - 1,
			     ino, bytenr, orig_offset,
			     &other_start, &other_end)) {
		if (recow) {
			btrfs_release_path(path);
			goto again;
		}
		key.offset = other_start;
		del_slot = path->slots[0];
		del_nr++;
		ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
					0, root->root_key.objectid,
					ino, orig_offset, 0);
		BUG_ON(ret); /* -ENOMEM */
	}
	if (del_nr == 0) {
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		btrfs_set_file_extent_type(leaf, fi,
					   BTRFS_FILE_EXTENT_REG);
		btrfs_set_file_extent_generation(leaf, fi, trans->transid);
		btrfs_mark_buffer_dirty(leaf);
	} else {
		fi = btrfs_item_ptr(leaf, del_slot - 1,
				    struct btrfs_file_extent_item);
		btrfs_set_file_extent_type(leaf, fi,
					   BTRFS_FILE_EXTENT_REG);
		btrfs_set_file_extent_generation(leaf, fi, trans->transid);
		btrfs_set_file_extent_num_bytes(leaf, fi,
						extent_end - key.offset);
		btrfs_mark_buffer_dirty(leaf);

		ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
		if (ret < 0) {
			btrfs_abort_transaction(trans, root, ret);
			goto out;
		}
	}
out:
	btrfs_free_path(path);
	return 0;
}

/*
 * on error we return an unlocked page and the error value
 * on success we return a locked page and 0
 */
static int prepare_uptodate_page(struct page *page, u64 pos,
				 bool force_uptodate)
{
	int ret = 0;

	if (((pos & (PAGE_CACHE_SIZE - 1)) || force_uptodate) &&
	    !PageUptodate(page)) {
		ret = btrfs_readpage(NULL, page);
		if (ret)
			return ret;
		lock_page(page);
		if (!PageUptodate(page)) {
			unlock_page(page);
			return -EIO;
		}
	}
	return 0;
}

/*
 * this just gets pages into the page cache and locks them down.
 */
static noinline int prepare_pages(struct inode *inode, struct page **pages,
				  size_t num_pages, loff_t pos,
				  size_t write_bytes, bool force_uptodate)
{
	int i;
	unsigned long index = pos >> PAGE_CACHE_SHIFT;
	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
	int err = 0;
	int faili;

	for (i = 0; i < num_pages; i++) {
		pages[i] = find_or_create_page(inode->i_mapping, index + i,
					       mask | __GFP_WRITE);
		if (!pages[i]) {
			faili = i - 1;
			err = -ENOMEM;
			goto fail;
		}

		if (i == 0)
			err = prepare_uptodate_page(pages[i], pos,
						    force_uptodate);
		if (i == num_pages - 1)
			err = prepare_uptodate_page(pages[i],
						    pos + write_bytes, false);
		if (err) {
			page_cache_release(pages[i]);
			faili = i - 1;
			goto fail;
		}
		wait_on_page_writeback(pages[i]);
	}

	return 0;
fail:
	while (faili >= 0) {
		unlock_page(pages[faili]);
		page_cache_release(pages[faili]);
		faili--;
	}
	return err;

}

/*
 * This function locks the extent and properly waits for data=ordered extents
 * to finish before allowing the pages to be modified if need.
 *
 * The return value:
 * 1 - the extent is locked
 * 0 - the extent is not locked, and everything is OK
 * -EAGAIN - need re-prepare the pages
 * the other < 0 number - Something wrong happens
 */
static noinline int
lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages,
				size_t num_pages, loff_t pos,
				u64 *lockstart, u64 *lockend,
				struct extent_state **cached_state)
{
	u64 start_pos;
	u64 last_pos;
	int i;
	int ret = 0;

	start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
	last_pos = start_pos + ((u64)num_pages << PAGE_CACHE_SHIFT) - 1;

	if (start_pos < inode->i_size) {
		struct btrfs_ordered_extent *ordered;
		lock_extent_bits(&BTRFS_I(inode)->io_tree,
				 start_pos, last_pos, 0, cached_state);
		ordered = btrfs_lookup_ordered_range(inode, start_pos,
						     last_pos - start_pos + 1);
		if (ordered &&
		    ordered->file_offset + ordered->len > start_pos &&
		    ordered->file_offset <= last_pos) {
			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
					     start_pos, last_pos,
					     cached_state, GFP_NOFS);
			for (i = 0; i < num_pages; i++) {
				unlock_page(pages[i]);
				page_cache_release(pages[i]);
			}
			btrfs_start_ordered_extent(inode, ordered, 1);
			btrfs_put_ordered_extent(ordered);
			return -EAGAIN;
		}
		if (ordered)
			btrfs_put_ordered_extent(ordered);

		clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos,
				 last_pos, EXTENT_DIRTY | EXTENT_DELALLOC |
				 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
				 0, 0, cached_state, GFP_NOFS);
		*lockstart = start_pos;
		*lockend = last_pos;
		ret = 1;
	}

	for (i = 0; i < num_pages; i++) {
		if (clear_page_dirty_for_io(pages[i]))
			account_page_redirty(pages[i]);
		set_page_extent_mapped(pages[i]);
		WARN_ON(!PageLocked(pages[i]));
	}

	return ret;
}
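
/*
 * Decide whether a write into a NODATACOW/prealloc range can skip the data
 * space reservation. Returns > 0 (and clamps *write_bytes) if the range can
 * be written without CoW, 0 if it can't; on success the no-snapshoting
 * reference stays held so the check remains valid until the write finishes.
 */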
static noinline int check_can_nocow(struct inode *inode, loff_t pos,
				    size_t *write_bytes)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_ordered_extent *ordered;
	u64 lockstart, lockend;
	u64 num_bytes;
	int ret;

	ret = btrfs_start_write_no_snapshoting(root);
	if (!ret)
		return -ENOSPC;

	lockstart = round_down(pos, root->sectorsize);
	lockend = round_up(pos + *write_bytes, root->sectorsize) - 1;

	while (1) {
		lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
		ordered = btrfs_lookup_ordered_range(inode, lockstart,
						     lockend - lockstart + 1);
		if (!ordered) {
			break;
		}
		unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
		btrfs_start_ordered_extent(inode, ordered, 1);
		btrfs_put_ordered_extent(ordered);
	}

	num_bytes = lockend - lockstart + 1;
	ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL);
	if (ret <= 0) {
		ret = 0;
		btrfs_end_write_no_snapshoting(root);
	} else {
		*write_bytes = min_t(size_t, *write_bytes,
				     num_bytes - pos + lockstart);
	}

	unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);

	return ret;
}
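
/*
 * The main buffered write loop: reserve data and metadata space (falling
 * back to a NOCOW check when the data reservation fails), prepare and lock
 * the pages, copy from the iov_iter, then mark the copied range as
 * dirty/delalloc and release whatever part of the reservation wasn't used.
 */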
static noinline ssize_t __btrfs_buffered_write(struct file *file,
					       struct iov_iter *i,
					       loff_t pos)
{
	struct inode *inode = file_inode(file);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct page **pages = NULL;
	struct extent_state *cached_state = NULL;
	u64 release_bytes = 0;
	u64 lockstart;
	u64 lockend;
	unsigned long first_index;
	size_t num_written = 0;
	int nrptrs;
	int ret = 0;
	bool only_release_metadata = false;
	bool force_page_uptodate = false;
	bool need_unlock;

	nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_CACHE_SIZE),
		     PAGE_CACHE_SIZE / (sizeof(struct page *)));
	nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied);
	nrptrs = max(nrptrs, 8);
	pages = kmalloc_array(nrptrs, sizeof(struct page *), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	first_index = pos >> PAGE_CACHE_SHIFT;

	while (iov_iter_count(i) > 0) {
		size_t offset = pos & (PAGE_CACHE_SIZE - 1);
		size_t write_bytes = min(iov_iter_count(i),
					 nrptrs * (size_t)PAGE_CACHE_SIZE -
					 offset);
		size_t num_pages = DIV_ROUND_UP(write_bytes + offset,
						PAGE_CACHE_SIZE);
		size_t reserve_bytes;
		size_t dirty_pages;
		size_t copied;

		WARN_ON(num_pages > nrptrs);

		/*
		 * Fault pages before locking them in prepare_pages
		 * to avoid recursive lock
		 */
		if (unlikely(iov_iter_fault_in_readable(i, write_bytes))) {
			ret = -EFAULT;
			break;
		}

		reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
		ret = btrfs_check_data_free_space(inode, reserve_bytes,
						  write_bytes);
		if (ret == -ENOSPC &&
		    (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
					      BTRFS_INODE_PREALLOC))) {
			ret = check_can_nocow(inode, pos, &write_bytes);
			if (ret > 0) {
				only_release_metadata = true;
				/*
				 * our prealloc extent may be smaller than
				 * write_bytes, so scale down.
				 */
				num_pages = DIV_ROUND_UP(write_bytes + offset,
							 PAGE_CACHE_SIZE);
				reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
				ret = 0;
			} else {
				ret = -ENOSPC;
			}
		}

		if (ret)
			break;

		ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes);
		if (ret) {
			if (!only_release_metadata)
				btrfs_free_reserved_data_space(inode,
							       reserve_bytes);
			else
				btrfs_end_write_no_snapshoting(root);
			break;
		}

		release_bytes = reserve_bytes;
		need_unlock = false;
again:
		/*
		 * This is going to setup the pages array with the number of
		 * pages we want, so we don't really need to worry about the
		 * contents of pages from loop to loop
		 */
		ret = prepare_pages(inode, pages, num_pages,
				    pos, write_bytes,
				    force_page_uptodate);
		if (ret)
			break;

		ret = lock_and_cleanup_extent_if_need(inode, pages, num_pages,
						      pos, &lockstart, &lockend,
						      &cached_state);
		if (ret < 0) {
			if (ret == -EAGAIN)
				goto again;
			break;
		} else if (ret > 0) {
			need_unlock = true;
			ret = 0;
		}

		copied = btrfs_copy_from_user(pos, num_pages,
					      write_bytes, pages, i);

		/*
		 * if we have trouble faulting in the pages, fall
		 * back to one page at a time
		 */
		if (copied < write_bytes)
			nrptrs = 1;

		if (copied == 0) {
			force_page_uptodate = true;
			dirty_pages = 0;
		} else {
			force_page_uptodate = false;
			dirty_pages = DIV_ROUND_UP(copied + offset,
						   PAGE_CACHE_SIZE);
		}

		/*
		 * If we had a short copy we need to release the excess
		 * delalloc bytes we reserved.  We need to increment
		 * outstanding_extents because btrfs_delalloc_release_space
		 * will decrement it, but we still have an outstanding extent
		 * for the chunk we actually managed to copy.
		 */
		if (num_pages > dirty_pages) {
			release_bytes = (num_pages - dirty_pages) <<
				PAGE_CACHE_SHIFT;
			if (copied > 0) {
				spin_lock(&BTRFS_I(inode)->lock);
				BTRFS_I(inode)->outstanding_extents++;
				spin_unlock(&BTRFS_I(inode)->lock);
			}
			if (only_release_metadata)
				btrfs_delalloc_release_metadata(inode,
								release_bytes);
			else
				btrfs_delalloc_release_space(inode,
							     release_bytes);
		}

		release_bytes = dirty_pages << PAGE_CACHE_SHIFT;

		if (copied > 0)
			ret = btrfs_dirty_pages(root, inode, pages,
						dirty_pages, pos, copied,
						NULL);
		if (need_unlock)
			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
					     lockstart, lockend, &cached_state,
					     GFP_NOFS);
		if (ret) {
			btrfs_drop_pages(pages, num_pages);
			break;
		}

		release_bytes = 0;
		if (only_release_metadata)
			btrfs_end_write_no_snapshoting(root);

		if (only_release_metadata && copied > 0) {
			lockstart = round_down(pos, root->sectorsize);
			lockend = lockstart +
				(dirty_pages << PAGE_CACHE_SHIFT) - 1;

			set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
				       lockend, EXTENT_NORESERVE, NULL,
				       NULL, GFP_NOFS);
			only_release_metadata = false;
		}

		btrfs_drop_pages(pages, num_pages);

		cond_resched();

		balance_dirty_pages_ratelimited(inode->i_mapping);
		if (dirty_pages < (root->nodesize >> PAGE_CACHE_SHIFT) + 1)
			btrfs_btree_balance_dirty(root);

		pos += copied;
		num_written += copied;
	}

	kfree(pages);

	if (release_bytes) {
		if (only_release_metadata) {
			btrfs_end_write_no_snapshoting(root);
			btrfs_delalloc_release_metadata(inode, release_bytes);
		} else {
			btrfs_delalloc_release_space(inode, release_bytes);
		}
	}

	return num_written ? num_written : ret;
}

static ssize_t __btrfs_direct_write(struct kiocb *iocb,
				    struct iov_iter *from,
				    loff_t pos)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	ssize_t written;
	ssize_t written_buffered;
	loff_t endbyte;
	int err;

	written = generic_file_direct_write(iocb, from, pos);

	if (written < 0 || !iov_iter_count(from))
		return written;

	pos += written;
	written_buffered = __btrfs_buffered_write(file, from, pos);
	if (written_buffered < 0) {
		err = written_buffered;
		goto out;
	}
	/*
	 * Ensure all data is persisted. We want the next direct IO read to be
	 * able to read what was just written.
	 */
	endbyte = pos + written_buffered - 1;
	err = btrfs_fdatawrite_range(inode, pos, endbyte);
	if (err)
		goto out;
	err = filemap_fdatawait_range(inode->i_mapping, pos, endbyte);
	if (err)
		goto out;
	written += written_buffered;
	iocb->ki_pos = pos + written_buffered;
	invalidate_mapping_pages(file->f_mapping, pos >> PAGE_CACHE_SHIFT,
				 endbyte >> PAGE_CACHE_SHIFT);
out:
	return written ? written : err;
}
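
/*
 * Update mtime/ctime (and i_version) in memory before the write, so the
 * timestamps are correct even though the inode item itself is only written
 * out later, when the data is finished.
 */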
static void update_time_for_write(struct inode *inode)
{
	struct timespec now;

	if (IS_NOCMTIME(inode))
		return;

	now = current_fs_time(inode->i_sb);
	if (!timespec_equal(&inode->i_mtime, &now))
		inode->i_mtime = now;

	if (!timespec_equal(&inode->i_ctime, &now))
		inode->i_ctime = now;

	if (IS_I_VERSION(inode))
		inode_inc_iversion(inode);
}

static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
				     struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 start_pos;
	u64 end_pos;
	ssize_t num_written = 0;
	bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
	ssize_t err;
	loff_t pos;
	size_t count;

	mutex_lock(&inode->i_mutex);
	err = generic_write_checks(iocb, from);
	if (err <= 0) {
		mutex_unlock(&inode->i_mutex);
		return err;
	}

	current->backing_dev_info = inode_to_bdi(inode);
	err = file_remove_suid(file);
	if (err) {
		mutex_unlock(&inode->i_mutex);
		goto out;
	}

	/*
	 * If BTRFS flips readonly due to some impossible error
	 * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR),
	 * although we have opened a file as writable, we have
	 * to stop this write operation to ensure FLUSH_ERR is
	 * sent to disk.
	 */
	if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
		mutex_unlock(&inode->i_mutex);
		err = -EROFS;
		goto out;
	}

	/*
	 * We reserve space for updating the inode when we reserve space for
	 * the extent we are going to write, so we will enospc out there.  We
	 * don't need to start yet another transaction to update the inode as
	 * we will update the inode when we finish writing whatever data we
	 * write.
	 */
	update_time_for_write(inode);

	pos = iocb->ki_pos;
	count = iov_iter_count(from);
	start_pos = round_down(pos, root->sectorsize);
	if (start_pos > i_size_read(inode)) {
		/* Expand hole size to cover write data, preventing empty gap */
		end_pos = round_up(pos + count, root->sectorsize);
		err = btrfs_cont_expand(inode, i_size_read(inode), end_pos);
		if (err) {
			mutex_unlock(&inode->i_mutex);
			goto out;
		}
	}

	if (sync)
		atomic_inc(&BTRFS_I(inode)->sync_writers);

	if (iocb->ki_flags & IOCB_DIRECT) {
		num_written = __btrfs_direct_write(iocb, from, pos);
	} else {
		num_written = __btrfs_buffered_write(file, from, pos);
		if (num_written > 0)
			iocb->ki_pos = pos + num_written;
	}

	mutex_unlock(&inode->i_mutex);

	/*
	 * We also have to set last_sub_trans to the current log transid,
	 * otherwise subsequent syncs to a file that's been synced in this
	 * transaction will appear to have already occurred.
	 */
	spin_lock(&BTRFS_I(inode)->lock);
	BTRFS_I(inode)->last_sub_trans = root->log_transid;
	spin_unlock(&BTRFS_I(inode)->lock);
	if (num_written > 0) {
		err = generic_write_sync(file, pos, num_written);
		if (err < 0)
			num_written = err;
	}

	if (sync)
		atomic_dec(&BTRFS_I(inode)->sync_writers);
out:
	current->backing_dev_info = NULL;
	return num_written ? num_written : err;
}

int btrfs_release_file(struct inode *inode, struct file *filp)
{
	if (filp->private_data)
		btrfs_ioctl_trans_end(filp);

	/*
	 * ordered_data_close is set by setattr when we are about to truncate
	 * a file from a non-zero size to a zero size.  This tries to
	 * flush down new bytes that may have been written if the
	 * application were using truncate to replace a file in place.
	 */
	if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
			       &BTRFS_I(inode)->runtime_flags))
		filemap_flush(inode->i_mapping);
	return 0;
}
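
/*
 * Kick off writeback for the given range, counting ourselves in
 * sync_writers so checksumming is done inline in the submission path
 * rather than being deferred to worker threads.
 */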
static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end)
{
	int ret;

	atomic_inc(&BTRFS_I(inode)->sync_writers);
	ret = btrfs_fdatawrite_range(inode, start, end);
	atomic_dec(&BTRFS_I(inode)->sync_writers);

	return ret;
}

/*
 * fsync call for both files and directories.  This logs the inode into
 * the tree log instead of forcing full commits whenever possible.
 *
 * It needs to call filemap_fdatawait so that all ordered extent updates
 * in the metadata btree are up to date for copying to the log.
 *
 * It drops the inode mutex before doing the tree log commit.  This is an
 * important optimization for directories because holding the mutex prevents
 * new operations on it while we write to disk.
 */
int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct dentry *dentry = file->f_path.dentry;
	struct inode *inode = d_inode(dentry);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_trans_handle *trans;
	struct btrfs_log_ctx ctx;
	int ret = 0;
	bool full_sync = 0;

	trace_btrfs_sync_file(file, datasync);

	/*
	 * We write the dirty pages in the range and wait until they complete
	 * outside of the ->i_mutex, so multiple tasks can flush dirty pages
	 * concurrently, which makes the flush faster.
	 */
	ret = start_ordered_ops(inode, start, end);
	if (ret)
		return ret;

	mutex_lock(&inode->i_mutex);
	atomic_inc(&root->log_batch);
	full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
			     &BTRFS_I(inode)->runtime_flags);
	/*
	 * We might have ordered extents or dio writes still running, so
	 * decide how much waiting is needed before logging the inode.
	 */
	if (full_sync) {
		/*
		 * For a full sync, all ordered extents in the range must
		 * complete before we log, so that every file extent item
		 * is persisted in the fs/subvol btree.
		 */
		ret = btrfs_wait_ordered_range(inode, start, end - start + 1);
	} else {
		/*
		 * Start any new ordered operations before starting to log the
		 * inode. We will wait for them to finish in btrfs_sync_log().
		 *
		 * Right before acquiring the inode's mutex, we might have new
		 * writes dirtying pages, which won't immediately start the
		 * respective ordered operations - that is done through the
		 * fill_delalloc callbacks invoked from the writepage and
		 * writepages address space operations. So make sure we start
		 * all ordered operations before starting to log our inode. Not
		 * doing this means that while logging the inode, writeback
		 * could start and invoke writepage/writepages, which would call
		 * the fill_delalloc callbacks (cow_file_range,
		 * submit_compressed_extents). These callbacks add first an
		 * extent map to the modified list of extents and then create
		 * the respective ordered operation, which means in
		 * tree-log.c:btrfs_log_inode() we might capture all existing
		 * ordered operations (with btrfs_get_logged_extents()) before
		 * the fill_delalloc callback adds its ordered operation, and by
		 * the time we visit the modified list of extent maps (with
		 * btrfs_log_changed_extents()), we see and process the extent
		 * map they created. We then use the extent map to construct a
		 * file extent item for logging without waiting for the
		 * respective ordered operation to finish - this file extent
		 * item points to a disk location that might not have yet been
		 * written to, containing random data - so after a crash a log
		 * replay will make our inode have file extent items that point
		 * to disk locations containing invalid data, as we returned
		 * success to userspace without waiting for the respective
		 * ordered operation to finish, because it wasn't captured by
		 * btrfs_get_logged_extents().
		 */
		ret = start_ordered_ops(inode, start, end);
	}
	if (ret) {
		mutex_unlock(&inode->i_mutex);
		goto out;
	}
	atomic_inc(&root->log_batch);

	/*
	 * If the last transaction that changed this file was before the
	 * current transaction and we have the full sync flag set in our
	 * inode, we can bail out now without any syncing.
	 *
	 * Note that we can't bail out if the full sync flag isn't set. This
	 * is because when the full sync flag is set we start all ordered
	 * extents and wait for them to fully complete - when they complete
	 * they update the inode's last_trans field through:
	 *
	 *     btrfs_finish_ordered_io() ->
	 *         btrfs_update_inode_fallback() ->
	 *             btrfs_update_inode() ->
	 *                 btrfs_set_inode_last_trans()
	 *
	 * So we are sure that last_trans is up to date and can do this check
	 * to bail out safely. For the fast path, when the full sync flag is
	 * not set in our inode, we can not do it because we start only our
	 * ordered extents and don't wait for them to complete (that is when
	 * btrfs_finish_ordered_io runs), so here at this point their
	 * last_trans value might still be the previous transaction, while in
	 * reality the inode was modified in the current one.
	 */
	smp_mb();
	if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
	    (full_sync && BTRFS_I(inode)->last_trans <=
	     root->fs_info->last_trans_committed)) {
		/*
		 * We've had everything committed since the last time we were
		 * modified so clear this flag in case it was set for whatever
		 * reason, it's no longer relevant.
		 */
		clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
			  &BTRFS_I(inode)->runtime_flags);
		mutex_unlock(&inode->i_mutex);
		goto out;
	}

	/*
	 * ok we haven't committed the transaction yet, lets do a commit
	 */
	if (file->private_data)
		btrfs_ioctl_trans_end(file);

	/*
	 * We use start here because we will need to wait on the IO to complete
	 * in btrfs_sync_log, which could require joining a transaction (for
	 * example checking cross references in the nocow path).  If we use
	 * join here we could get into a situation where we're waiting on IO
	 * to happen that is blocked on a transaction trying to commit.  With
	 * start we just wait for the transaction to start and then we can do
	 * everything else we need to do and commit afterwards, avoiding the
	 * deadlock.
	 */
	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		mutex_unlock(&inode->i_mutex);
		goto out;
	}
	trans->sync = true;

	btrfs_init_log_ctx(&ctx);

	ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx);
	if (ret < 0) {
		/* Fallthrough and commit/free transaction. */
		ret = 1;
	}

	/* we've logged all the items and now have a consistent
	 * version of the file in the log.  It is possible that
	 * someone will come in and modify the file, but that's
	 * fine because the log is consistent on disk, and we
	 * have references to all of the file's extents
	 *
	 * It is possible that someone will come in and log the
	 * file again, but that will end up using the synchronization
	 * inside btrfs_sync_log to keep things safe.
	 */
	mutex_unlock(&inode->i_mutex);

	/*
	 * If any of the ordered extents had an error, just return it to user
	 * space, so that the application knows some writes didn't succeed and
	 * can take proper action (retry for e.g.). Blindly committing the
	 * transaction in this case, would fool userspace that everything was
	 * successful. And we also want to make sure our log doesn't contain
	 * file extent items pointing to extents that weren't fully written to
	 * - just like in the non fast fsync path, where we check for the
	 * ordered operation's error flag before writing to the log tree and
	 * return -EIO if any of them had this flag set
	 * (btrfs_wait_ordered_range) - therefore we need to check for errors
	 * in the ordered operations, which are indicated by ctx.io_err.
	 */
	if (ctx.io_err) {
		btrfs_end_transaction(trans, root);
		ret = ctx.io_err;
		goto out;
	}

	if (ret != BTRFS_NO_LOG_SYNC) {
		if (!ret) {
			ret = btrfs_sync_log(trans, root, &ctx);
			if (!ret) {
				ret = btrfs_end_transaction(trans, root);
				goto out;
			}
		}
		if (!full_sync) {
			ret = btrfs_wait_ordered_range(inode, start,
						       end - start + 1);
			if (ret) {
				btrfs_end_transaction(trans, root);
				goto out;
			}
		}
		ret = btrfs_commit_transaction(trans, root);
	} else {
		ret = btrfs_end_transaction(trans, root);
	}
out:
	return ret > 0 ? -EIO : ret;
}

static const struct vm_operations_struct btrfs_file_vm_ops = {
	.fault		= filemap_fault,
	.map_pages	= filemap_map_pages,
	.page_mkwrite	= btrfs_page_mkwrite,
};
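
/*
 * mmap() needs ->readpage to fault pages in, so refuse mappings on address
 * spaces that don't provide it; writable faults go through
 * btrfs_page_mkwrite() to reserve space and set up delalloc.
 */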
static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct address_space *mapping = filp->f_mapping;

	if (!mapping->a_ops->readpage)
		return -ENOEXEC;

	file_accessed(filp);
	vma->vm_ops = &btrfs_file_vm_ops;

	return 0;
}
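
/*
 * Check whether the item at @slot is a hole file extent (disk_bytenr == 0)
 * that ends right at @start or begins right at @end, i.e. whether a new
 * hole covering [start, end) could be merged into it.
 */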
static int hole_mergeable(struct inode *inode, struct extent_buffer *leaf,
			  int slot, u64 start, u64 end)
{
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;

	if (slot < 0 || slot >= btrfs_header_nritems(leaf))
		return 0;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	if (key.objectid != btrfs_ino(inode) ||
	    key.type != BTRFS_EXTENT_DATA_KEY)
		return 0;

	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);

	if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
		return 0;

	if (btrfs_file_extent_disk_bytenr(leaf, fi))
		return 0;

	if (key.offset == end)
		return 1;
	if (key.offset + btrfs_file_extent_num_bytes(leaf, fi) == start)
		return 1;
	return 0;
}
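
/*
 * Insert (or merge) a hole file extent item for [offset, end) and replace
 * the cached extent map for that range, so readers see the hole right away.
 * With the NO_HOLES incompat feature enabled no item is needed at all.
 */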
static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
		      struct btrfs_path *path, u64 offset, u64 end)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	struct extent_map *hole_em;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	struct btrfs_key key;
	int ret;

	if (btrfs_fs_incompat(root->fs_info, NO_HOLES))
		goto out;

	key.objectid = btrfs_ino(inode);
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = offset;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0)
		return ret;
	BUG_ON(!ret);

	leaf = path->nodes[0];
	if (hole_mergeable(inode, leaf, path->slots[0] - 1, offset, end)) {
		u64 num_bytes;

		path->slots[0]--;
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		num_bytes = btrfs_file_extent_num_bytes(leaf, fi) +
			end - offset;
		btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
		btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
		btrfs_set_file_extent_offset(leaf, fi, 0);
		btrfs_mark_buffer_dirty(leaf);
		goto out;
	}

	if (hole_mergeable(inode, leaf, path->slots[0], offset, end)) {
		u64 num_bytes;

		key.offset = offset;
		btrfs_set_item_key_safe(root->fs_info, path, &key);
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end -
			offset;
		btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
		btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
		btrfs_set_file_extent_offset(leaf, fi, 0);
		btrfs_mark_buffer_dirty(leaf);
		goto out;
	}
	btrfs_release_path(path);

	ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset,
				       0, 0, end - offset, 0, end - offset,
				       0, 0, 0);
	if (ret)
		return ret;

out:
	btrfs_release_path(path);

	hole_em = alloc_extent_map();
	if (!hole_em) {
		btrfs_drop_extent_cache(inode, offset, end - 1, 0);
		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
			&BTRFS_I(inode)->runtime_flags);
	} else {
		hole_em->start = offset;
		hole_em->len = end - offset;
		hole_em->ram_bytes = hole_em->len;
		hole_em->orig_start = offset;

		hole_em->block_start = EXTENT_MAP_HOLE;
		hole_em->block_len = 0;
		hole_em->orig_block_len = 0;
		hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
		hole_em->compress_type = BTRFS_COMPRESS_NONE;
		hole_em->generation = trans->transid;

		do {
			btrfs_drop_extent_cache(inode, offset, end - 1, 0);
			write_lock(&em_tree->lock);
			ret = add_extent_mapping(em_tree, hole_em, 1);
			write_unlock(&em_tree->lock);
		} while (ret == -EEXIST);
		free_extent_map(hole_em);
		if (ret)
			set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
				&BTRFS_I(inode)->runtime_flags);
	}

	return 0;
}

/*
 * Find a hole extent on given inode and change start/len to the end of hole
 * extent. (hole/vacuum extent whose em->start <= start &&
 *	    em->start + em->len > start)
 * When a hole extent is found, return 1 and modify start/len.
 */
static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len)
{
	struct extent_map *em;
	int ret = 0;

	em = btrfs_get_extent(inode, NULL, 0, *start, *len, 0);
	if (IS_ERR_OR_NULL(em)) {
		if (!em)
			ret = -ENOMEM;
		else
			ret = PTR_ERR(em);
		return ret;
	}

	/* Hole or vacuum extent(only exists in no-hole mode) */
	if (em->block_start == EXTENT_MAP_HOLE) {
		ret = 1;
		*len = em->start + em->len > *start + *len ?
		       0 : *start + *len - em->start - em->len;
		*start = em->start + em->len;
	}
	free_extent_map(em);
	return ret;
}
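
/*
 * Punch a hole in the range [offset, offset + len): zero any partial pages
 * at the edges, drop the extents fully inside the range, and (unless
 * NO_HOLES is enabled) insert hole file extent items in their place.
 */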
2248
2249static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2250{
2251 struct btrfs_root *root = BTRFS_I(inode)->root;
2252 struct extent_state *cached_state = NULL;
2253 struct btrfs_path *path;
2254 struct btrfs_block_rsv *rsv;
2255 struct btrfs_trans_handle *trans;
2256 u64 lockstart;
2257 u64 lockend;
2258 u64 tail_start;
2259 u64 tail_len;
2260 u64 orig_start = offset;
2261 u64 cur_offset;
2262 u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
2263 u64 drop_end;
2264 int ret = 0;
2265 int err = 0;
2266 int rsv_count;
2267 bool same_page;
2268 bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
2269 u64 ino_size;
2270 bool truncated_page = false;
2271 bool updated_inode = false;
2272
2273 ret = btrfs_wait_ordered_range(inode, offset, len);
2274 if (ret)
2275 return ret;
2276
2277 mutex_lock(&inode->i_mutex);
2278 ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE);
2279 ret = find_first_non_hole(inode, &offset, &len);
2280 if (ret < 0)
2281 goto out_only_mutex;
2282 if (ret && !len) {
2283
2284 ret = 0;
2285 goto out_only_mutex;
2286 }
2287
2288 lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize);
2289 lockend = round_down(offset + len,
2290 BTRFS_I(inode)->root->sectorsize) - 1;
2291 same_page = ((offset >> PAGE_CACHE_SHIFT) ==
2292 ((offset + len - 1) >> PAGE_CACHE_SHIFT));
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302 if (same_page && len < PAGE_CACHE_SIZE) {
2303 if (offset < ino_size) {
2304 truncated_page = true;
2305 ret = btrfs_truncate_page(inode, offset, len, 0);
2306 } else {
2307 ret = 0;
2308 }
2309 goto out_only_mutex;
2310 }
2311
2312
2313 if (offset < ino_size) {
2314 truncated_page = true;
2315 ret = btrfs_truncate_page(inode, offset, 0, 0);
2316 if (ret) {
2317 mutex_unlock(&inode->i_mutex);
2318 return ret;
2319 }
2320 }
2321
2322
2323
2324
2325
2326 if (offset == orig_start) {
2327
2328 len = offset + len - lockstart;
2329 offset = lockstart;
2330 ret = find_first_non_hole(inode, &offset, &len);
2331 if (ret < 0)
2332 goto out_only_mutex;
2333 if (ret && !len) {
2334 ret = 0;
2335 goto out_only_mutex;
2336 }
2337 lockstart = offset;
2338 }
2339
2340
2341 tail_start = lockend + 1;
2342 tail_len = offset + len - tail_start;
2343 if (tail_len) {
2344 ret = find_first_non_hole(inode, &tail_start, &tail_len);
2345 if (unlikely(ret < 0))
2346 goto out_only_mutex;
2347 if (!ret) {
2348
2349 if (tail_start + tail_len < ino_size) {
2350 truncated_page = true;
2351 ret = btrfs_truncate_page(inode,
2352 tail_start + tail_len, 0, 1);
2353 if (ret)
2354 goto out_only_mutex;
2355 }
2356 }
2357 }
2358
2359 if (lockend < lockstart) {
2360 ret = 0;
2361 goto out_only_mutex;
2362 }
2363
2364 while (1) {
2365 struct btrfs_ordered_extent *ordered;
2366
2367 truncate_pagecache_range(inode, lockstart, lockend);
2368
2369 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
2370 0, &cached_state);
2371 ordered = btrfs_lookup_first_ordered_extent(inode, lockend);
2372
2373
2374
2375
2376
2377
2378 if ((!ordered ||
2379 (ordered->file_offset + ordered->len <= lockstart ||
2380 ordered->file_offset > lockend)) &&
2381 !btrfs_page_exists_in_range(inode, lockstart, lockend)) {
2382 if (ordered)
2383 btrfs_put_ordered_extent(ordered);
2384 break;
2385 }
2386 if (ordered)
2387 btrfs_put_ordered_extent(ordered);
2388 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
2389 lockend, &cached_state, GFP_NOFS);
2390 ret = btrfs_wait_ordered_range(inode, lockstart,
2391 lockend - lockstart + 1);
2392 if (ret) {
2393 mutex_unlock(&inode->i_mutex);
2394 return ret;
2395 }
2396 }
2397
2398 path = btrfs_alloc_path();
2399 if (!path) {
2400 ret = -ENOMEM;
2401 goto out;
2402 }
2403
2404 rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
2405 if (!rsv) {
2406 ret = -ENOMEM;
2407 goto out_free;
2408 }
2409 rsv->size = btrfs_calc_trunc_metadata_size(root, 1);
2410 rsv->failfast = 1;
2411
2412
2413
2414
2415
2416
2417 rsv_count = no_holes ? 2 : 3;
2418 trans = btrfs_start_transaction(root, rsv_count);
2419 if (IS_ERR(trans)) {
2420 err = PTR_ERR(trans);
2421 goto out_free;
2422 }
2423
2424 ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv,
2425 min_size);
2426 BUG_ON(ret);
2427 trans->block_rsv = rsv;

	cur_offset = lockstart;
	len = lockend - cur_offset;
	while (cur_offset < lockend) {
		ret = __btrfs_drop_extents(trans, root, inode, path,
					   cur_offset, lockend + 1,
					   &drop_end, 1, 0, 0, NULL);
		if (ret != -ENOSPC)
			break;

		trans->block_rsv = &root->fs_info->trans_block_rsv;

		if (cur_offset < ino_size) {
			ret = fill_holes(trans, inode, path, cur_offset,
					 drop_end);
			if (ret) {
				err = ret;
				break;
			}
		}

		cur_offset = drop_end;

		ret = btrfs_update_inode(trans, root, inode);
		if (ret) {
			err = ret;
			break;
		}

		btrfs_end_transaction(trans, root);
		btrfs_btree_balance_dirty(root);

		trans = btrfs_start_transaction(root, rsv_count);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			trans = NULL;
			break;
		}

		ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv,
					      rsv, min_size);
		BUG_ON(ret);
		trans->block_rsv = rsv;

		ret = find_first_non_hole(inode, &cur_offset, &len);
		if (unlikely(ret < 0))
			break;
		if (ret && !len) {
			ret = 0;
			break;
		}
	}

	if (ret) {
		err = ret;
		goto out_trans;
	}

	trans->block_rsv = &root->fs_info->trans_block_rsv;

	/*
	 * Don't insert a file hole extent item if it's for a range beyond
	 * eof (because it's useless) or if it represents a zero length hole
	 * (cur_offset == drop_end means nothing was dropped here).
	 */
	if (cur_offset < ino_size && cur_offset < drop_end) {
		ret = fill_holes(trans, inode, path, cur_offset, drop_end);
		if (ret) {
			err = ret;
			goto out_trans;
		}
	}

out_trans:
	if (!trans)
		goto out_free;

	inode_inc_iversion(inode);
	inode->i_mtime = inode->i_ctime = CURRENT_TIME;

	trans->block_rsv = &root->fs_info->trans_block_rsv;
	ret = btrfs_update_inode(trans, root, inode);
	updated_inode = true;
	btrfs_end_transaction(trans, root);
	btrfs_btree_balance_dirty(root);
out_free:
	btrfs_free_path(path);
	btrfs_free_block_rsv(root, rsv);
out:
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			     &cached_state, GFP_NOFS);
out_only_mutex:
	if (!updated_inode && truncated_page && !ret && !err) {
		/*
		 * If we only end up zeroing part of a page, we still need to
		 * update the inode item, so that all the time fields are
		 * updated as well as the necessary btrfs inode in memory
		 * fields for detecting, at fsync time, if the inode isn't
		 * yet in the log tree or it's there but not up to date.
		 */
		trans = btrfs_start_transaction(root, 1);
		if (IS_ERR(trans)) {
			err = PTR_ERR(trans);
		} else {
			err = btrfs_update_inode(trans, root, inode);
			ret = btrfs_end_transaction(trans, root);
		}
	}
	mutex_unlock(&inode->i_mutex);
	if (ret && !err)
		err = ret;
	return err;
}
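
/*
 * Usage sketch (illustrative only, not part of the original source): the
 * hole punching path above is reached from user space via fallocate(2),
 * which requires FALLOC_FL_KEEP_SIZE to accompany FALLOC_FL_PUNCH_HOLE:
 *
 *	int fd = open("/mnt/btrfs/file", O_RDWR);
 *	int err = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
 *			    4096, 8192);
 *
 * Blocks fully covered by the byte range are dropped; an unaligned head
 * or tail is zeroed in place via btrfs_truncate_page() rather than
 * deallocated.
 */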

static long btrfs_fallocate(struct file *file, int mode,
			    loff_t offset, loff_t len)
{
	struct inode *inode = file_inode(file);
	struct extent_state *cached_state = NULL;
	u64 cur_offset;
	u64 last_byte;
	u64 alloc_start;
	u64 alloc_end;
	u64 alloc_hint = 0;
	u64 locked_end;
	struct extent_map *em;
	int blocksize = BTRFS_I(inode)->root->sectorsize;
	int ret;

	alloc_start = round_down(offset, blocksize);
	alloc_end = round_up(offset + len, blocksize);

	/* Make sure we aren't being given some unsupported mode */
	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
		return -EOPNOTSUPP;

	if (mode & FALLOC_FL_PUNCH_HOLE)
		return btrfs_punch_hole(inode, offset, len);

	/*
	 * Make sure we have enough space before we do the
	 * allocation.
	 */
	ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start,
					  alloc_end - alloc_start);
	if (ret)
		return ret;

	mutex_lock(&inode->i_mutex);
	ret = inode_newsize_ok(inode, alloc_end);
	if (ret)
		goto out;

	if (alloc_start > inode->i_size) {
		ret = btrfs_cont_expand(inode, i_size_read(inode),
					alloc_start);
		if (ret)
			goto out;
	} else {
		/*
		 * If we are fallocating from the end of the file onward we
		 * need to zero out the end of the page if i_size lands in
		 * the middle of a page.
		 */
		ret = btrfs_truncate_page(inode, inode->i_size, 0, 0);
		if (ret)
			goto out;
	}

	/*
	 * wait for ordered IO before we have any locks.  We'll loop again
	 * below with the locks held.
	 */
	ret = btrfs_wait_ordered_range(inode, alloc_start,
				       alloc_end - alloc_start);
	if (ret)
		goto out;

	locked_end = alloc_end - 1;
	while (1) {
		struct btrfs_ordered_extent *ordered;

		/*
		 * The extent lock is ordered inside the running transaction.
		 */
		lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
				 locked_end, 0, &cached_state);
		ordered = btrfs_lookup_first_ordered_extent(inode,
							    alloc_end - 1);
		if (ordered &&
		    ordered->file_offset + ordered->len > alloc_start &&
		    ordered->file_offset < alloc_end) {
			btrfs_put_ordered_extent(ordered);
			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
					     alloc_start, locked_end,
					     &cached_state, GFP_NOFS);
			/*
			 * We can't wait on the range with the transaction
			 * running or with the extent lock held.
			 */
			ret = btrfs_wait_ordered_range(inode, alloc_start,
						       alloc_end - alloc_start);
			if (ret)
				goto out;
		} else {
			if (ordered)
				btrfs_put_ordered_extent(ordered);
			break;
		}
	}

	cur_offset = alloc_start;
	while (1) {
		u64 actual_end;

		em = btrfs_get_extent(inode, NULL, 0, cur_offset,
				      alloc_end - cur_offset, 0);
		if (IS_ERR_OR_NULL(em)) {
			if (!em)
				ret = -ENOMEM;
			else
				ret = PTR_ERR(em);
			break;
		}
		last_byte = min(extent_map_end(em), alloc_end);
		actual_end = min_t(u64, extent_map_end(em), offset + len);
		last_byte = ALIGN(last_byte, blocksize);

		if (em->block_start == EXTENT_MAP_HOLE ||
		    (cur_offset >= inode->i_size &&
		     !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
			ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
							last_byte - cur_offset,
							1 << inode->i_blkbits,
							offset + len,
							&alloc_hint);
		} else if (actual_end > inode->i_size &&
			   !(mode & FALLOC_FL_KEEP_SIZE)) {
			struct btrfs_trans_handle *trans;
			struct btrfs_root *root = BTRFS_I(inode)->root;

			/*
			 * We didn't need to allocate any more space, but we
			 * still extended the size of the file so we need to
			 * update i_size and the inode item.
			 */
			trans = btrfs_start_transaction(root, 1);
			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
			} else {
				inode->i_ctime = CURRENT_TIME;
				i_size_write(inode, actual_end);
				btrfs_ordered_update_i_size(inode, actual_end,
							    NULL);
				ret = btrfs_update_inode(trans, root, inode);
				if (ret)
					btrfs_end_transaction(trans, root);
				else
					ret = btrfs_end_transaction(trans,
								    root);
			}
		}
		free_extent_map(em);
		if (ret < 0)
			break;

		cur_offset = last_byte;
		if (cur_offset >= alloc_end) {
			ret = 0;
			break;
		}
	}
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
			     &cached_state, GFP_NOFS);
out:
	mutex_unlock(&inode->i_mutex);
	/* Let go of our reservation. */
	btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
	return ret;
}
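
/*
 * Example (illustrative only): both preallocation modes land in
 * btrfs_fallocate() above; only the handling of i_size differs:
 *
 *	fallocate(fd, 0, 0, 1 << 20);                   // may extend i_size
 *	fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1 << 20); // i_size unchanged
 *
 * The resulting extents carry EXTENT_FLAG_PREALLOC and read back as
 * zeroes, which is also why the SEEK_DATA/SEEK_HOLE code below treats
 * them as holes.
 */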

static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_map *em = NULL;
	struct extent_state *cached_state = NULL;
	u64 lockstart;
	u64 lockend;
	u64 start;
	u64 len;
	int ret = 0;

	if (inode->i_size == 0)
		return -ENXIO;

	/*
	 * *offset can be negative, in this case we start finding DATA/HOLE
	 * from the very start of the file.
	 */
	start = max_t(loff_t, 0, *offset);

	lockstart = round_down(start, root->sectorsize);
	lockend = round_up(i_size_read(inode), root->sectorsize);
	if (lockend <= lockstart)
		lockend = lockstart + root->sectorsize;
	lockend--;
	len = lockend - lockstart + 1;

	lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0,
			 &cached_state);

	while (start < inode->i_size) {
		em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0);
		if (IS_ERR(em)) {
			ret = PTR_ERR(em);
			em = NULL;
			break;
		}

		if (whence == SEEK_HOLE &&
		    (em->block_start == EXTENT_MAP_HOLE ||
		     test_bit(EXTENT_FLAG_PREALLOC, &em->flags)))
			break;
		else if (whence == SEEK_DATA &&
			 (em->block_start != EXTENT_MAP_HOLE &&
			  !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)))
			break;

		start = em->start + em->len;
		free_extent_map(em);
		em = NULL;
		cond_resched();
	}
	free_extent_map(em);
	if (!ret) {
		if (whence == SEEK_DATA && start >= inode->i_size)
			ret = -ENXIO;
		else
			*offset = min_t(loff_t, start, inode->i_size);
	}
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			     &cached_state, GFP_NOFS);
	return ret;
}

static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	int ret;

	mutex_lock(&inode->i_mutex);
	switch (whence) {
	case SEEK_END:
	case SEEK_CUR:
		offset = generic_file_llseek(file, offset, whence);
		goto out;
	case SEEK_DATA:
	case SEEK_HOLE:
		if (offset >= i_size_read(inode)) {
			mutex_unlock(&inode->i_mutex);
			return -ENXIO;
		}

		ret = find_desired_extent(inode, &offset, whence);
		if (ret) {
			mutex_unlock(&inode->i_mutex);
			return ret;
		}
	}

	offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
out:
	mutex_unlock(&inode->i_mutex);
	return offset;
}
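
/*
 * Example (illustrative only): find_desired_extent() gives btrfs the
 * SEEK_DATA/SEEK_HOLE semantics of lseek(2), so a sparse-file copy can
 * walk only the allocated regions:
 *
 *	off_t data = lseek(fd, 0, SEEK_DATA);     // first non-hole offset
 *	off_t hole = lseek(fd, data, SEEK_HOLE);  // end of that data run
 *
 * Both calls fail with errno set to ENXIO once the offset is at or past
 * end of file, matching the -ENXIO returns above.
 */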

const struct file_operations btrfs_file_operations = {
	.llseek		= btrfs_file_llseek,
	.read_iter	= generic_file_read_iter,
	.splice_read	= generic_file_splice_read,
	.write_iter	= btrfs_file_write_iter,
	.mmap		= btrfs_file_mmap,
	.open		= generic_file_open,
	.release	= btrfs_release_file,
	.fsync		= btrfs_sync_file,
	.fallocate	= btrfs_fallocate,
	.unlocked_ioctl	= btrfs_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= btrfs_ioctl,
#endif
};

void btrfs_auto_defrag_exit(void)
{
	if (btrfs_inode_defrag_cachep)
		kmem_cache_destroy(btrfs_inode_defrag_cachep);
}

int btrfs_auto_defrag_init(void)
{
	btrfs_inode_defrag_cachep = kmem_cache_create("btrfs_inode_defrag",
					sizeof(struct inode_defrag), 0,
					SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
					NULL);
	if (!btrfs_inode_defrag_cachep)
		return -ENOMEM;

	return 0;
}
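
/*
 * Sketch (illustrative only): the init/exit pair above is meant to be
 * called once from the module's own init and exit paths, so the defrag
 * cache exists for the lifetime of the filesystem module, e.g.:
 *
 *	if (btrfs_auto_defrag_init())
 *		return -ENOMEM;
 *	...
 *	btrfs_auto_defrag_exit();
 */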

int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end)
{
	int ret;

	/*
	 * So with compression we will find and lock a dirty page and clear
	 * the first one as dirty, set up an async extent, and immediately
	 * return with the entire range locked but with nobody actually
	 * marked with writeback.  So we can't just
	 * filemap_write_and_wait_range() and expect it to work since it
	 * will just kick off a thread to do the actual work.  So we need
	 * to call filemap_fdatawrite_range _again_ since it will wait on
	 * the page lock, which won't be unlocked until after the pages
	 * have been marked as writeback and hence we're good.
	 */
	ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
	if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
			     &BTRFS_I(inode)->runtime_flags))
		ret = filemap_fdatawrite_range(inode->i_mapping, start, end);

	return ret;
}
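
/*
 * Example (illustrative only): a caller that needs the pages written and
 * completed, not merely submitted, pairs the helper above with a wait:
 *
 *	ret = btrfs_fdatawrite_range(inode, start, end);
 *	if (!ret)
 *		ret = filemap_fdatawait_range(inode->i_mapping, start, end);
 *
 * In btrfs itself the wait side usually goes through ordered extents
 * (btrfs_wait_ordered_range()) rather than a bare filemap wait.
 */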