/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

19#include <linux/fs.h>
20#include <linux/pagemap.h>
21#include <linux/highmem.h>
22#include <linux/time.h>
23#include <linux/init.h>
24#include <linux/string.h>
25#include <linux/backing-dev.h>
26#include <linux/mpage.h>
27#include <linux/aio.h>
28#include <linux/falloc.h>
29#include <linux/swap.h>
30#include <linux/writeback.h>
31#include <linux/statfs.h>
32#include <linux/compat.h>
33#include <linux/slab.h>
34#include <linux/btrfs.h>
35#include "ctree.h"
36#include "disk-io.h"
37#include "transaction.h"
38#include "btrfs_inode.h"
39#include "print-tree.h"
40#include "tree-log.h"
41#include "locking.h"
42#include "volumes.h"
43#include "qgroup.h"
44
45static struct kmem_cache *btrfs_inode_defrag_cachep;

/*
 * When auto defrag is enabled we queue up these defrag structs to remember
 * which inodes need defragging passes.
 */
51struct inode_defrag {
	struct rb_node rb_node;
	/* inode objectid */
	u64 ino;
	/*
	 * transid where the defrag was added, we search for extents newer
	 * than this
	 */
	u64 transid;

	/* root objectid */
	u64 root;

	/* last offset we were able to defrag */
	u64 last_offset;

	/* if we've already cycled back around to offset zero */
	int cycled;
69};
70
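/*
 * Defrag records are kept in an rbtree ordered by root objectid first and
 * inode number second.
 */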
71static int __compare_inode_defrag(struct inode_defrag *defrag1,
72 struct inode_defrag *defrag2)
73{
74 if (defrag1->root > defrag2->root)
75 return 1;
76 else if (defrag1->root < defrag2->root)
77 return -1;
78 else if (defrag1->ino > defrag2->ino)
79 return 1;
80 else if (defrag1->ino < defrag2->ino)
81 return -1;
82 else
83 return 0;
84}
85

/*
 * Pop a record for an inode into the defrag tree.  The lock must be held
 * already.
 *
 * If you're inserting a record for an older transid than an existing
 * record, the transid already in the tree is lowered.
 *
 * If an existing record is found, the defrag item you pass in is not
 * inserted and -EEXIST is returned; the caller frees it.
 */
95static int __btrfs_add_inode_defrag(struct inode *inode,
96 struct inode_defrag *defrag)
97{
98 struct btrfs_root *root = BTRFS_I(inode)->root;
99 struct inode_defrag *entry;
100 struct rb_node **p;
101 struct rb_node *parent = NULL;
102 int ret;
103
104 p = &root->fs_info->defrag_inodes.rb_node;
105 while (*p) {
106 parent = *p;
107 entry = rb_entry(parent, struct inode_defrag, rb_node);
108
109 ret = __compare_inode_defrag(defrag, entry);
110 if (ret < 0)
111 p = &parent->rb_left;
112 else if (ret > 0)
113 p = &parent->rb_right;
114 else {
			/*
			 * If we're reinserting an entry for an old defrag run,
			 * make sure to lower the transid of our existing
			 * record and widen its range.
			 */
119 if (defrag->transid < entry->transid)
120 entry->transid = defrag->transid;
121 if (defrag->last_offset > entry->last_offset)
122 entry->last_offset = defrag->last_offset;
123 return -EEXIST;
124 }
125 }
126 set_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
127 rb_link_node(&defrag->rb_node, parent, p);
128 rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes);
129 return 0;
130}
131
132static inline int __need_auto_defrag(struct btrfs_root *root)
133{
134 if (!btrfs_test_opt(root, AUTO_DEFRAG))
135 return 0;
136
137 if (btrfs_fs_closing(root->fs_info))
138 return 0;
139
140 return 1;
141}
142

/*
 * Insert a defrag record for this inode if auto defrag is enabled.
 */
147int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
148 struct inode *inode)
149{
150 struct btrfs_root *root = BTRFS_I(inode)->root;
151 struct inode_defrag *defrag;
152 u64 transid;
153 int ret;
154
155 if (!__need_auto_defrag(root))
156 return 0;
157
158 if (test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags))
159 return 0;
160
161 if (trans)
162 transid = trans->transid;
163 else
164 transid = BTRFS_I(inode)->root->last_trans;
165
166 defrag = kmem_cache_zalloc(btrfs_inode_defrag_cachep, GFP_NOFS);
167 if (!defrag)
168 return -ENOMEM;
169
170 defrag->ino = btrfs_ino(inode);
171 defrag->transid = transid;
172 defrag->root = root->root_key.objectid;
173
174 spin_lock(&root->fs_info->defrag_inodes_lock);
175 if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) {
		/*
		 * If we set the IN_DEFRAG flag and then the inode is evicted
		 * and re-read, the new in-memory inode won't have the flag
		 * set even though a record may already exist in the tree, so
		 * __btrfs_add_inode_defrag() can still find an existing entry.
		 */
181 ret = __btrfs_add_inode_defrag(inode, defrag);
182 if (ret)
183 kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
184 } else {
185 kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
186 }
187 spin_unlock(&root->fs_info->defrag_inodes_lock);
188 return 0;
189}
190

/*
 * Requeue the defrag object.  If there is a defrag object that points to
 * the same inode in the tree, we will merge them together (by
 * __btrfs_add_inode_defrag()) and free the one that we want to requeue.
 */
196static void btrfs_requeue_inode_defrag(struct inode *inode,
197 struct inode_defrag *defrag)
198{
199 struct btrfs_root *root = BTRFS_I(inode)->root;
200 int ret;
201
202 if (!__need_auto_defrag(root))
203 goto out;
204
	/*
	 * Here we don't check the IN_DEFRAG flag, because we want this
	 * record to be merged with any existing one for the inode.
	 */
209 spin_lock(&root->fs_info->defrag_inodes_lock);
210 ret = __btrfs_add_inode_defrag(inode, defrag);
211 spin_unlock(&root->fs_info->defrag_inodes_lock);
212 if (ret)
213 goto out;
214 return;
215out:
216 kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
217}
218

/*
 * Pick the defraggable inode that we want; if it doesn't exist, we will
 * get the next one.
 */
223static struct inode_defrag *
224btrfs_pick_defrag_inode(struct btrfs_fs_info *fs_info, u64 root, u64 ino)
225{
226 struct inode_defrag *entry = NULL;
227 struct inode_defrag tmp;
228 struct rb_node *p;
229 struct rb_node *parent = NULL;
230 int ret;
231
232 tmp.ino = ino;
233 tmp.root = root;
234
235 spin_lock(&fs_info->defrag_inodes_lock);
236 p = fs_info->defrag_inodes.rb_node;
237 while (p) {
238 parent = p;
239 entry = rb_entry(parent, struct inode_defrag, rb_node);
240
241 ret = __compare_inode_defrag(&tmp, entry);
242 if (ret < 0)
243 p = parent->rb_left;
244 else if (ret > 0)
245 p = parent->rb_right;
246 else
247 goto out;
248 }
249
250 if (parent && __compare_inode_defrag(&tmp, entry) > 0) {
251 parent = rb_next(parent);
252 if (parent)
253 entry = rb_entry(parent, struct inode_defrag, rb_node);
254 else
255 entry = NULL;
256 }
257out:
258 if (entry)
259 rb_erase(parent, &fs_info->defrag_inodes);
260 spin_unlock(&fs_info->defrag_inodes_lock);
261 return entry;
262}
263
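/*
 * Free every queued defrag record; used to tear the defrag tree down
 * (e.g. at unmount time).
 */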
264void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info)
265{
266 struct inode_defrag *defrag;
267 struct rb_node *node;
268
269 spin_lock(&fs_info->defrag_inodes_lock);
270 node = rb_first(&fs_info->defrag_inodes);
271 while (node) {
272 rb_erase(node, &fs_info->defrag_inodes);
273 defrag = rb_entry(node, struct inode_defrag, rb_node);
274 kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
275
276 if (need_resched()) {
277 spin_unlock(&fs_info->defrag_inodes_lock);
278 cond_resched();
279 spin_lock(&fs_info->defrag_inodes_lock);
280 }
281
282 node = rb_first(&fs_info->defrag_inodes);
283 }
284 spin_unlock(&fs_info->defrag_inodes_lock);
285}
286
287#define BTRFS_DEFRAG_BATCH 1024
288
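/*
 * Defrag a single inode from the defrag tree: look up its root and the
 * inode itself, run one batch of defrag on it and requeue the record if
 * there is more work left to do.
 */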
289static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
290 struct inode_defrag *defrag)
291{
292 struct btrfs_root *inode_root;
293 struct inode *inode;
294 struct btrfs_key key;
295 struct btrfs_ioctl_defrag_range_args range;
296 int num_defrag;
297 int index;
298 int ret;
299
300
301 key.objectid = defrag->root;
302 key.type = BTRFS_ROOT_ITEM_KEY;
303 key.offset = (u64)-1;
304
305 index = srcu_read_lock(&fs_info->subvol_srcu);
306
307 inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
308 if (IS_ERR(inode_root)) {
309 ret = PTR_ERR(inode_root);
310 goto cleanup;
311 }
312
313 key.objectid = defrag->ino;
314 key.type = BTRFS_INODE_ITEM_KEY;
315 key.offset = 0;
316 inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
317 if (IS_ERR(inode)) {
318 ret = PTR_ERR(inode);
319 goto cleanup;
320 }
321 srcu_read_unlock(&fs_info->subvol_srcu, index);
322
323
324 clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
325 memset(&range, 0, sizeof(range));
326 range.len = (u64)-1;
327 range.start = defrag->last_offset;
328
329 sb_start_write(fs_info->sb);
330 num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
331 BTRFS_DEFRAG_BATCH);
332 sb_end_write(fs_info->sb);
333
	/*
	 * If we filled a whole defrag batch, there is probably more work to
	 * do for this inode, so queue it up again.
	 */
338 if (num_defrag == BTRFS_DEFRAG_BATCH) {
339 defrag->last_offset = range.start;
340 btrfs_requeue_inode_defrag(inode, defrag);
341 } else if (defrag->last_offset && !defrag->cycled) {
		/*
		 * We didn't fill our defrag batch, but we didn't start at
		 * offset zero either.  Go around once more from the start of
		 * the file.
		 */
347 defrag->last_offset = 0;
348 defrag->cycled = 1;
349 btrfs_requeue_inode_defrag(inode, defrag);
350 } else {
351 kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
352 }
353
354 iput(inode);
355 return 0;
356cleanup:
357 srcu_read_unlock(&fs_info->subvol_srcu, index);
358 kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
359 return ret;
360}
361

/*
 * Run through the list of inodes in the FS that need defragging.
 */
366int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
367{
368 struct inode_defrag *defrag;
369 u64 first_ino = 0;
370 u64 root_objectid = 0;
371
372 atomic_inc(&fs_info->defrag_running);
373 while (1) {
		/* Pause the auto defragger. */
375 if (test_bit(BTRFS_FS_STATE_REMOUNTING,
376 &fs_info->fs_state))
377 break;
378
379 if (!__need_auto_defrag(fs_info->tree_root))
380 break;
381
		/* Find an inode to defrag. */
383 defrag = btrfs_pick_defrag_inode(fs_info, root_objectid,
384 first_ino);
385 if (!defrag) {
386 if (root_objectid || first_ino) {
387 root_objectid = 0;
388 first_ino = 0;
389 continue;
390 } else {
391 break;
392 }
393 }
394
395 first_ino = defrag->ino + 1;
396 root_objectid = defrag->root;
397
398 __btrfs_run_defrag_inode(fs_info, defrag);
399 }
400 atomic_dec(&fs_info->defrag_running);
401
	/*
	 * During unmount, we use the transaction_wait queue to wait for the
	 * defragger to stop.
	 */
406 wake_up(&fs_info->transaction_wait);
407 return 0;
408}
409
/*
 * Simple helper to fault in pages and copy.  This should go away and be
 * replaced with calls into generic code.
 */
413static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
414 size_t write_bytes,
415 struct page **prepared_pages,
416 struct iov_iter *i)
417{
418 size_t copied = 0;
419 size_t total_copied = 0;
420 int pg = 0;
421 int offset = pos & (PAGE_CACHE_SIZE - 1);
422
423 while (write_bytes > 0) {
424 size_t count = min_t(size_t,
425 PAGE_CACHE_SIZE - offset, write_bytes);
426 struct page *page = prepared_pages[pg];
427
		/* Copy data from userspace to the current page. */
		copied = iov_iter_copy_from_user_atomic(page, i, offset, count);

		/* Flush the processor's dcache for this page. */
		flush_dcache_page(page);

		/*
		 * If we get a partial write, we can end up with partially
		 * up to date pages.  These add a lot of complexity, so make
		 * sure they don't happen by forcing this copy to be retried.
		 *
		 * The rest of the btrfs_file_write code will fall back to
		 * page at a time copies after we return 0.
		 */
		if (!PageUptodate(page) && copied < count)
			copied = 0;
446
447 iov_iter_advance(i, copied);
448 write_bytes -= copied;
449 total_copied += copied;
450
451
452 if (unlikely(copied == 0))
453 break;
454
455 if (copied < PAGE_CACHE_SIZE - offset) {
456 offset += copied;
457 } else {
458 pg++;
459 offset = 0;
460 }
461 }
462 return total_copied;
463}
464

/*
 * Unlock pages after btrfs_file_write is done with them.
 */
468static void btrfs_drop_pages(struct page **pages, size_t num_pages)
469{
470 size_t i;
471 for (i = 0; i < num_pages; i++) {
		/*
		 * PageChecked is some magic around finding pages that have
		 * been modified without going through btrfs_set_page_dirty;
		 * clear it here.  There is no need to mark the pages accessed
		 * as prepare_pages should have done that already via
		 * find_or_create_page().
		 */
478 ClearPageChecked(pages[i]);
479 unlock_page(pages[i]);
480 page_cache_release(pages[i]);
481 }
482}
483
/*
 * After copy_from_user, pages need to be dirtied and we need to make sure
 * holes are created between the current EOF and the start of any next
 * extents (if required).
 *
 * This also makes the decision about creating an inline extent vs. doing
 * real data extents, marking pages dirty and delalloc as required.
 */
492int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
493 struct page **pages, size_t num_pages,
494 loff_t pos, size_t write_bytes,
495 struct extent_state **cached)
496{
497 int err = 0;
498 int i;
499 u64 num_bytes;
500 u64 start_pos;
501 u64 end_of_last_block;
502 u64 end_pos = pos + write_bytes;
503 loff_t isize = i_size_read(inode);
504
505 start_pos = pos & ~((u64)root->sectorsize - 1);
506 num_bytes = ALIGN(write_bytes + pos - start_pos, root->sectorsize);
507
508 end_of_last_block = start_pos + num_bytes - 1;
509 err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
510 cached);
511 if (err)
512 return err;
513
514 for (i = 0; i < num_pages; i++) {
515 struct page *p = pages[i];
516 SetPageUptodate(p);
517 ClearPageChecked(p);
518 set_page_dirty(p);
519 }
520
	/*
	 * We've only changed i_size in ram, and we haven't updated the disk
	 * i_size.  There is no need to log the inode at this time.
	 */
526 if (end_pos > isize)
527 i_size_write(inode, end_pos);
528 return 0;
529}
530

/*
 * This drops all the extents in the cache that intersect the range
 * [start, end].  Existing extents are split as required.
 */
535void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
536 int skip_pinned)
537{
538 struct extent_map *em;
539 struct extent_map *split = NULL;
540 struct extent_map *split2 = NULL;
541 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
542 u64 len = end - start + 1;
543 u64 gen;
544 int ret;
545 int testend = 1;
546 unsigned long flags;
547 int compressed = 0;
548 bool modified;
549
550 WARN_ON(end < start);
551 if (end == (u64)-1) {
552 len = (u64)-1;
553 testend = 0;
554 }
555 while (1) {
556 int no_splits = 0;
557
558 modified = false;
559 if (!split)
560 split = alloc_extent_map();
561 if (!split2)
562 split2 = alloc_extent_map();
563 if (!split || !split2)
564 no_splits = 1;
565
566 write_lock(&em_tree->lock);
567 em = lookup_extent_mapping(em_tree, start, len);
568 if (!em) {
569 write_unlock(&em_tree->lock);
570 break;
571 }
572 flags = em->flags;
573 gen = em->generation;
574 if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
575 if (testend && em->start + em->len >= start + len) {
576 free_extent_map(em);
577 write_unlock(&em_tree->lock);
578 break;
579 }
580 start = em->start + em->len;
581 if (testend)
582 len = start + len - (em->start + em->len);
583 free_extent_map(em);
584 write_unlock(&em_tree->lock);
585 continue;
586 }
587 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
588 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
589 clear_bit(EXTENT_FLAG_LOGGING, &flags);
590 modified = !list_empty(&em->list);
591 if (no_splits)
592 goto next;
593
594 if (em->start < start) {
595 split->start = em->start;
596 split->len = start - em->start;
597
598 if (em->block_start < EXTENT_MAP_LAST_BYTE) {
599 split->orig_start = em->orig_start;
600 split->block_start = em->block_start;
601
602 if (compressed)
603 split->block_len = em->block_len;
604 else
605 split->block_len = split->len;
606 split->orig_block_len = max(split->block_len,
607 em->orig_block_len);
608 split->ram_bytes = em->ram_bytes;
609 } else {
610 split->orig_start = split->start;
611 split->block_len = 0;
612 split->block_start = em->block_start;
613 split->orig_block_len = 0;
614 split->ram_bytes = split->len;
615 }
616
617 split->generation = gen;
618 split->bdev = em->bdev;
619 split->flags = flags;
620 split->compress_type = em->compress_type;
621 replace_extent_mapping(em_tree, em, split, modified);
622 free_extent_map(split);
623 split = split2;
624 split2 = NULL;
625 }
626 if (testend && em->start + em->len > start + len) {
627 u64 diff = start + len - em->start;
628
629 split->start = start + len;
630 split->len = em->start + em->len - (start + len);
631 split->bdev = em->bdev;
632 split->flags = flags;
633 split->compress_type = em->compress_type;
634 split->generation = gen;
635
636 if (em->block_start < EXTENT_MAP_LAST_BYTE) {
637 split->orig_block_len = max(em->block_len,
638 em->orig_block_len);
639
640 split->ram_bytes = em->ram_bytes;
641 if (compressed) {
642 split->block_len = em->block_len;
643 split->block_start = em->block_start;
644 split->orig_start = em->orig_start;
645 } else {
646 split->block_len = split->len;
647 split->block_start = em->block_start
648 + diff;
649 split->orig_start = em->orig_start;
650 }
651 } else {
652 split->ram_bytes = split->len;
653 split->orig_start = split->start;
654 split->block_len = 0;
655 split->block_start = em->block_start;
656 split->orig_block_len = 0;
657 }
658
659 if (extent_map_in_tree(em)) {
660 replace_extent_mapping(em_tree, em, split,
661 modified);
662 } else {
663 ret = add_extent_mapping(em_tree, split,
664 modified);
665 ASSERT(ret == 0);
666 }
667 free_extent_map(split);
668 split = NULL;
669 }
670next:
671 if (extent_map_in_tree(em))
672 remove_extent_mapping(em_tree, em);
673 write_unlock(&em_tree->lock);
674
		/* Once for us. */
		free_extent_map(em);
		/* Once for the tree. */
		free_extent_map(em);
679 }
680 if (split)
681 free_extent_map(split);
682 if (split2)
683 free_extent_map(split2);
684}
685
/*
 * This is very complex, but the basic idea is to drop all extents in the
 * range [start, end).  If an extent intersects the range but is not
 * entirely inside the range it is either truncated or split.  Anything
 * entirely inside the range is deleted from the tree.
 */
695int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
696 struct btrfs_root *root, struct inode *inode,
697 struct btrfs_path *path, u64 start, u64 end,
698 u64 *drop_end, int drop_cache,
699 int replace_extent,
700 u32 extent_item_size,
701 int *key_inserted)
702{
703 struct extent_buffer *leaf;
704 struct btrfs_file_extent_item *fi;
705 struct btrfs_key key;
706 struct btrfs_key new_key;
707 u64 ino = btrfs_ino(inode);
708 u64 search_start = start;
709 u64 disk_bytenr = 0;
710 u64 num_bytes = 0;
711 u64 extent_offset = 0;
712 u64 extent_end = 0;
713 int del_nr = 0;
714 int del_slot = 0;
715 int extent_type;
716 int recow;
717 int ret;
718 int modify_tree = -1;
719 int update_refs;
720 int found = 0;
721 int leafs_visited = 0;
722
723 if (drop_cache)
724 btrfs_drop_extent_cache(inode, start, end - 1, 0);
725
726 if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent)
727 modify_tree = 0;
728
729 update_refs = (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
730 root == root->fs_info->tree_root);
731 while (1) {
732 recow = 0;
733 ret = btrfs_lookup_file_extent(trans, root, path, ino,
734 search_start, modify_tree);
735 if (ret < 0)
736 break;
737 if (ret > 0 && path->slots[0] > 0 && search_start == start) {
738 leaf = path->nodes[0];
739 btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
740 if (key.objectid == ino &&
741 key.type == BTRFS_EXTENT_DATA_KEY)
742 path->slots[0]--;
743 }
744 ret = 0;
745 leafs_visited++;
746next_slot:
747 leaf = path->nodes[0];
748 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
749 BUG_ON(del_nr > 0);
750 ret = btrfs_next_leaf(root, path);
751 if (ret < 0)
752 break;
753 if (ret > 0) {
754 ret = 0;
755 break;
756 }
757 leafs_visited++;
758 leaf = path->nodes[0];
759 recow = 1;
760 }
761
762 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
763 if (key.objectid > ino ||
764 key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
765 break;
766
767 fi = btrfs_item_ptr(leaf, path->slots[0],
768 struct btrfs_file_extent_item);
769 extent_type = btrfs_file_extent_type(leaf, fi);
770
771 if (extent_type == BTRFS_FILE_EXTENT_REG ||
772 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
773 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
774 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
775 extent_offset = btrfs_file_extent_offset(leaf, fi);
776 extent_end = key.offset +
777 btrfs_file_extent_num_bytes(leaf, fi);
778 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
779 extent_end = key.offset +
780 btrfs_file_extent_inline_len(leaf,
781 path->slots[0], fi);
782 } else {
783 WARN_ON(1);
784 extent_end = search_start;
785 }
786
		/*
		 * Don't skip extent items representing 0 byte lengths.  They
		 * used to be created (bug) if while punching holes we hit
		 * the -ENOSPC condition.  So if we find one here, just make
		 * sure we delete it, otherwise we would insert a new file
		 * extent item with the same key (offset) as that 0 bytes
		 * length file extent item in the call to
		 * setup_items_for_insert() later in this function.
		 */
796 if (extent_end == key.offset && extent_end >= search_start)
797 goto delete_extent_item;
798
799 if (extent_end <= search_start) {
800 path->slots[0]++;
801 goto next_slot;
802 }
803
804 found = 1;
805 search_start = max(key.offset, start);
806 if (recow || !modify_tree) {
807 modify_tree = -1;
808 btrfs_release_path(path);
809 continue;
810 }
811
		/*
		 *     | - range to drop - |
		 *  | -------- extent -------- |
		 */
816 if (start > key.offset && end < extent_end) {
817 BUG_ON(del_nr > 0);
818 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
819 ret = -EOPNOTSUPP;
820 break;
821 }
822
823 memcpy(&new_key, &key, sizeof(new_key));
824 new_key.offset = start;
825 ret = btrfs_duplicate_item(trans, root, path,
826 &new_key);
827 if (ret == -EAGAIN) {
828 btrfs_release_path(path);
829 continue;
830 }
831 if (ret < 0)
832 break;
833
834 leaf = path->nodes[0];
835 fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
836 struct btrfs_file_extent_item);
837 btrfs_set_file_extent_num_bytes(leaf, fi,
838 start - key.offset);
839
840 fi = btrfs_item_ptr(leaf, path->slots[0],
841 struct btrfs_file_extent_item);
842
843 extent_offset += start - key.offset;
844 btrfs_set_file_extent_offset(leaf, fi, extent_offset);
845 btrfs_set_file_extent_num_bytes(leaf, fi,
846 extent_end - start);
847 btrfs_mark_buffer_dirty(leaf);
848
849 if (update_refs && disk_bytenr > 0) {
850 ret = btrfs_inc_extent_ref(trans, root,
851 disk_bytenr, num_bytes, 0,
852 root->root_key.objectid,
853 new_key.objectid,
854 start - extent_offset, 1);
855 BUG_ON(ret);
856 }
857 key.offset = start;
858 }
859
		/*
		 *  | ---- range to drop ----- |
		 *      | -------- extent -------- |
		 */
863 if (start <= key.offset && end < extent_end) {
864 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
865 ret = -EOPNOTSUPP;
866 break;
867 }
868
869 memcpy(&new_key, &key, sizeof(new_key));
870 new_key.offset = end;
871 btrfs_set_item_key_safe(root, path, &new_key);
872
873 extent_offset += end - key.offset;
874 btrfs_set_file_extent_offset(leaf, fi, extent_offset);
875 btrfs_set_file_extent_num_bytes(leaf, fi,
876 extent_end - end);
877 btrfs_mark_buffer_dirty(leaf);
878 if (update_refs && disk_bytenr > 0)
879 inode_sub_bytes(inode, end - key.offset);
880 break;
881 }
882
883 search_start = extent_end;
884
		/*
		 *       | ---- range to drop ----- |
		 *  | -------- extent -------- |
		 */
888 if (start > key.offset && end >= extent_end) {
889 BUG_ON(del_nr > 0);
890 if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
891 ret = -EOPNOTSUPP;
892 break;
893 }
894
895 btrfs_set_file_extent_num_bytes(leaf, fi,
896 start - key.offset);
897 btrfs_mark_buffer_dirty(leaf);
898 if (update_refs && disk_bytenr > 0)
899 inode_sub_bytes(inode, extent_end - start);
900 if (end == extent_end)
901 break;
902
903 path->slots[0]++;
904 goto next_slot;
905 }
906
		/*
		 *  | ---- range to drop ----- |
		 *    | ------ extent ------ |
		 */
911 if (start <= key.offset && end >= extent_end) {
912delete_extent_item:
913 if (del_nr == 0) {
914 del_slot = path->slots[0];
915 del_nr = 1;
916 } else {
917 BUG_ON(del_slot + del_nr != path->slots[0]);
918 del_nr++;
919 }
920
921 if (update_refs &&
922 extent_type == BTRFS_FILE_EXTENT_INLINE) {
923 inode_sub_bytes(inode,
924 extent_end - key.offset);
925 extent_end = ALIGN(extent_end,
926 root->sectorsize);
927 } else if (update_refs && disk_bytenr > 0) {
928 ret = btrfs_free_extent(trans, root,
929 disk_bytenr, num_bytes, 0,
930 root->root_key.objectid,
931 key.objectid, key.offset -
932 extent_offset, 0);
933 BUG_ON(ret);
934 inode_sub_bytes(inode,
935 extent_end - key.offset);
936 }
937
938 if (end == extent_end)
939 break;
940
941 if (path->slots[0] + 1 < btrfs_header_nritems(leaf)) {
942 path->slots[0]++;
943 goto next_slot;
944 }
945
946 ret = btrfs_del_items(trans, root, path, del_slot,
947 del_nr);
948 if (ret) {
949 btrfs_abort_transaction(trans, root, ret);
950 break;
951 }
952
953 del_nr = 0;
954 del_slot = 0;
955
956 btrfs_release_path(path);
957 continue;
958 }
959
960 BUG_ON(1);
961 }
962
963 if (!ret && del_nr > 0) {
		/*
		 * Set path->slots[0] to the first slot, so that after the
		 * delete, if items are moved off from our leaf to its
		 * immediate left or right neighbor leafs, we end up with a
		 * correct and adjusted path->slots[0] for our insertion
		 * (if replace_extent != 0).
		 */
970 path->slots[0] = del_slot;
971 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
972 if (ret)
973 btrfs_abort_transaction(trans, root, ret);
974 }
975
976 leaf = path->nodes[0];
	/*
	 * If btrfs_del_items() was called, it might have deleted a leaf, in
	 * which case it unlocked our path, so check that path->locks[0]
	 * still matches a write lock before reusing the leaf.
	 */
982 if (!ret && replace_extent && leafs_visited == 1 &&
983 (path->locks[0] == BTRFS_WRITE_LOCK_BLOCKING ||
984 path->locks[0] == BTRFS_WRITE_LOCK) &&
985 btrfs_leaf_free_space(root, leaf) >=
986 sizeof(struct btrfs_item) + extent_item_size) {
987
988 key.objectid = ino;
989 key.type = BTRFS_EXTENT_DATA_KEY;
990 key.offset = start;
991 if (!del_nr && path->slots[0] < btrfs_header_nritems(leaf)) {
992 struct btrfs_key slot_key;
993
994 btrfs_item_key_to_cpu(leaf, &slot_key, path->slots[0]);
995 if (btrfs_comp_cpu_keys(&key, &slot_key) > 0)
996 path->slots[0]++;
997 }
998 setup_items_for_insert(root, path, &key,
999 &extent_item_size,
1000 extent_item_size,
1001 sizeof(struct btrfs_item) +
1002 extent_item_size, 1);
1003 *key_inserted = 1;
1004 }
1005
1006 if (!replace_extent || !(*key_inserted))
1007 btrfs_release_path(path);
1008 if (drop_end)
1009 *drop_end = found ? min(end, extent_end) : end;
1010 return ret;
1011}
1012
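/*
 * Convenience wrapper around __btrfs_drop_extents() that allocates and
 * frees its own path and doesn't use the extent replacement optimization.
 */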
1013int btrfs_drop_extents(struct btrfs_trans_handle *trans,
1014 struct btrfs_root *root, struct inode *inode, u64 start,
1015 u64 end, int drop_cache)
1016{
1017 struct btrfs_path *path;
1018 int ret;
1019
1020 path = btrfs_alloc_path();
1021 if (!path)
1022 return -ENOMEM;
1023 ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL,
1024 drop_cache, 0, 0, NULL);
1025 btrfs_free_path(path);
1026 return ret;
1027}
1028
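/*
 * Check whether the file extent item at @slot references the same on-disk
 * extent (@bytenr at @orig_offset) without compression/encryption, so it
 * can be merged with its neighbour.  On success the extent's
 * [start, end) range is returned through @start and @end.
 */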
1029static int extent_mergeable(struct extent_buffer *leaf, int slot,
1030 u64 objectid, u64 bytenr, u64 orig_offset,
1031 u64 *start, u64 *end)
1032{
1033 struct btrfs_file_extent_item *fi;
1034 struct btrfs_key key;
1035 u64 extent_end;
1036
1037 if (slot < 0 || slot >= btrfs_header_nritems(leaf))
1038 return 0;
1039
1040 btrfs_item_key_to_cpu(leaf, &key, slot);
1041 if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
1042 return 0;
1043
1044 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
1045 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG ||
1046 btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr ||
1047 btrfs_file_extent_offset(leaf, fi) != key.offset - orig_offset ||
1048 btrfs_file_extent_compression(leaf, fi) ||
1049 btrfs_file_extent_encryption(leaf, fi) ||
1050 btrfs_file_extent_other_encoding(leaf, fi))
1051 return 0;
1052
1053 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
1054 if ((*start && *start != key.offset) || (*end && *end != extent_end))
1055 return 0;
1056
1057 *start = key.offset;
1058 *end = extent_end;
1059 return 1;
1060}
1061
/*
 * Mark the extent in the range start - end as written.
 *
 * This changes the extent type from 'pre-allocated' to 'regular'.  If only
 * part of the extent is marked as written, the extent will be split into
 * two or three.
 */
1069int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
1070 struct inode *inode, u64 start, u64 end)
1071{
1072 struct btrfs_root *root = BTRFS_I(inode)->root;
1073 struct extent_buffer *leaf;
1074 struct btrfs_path *path;
1075 struct btrfs_file_extent_item *fi;
1076 struct btrfs_key key;
1077 struct btrfs_key new_key;
1078 u64 bytenr;
1079 u64 num_bytes;
1080 u64 extent_end;
1081 u64 orig_offset;
1082 u64 other_start;
1083 u64 other_end;
1084 u64 split;
1085 int del_nr = 0;
1086 int del_slot = 0;
1087 int recow;
1088 int ret;
1089 u64 ino = btrfs_ino(inode);
1090
1091 path = btrfs_alloc_path();
1092 if (!path)
1093 return -ENOMEM;
1094again:
1095 recow = 0;
1096 split = start;
1097 key.objectid = ino;
1098 key.type = BTRFS_EXTENT_DATA_KEY;
1099 key.offset = split;
1100
1101 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1102 if (ret < 0)
1103 goto out;
1104 if (ret > 0 && path->slots[0] > 0)
1105 path->slots[0]--;
1106
1107 leaf = path->nodes[0];
1108 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1109 BUG_ON(key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY);
1110 fi = btrfs_item_ptr(leaf, path->slots[0],
1111 struct btrfs_file_extent_item);
1112 BUG_ON(btrfs_file_extent_type(leaf, fi) !=
1113 BTRFS_FILE_EXTENT_PREALLOC);
1114 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
1115 BUG_ON(key.offset > start || extent_end < end);
1116
1117 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1118 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
1119 orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi);
1120 memcpy(&new_key, &key, sizeof(new_key));
1121
1122 if (start == key.offset && end < extent_end) {
1123 other_start = 0;
1124 other_end = start;
1125 if (extent_mergeable(leaf, path->slots[0] - 1,
1126 ino, bytenr, orig_offset,
1127 &other_start, &other_end)) {
1128 new_key.offset = end;
1129 btrfs_set_item_key_safe(root, path, &new_key);
1130 fi = btrfs_item_ptr(leaf, path->slots[0],
1131 struct btrfs_file_extent_item);
1132 btrfs_set_file_extent_generation(leaf, fi,
1133 trans->transid);
1134 btrfs_set_file_extent_num_bytes(leaf, fi,
1135 extent_end - end);
1136 btrfs_set_file_extent_offset(leaf, fi,
1137 end - orig_offset);
1138 fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
1139 struct btrfs_file_extent_item);
1140 btrfs_set_file_extent_generation(leaf, fi,
1141 trans->transid);
1142 btrfs_set_file_extent_num_bytes(leaf, fi,
1143 end - other_start);
1144 btrfs_mark_buffer_dirty(leaf);
1145 goto out;
1146 }
1147 }
1148
1149 if (start > key.offset && end == extent_end) {
1150 other_start = end;
1151 other_end = 0;
1152 if (extent_mergeable(leaf, path->slots[0] + 1,
1153 ino, bytenr, orig_offset,
1154 &other_start, &other_end)) {
1155 fi = btrfs_item_ptr(leaf, path->slots[0],
1156 struct btrfs_file_extent_item);
1157 btrfs_set_file_extent_num_bytes(leaf, fi,
1158 start - key.offset);
1159 btrfs_set_file_extent_generation(leaf, fi,
1160 trans->transid);
1161 path->slots[0]++;
1162 new_key.offset = start;
1163 btrfs_set_item_key_safe(root, path, &new_key);
1164
1165 fi = btrfs_item_ptr(leaf, path->slots[0],
1166 struct btrfs_file_extent_item);
1167 btrfs_set_file_extent_generation(leaf, fi,
1168 trans->transid);
1169 btrfs_set_file_extent_num_bytes(leaf, fi,
1170 other_end - start);
1171 btrfs_set_file_extent_offset(leaf, fi,
1172 start - orig_offset);
1173 btrfs_mark_buffer_dirty(leaf);
1174 goto out;
1175 }
1176 }
1177
1178 while (start > key.offset || end < extent_end) {
1179 if (key.offset == start)
1180 split = end;
1181
1182 new_key.offset = split;
1183 ret = btrfs_duplicate_item(trans, root, path, &new_key);
1184 if (ret == -EAGAIN) {
1185 btrfs_release_path(path);
1186 goto again;
1187 }
1188 if (ret < 0) {
1189 btrfs_abort_transaction(trans, root, ret);
1190 goto out;
1191 }
1192
1193 leaf = path->nodes[0];
1194 fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
1195 struct btrfs_file_extent_item);
1196 btrfs_set_file_extent_generation(leaf, fi, trans->transid);
1197 btrfs_set_file_extent_num_bytes(leaf, fi,
1198 split - key.offset);
1199
1200 fi = btrfs_item_ptr(leaf, path->slots[0],
1201 struct btrfs_file_extent_item);
1202
1203 btrfs_set_file_extent_generation(leaf, fi, trans->transid);
1204 btrfs_set_file_extent_offset(leaf, fi, split - orig_offset);
1205 btrfs_set_file_extent_num_bytes(leaf, fi,
1206 extent_end - split);
1207 btrfs_mark_buffer_dirty(leaf);
1208
1209 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
1210 root->root_key.objectid,
1211 ino, orig_offset, 1);
1212 BUG_ON(ret);
1213
1214 if (split == start) {
1215 key.offset = start;
1216 } else {
1217 BUG_ON(start != key.offset);
1218 path->slots[0]--;
1219 extent_end = end;
1220 }
1221 recow = 1;
1222 }
1223
1224 other_start = end;
1225 other_end = 0;
1226 if (extent_mergeable(leaf, path->slots[0] + 1,
1227 ino, bytenr, orig_offset,
1228 &other_start, &other_end)) {
1229 if (recow) {
1230 btrfs_release_path(path);
1231 goto again;
1232 }
1233 extent_end = other_end;
1234 del_slot = path->slots[0] + 1;
1235 del_nr++;
1236 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
1237 0, root->root_key.objectid,
1238 ino, orig_offset, 0);
1239 BUG_ON(ret);
1240 }
1241 other_start = 0;
1242 other_end = start;
1243 if (extent_mergeable(leaf, path->slots[0] - 1,
1244 ino, bytenr, orig_offset,
1245 &other_start, &other_end)) {
1246 if (recow) {
1247 btrfs_release_path(path);
1248 goto again;
1249 }
1250 key.offset = other_start;
1251 del_slot = path->slots[0];
1252 del_nr++;
1253 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
1254 0, root->root_key.objectid,
1255 ino, orig_offset, 0);
1256 BUG_ON(ret);
1257 }
1258 if (del_nr == 0) {
1259 fi = btrfs_item_ptr(leaf, path->slots[0],
1260 struct btrfs_file_extent_item);
1261 btrfs_set_file_extent_type(leaf, fi,
1262 BTRFS_FILE_EXTENT_REG);
1263 btrfs_set_file_extent_generation(leaf, fi, trans->transid);
1264 btrfs_mark_buffer_dirty(leaf);
1265 } else {
1266 fi = btrfs_item_ptr(leaf, del_slot - 1,
1267 struct btrfs_file_extent_item);
1268 btrfs_set_file_extent_type(leaf, fi,
1269 BTRFS_FILE_EXTENT_REG);
1270 btrfs_set_file_extent_generation(leaf, fi, trans->transid);
1271 btrfs_set_file_extent_num_bytes(leaf, fi,
1272 extent_end - key.offset);
1273 btrfs_mark_buffer_dirty(leaf);
1274
1275 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
1276 if (ret < 0) {
1277 btrfs_abort_transaction(trans, root, ret);
1278 goto out;
1279 }
1280 }
1281out:
1282 btrfs_free_path(path);
1283 return 0;
1284}
1285
/*
 * On error we return an unlocked page and the error value.
 * On success we return a locked page and 0.
 */
1290static int prepare_uptodate_page(struct page *page, u64 pos,
1291 bool force_uptodate)
1292{
1293 int ret = 0;
1294
1295 if (((pos & (PAGE_CACHE_SIZE - 1)) || force_uptodate) &&
1296 !PageUptodate(page)) {
1297 ret = btrfs_readpage(NULL, page);
1298 if (ret)
1299 return ret;
1300 lock_page(page);
1301 if (!PageUptodate(page)) {
1302 unlock_page(page);
1303 return -EIO;
1304 }
1305 }
1306 return 0;
1307}
1308
/*
 * This just gets pages into the page cache and locks them down.
 */
1312static noinline int prepare_pages(struct inode *inode, struct page **pages,
1313 size_t num_pages, loff_t pos,
1314 size_t write_bytes, bool force_uptodate)
1315{
1316 int i;
1317 unsigned long index = pos >> PAGE_CACHE_SHIFT;
1318 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
1319 int err = 0;
1320 int faili;
1321
1322 for (i = 0; i < num_pages; i++) {
1323 pages[i] = find_or_create_page(inode->i_mapping, index + i,
1324 mask | __GFP_WRITE);
1325 if (!pages[i]) {
1326 faili = i - 1;
1327 err = -ENOMEM;
1328 goto fail;
1329 }
1330
1331 if (i == 0)
1332 err = prepare_uptodate_page(pages[i], pos,
1333 force_uptodate);
1334 if (i == num_pages - 1)
1335 err = prepare_uptodate_page(pages[i],
1336 pos + write_bytes, false);
1337 if (err) {
1338 page_cache_release(pages[i]);
1339 faili = i - 1;
1340 goto fail;
1341 }
1342 wait_on_page_writeback(pages[i]);
1343 }
1344
1345 return 0;
1346fail:
1347 while (faili >= 0) {
1348 unlock_page(pages[faili]);
1349 page_cache_release(pages[faili]);
1350 faili--;
1351 }
1352 return err;
1353
1354}
1355
/*
 * This function locks the extent and properly waits for data=ordered
 * extents to finish before allowing the pages to be modified if need be.
 *
 * The return value:
 * 1 - the extent is locked
 * 0 - the extent is not locked, and everything is OK
 * -EAGAIN - need to re-prepare the pages
 * any other < 0 value - something went wrong
 */
1366static noinline int
1367lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages,
1368 size_t num_pages, loff_t pos,
1369 u64 *lockstart, u64 *lockend,
1370 struct extent_state **cached_state)
1371{
1372 u64 start_pos;
1373 u64 last_pos;
1374 int i;
1375 int ret = 0;
1376
1377 start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
1378 last_pos = start_pos + ((u64)num_pages << PAGE_CACHE_SHIFT) - 1;
1379
1380 if (start_pos < inode->i_size) {
1381 struct btrfs_ordered_extent *ordered;
1382 lock_extent_bits(&BTRFS_I(inode)->io_tree,
1383 start_pos, last_pos, 0, cached_state);
1384 ordered = btrfs_lookup_ordered_range(inode, start_pos,
1385 last_pos - start_pos + 1);
1386 if (ordered &&
1387 ordered->file_offset + ordered->len > start_pos &&
1388 ordered->file_offset <= last_pos) {
1389 unlock_extent_cached(&BTRFS_I(inode)->io_tree,
1390 start_pos, last_pos,
1391 cached_state, GFP_NOFS);
1392 for (i = 0; i < num_pages; i++) {
1393 unlock_page(pages[i]);
1394 page_cache_release(pages[i]);
1395 }
1396 btrfs_start_ordered_extent(inode, ordered, 1);
1397 btrfs_put_ordered_extent(ordered);
1398 return -EAGAIN;
1399 }
1400 if (ordered)
1401 btrfs_put_ordered_extent(ordered);
1402
1403 clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos,
1404 last_pos, EXTENT_DIRTY | EXTENT_DELALLOC |
1405 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
1406 0, 0, cached_state, GFP_NOFS);
1407 *lockstart = start_pos;
1408 *lockend = last_pos;
1409 ret = 1;
1410 }
1411
1412 for (i = 0; i < num_pages; i++) {
1413 if (clear_page_dirty_for_io(pages[i]))
1414 account_page_redirty(pages[i]);
1415 set_page_extent_mapped(pages[i]);
1416 WARN_ON(!PageLocked(pages[i]));
1417 }
1418
1419 return ret;
1420}
1421
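/*
 * Check whether the write starting at @pos can go directly into existing
 * (nodatacow/prealloc) extents.  Returns > 0 (possibly shrinking
 * *write_bytes to the nocow-able portion) if it can, 0 if it can't, or a
 * negative error.
 */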
1422static noinline int check_can_nocow(struct inode *inode, loff_t pos,
1423 size_t *write_bytes)
1424{
1425 struct btrfs_root *root = BTRFS_I(inode)->root;
1426 struct btrfs_ordered_extent *ordered;
1427 u64 lockstart, lockend;
1428 u64 num_bytes;
1429 int ret;
1430
1431 ret = btrfs_start_write_no_snapshoting(root);
1432 if (!ret)
1433 return -ENOSPC;
1434
1435 lockstart = round_down(pos, root->sectorsize);
1436 lockend = round_up(pos + *write_bytes, root->sectorsize) - 1;
1437
1438 while (1) {
1439 lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
1440 ordered = btrfs_lookup_ordered_range(inode, lockstart,
1441 lockend - lockstart + 1);
1442 if (!ordered) {
1443 break;
1444 }
1445 unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
1446 btrfs_start_ordered_extent(inode, ordered, 1);
1447 btrfs_put_ordered_extent(ordered);
1448 }
1449
1450 num_bytes = lockend - lockstart + 1;
1451 ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL);
1452 if (ret <= 0) {
1453 ret = 0;
1454 btrfs_end_write_no_snapshoting(root);
1455 } else {
		*write_bytes = min_t(size_t, *write_bytes,
				     num_bytes - pos + lockstart);
1458 }
1459
1460 unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
1461
1462 return ret;
1463}
1464
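/*
 * Buffered write path: reserve space, prepare and lock the pages, copy the
 * user data in and mark the pages dirty/delalloc, one batch of pages at a
 * time.
 */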
1465static noinline ssize_t __btrfs_buffered_write(struct file *file,
1466 struct iov_iter *i,
1467 loff_t pos)
1468{
1469 struct inode *inode = file_inode(file);
1470 struct btrfs_root *root = BTRFS_I(inode)->root;
1471 struct page **pages = NULL;
1472 struct extent_state *cached_state = NULL;
1473 u64 release_bytes = 0;
1474 u64 lockstart;
1475 u64 lockend;
1476 unsigned long first_index;
1477 size_t num_written = 0;
1478 int nrptrs;
1479 int ret = 0;
1480 bool only_release_metadata = false;
1481 bool force_page_uptodate = false;
1482 bool need_unlock;
1483
1484 nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_CACHE_SIZE),
1485 PAGE_CACHE_SIZE / (sizeof(struct page *)));
1486 nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied);
1487 nrptrs = max(nrptrs, 8);
1488 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
1489 if (!pages)
1490 return -ENOMEM;
1491
1492 first_index = pos >> PAGE_CACHE_SHIFT;
1493
1494 while (iov_iter_count(i) > 0) {
1495 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
1496 size_t write_bytes = min(iov_iter_count(i),
1497 nrptrs * (size_t)PAGE_CACHE_SIZE -
1498 offset);
1499 size_t num_pages = DIV_ROUND_UP(write_bytes + offset,
1500 PAGE_CACHE_SIZE);
1501 size_t reserve_bytes;
1502 size_t dirty_pages;
1503 size_t copied;
1504
1505 WARN_ON(num_pages > nrptrs);
1506
		/*
		 * Fault the pages in before locking them in prepare_pages
		 * to avoid a recursive lock.
		 */
1511 if (unlikely(iov_iter_fault_in_readable(i, write_bytes))) {
1512 ret = -EFAULT;
1513 break;
1514 }
1515
1516 reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
1517 ret = btrfs_check_data_free_space(inode, reserve_bytes);
1518 if (ret == -ENOSPC &&
1519 (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
1520 BTRFS_INODE_PREALLOC))) {
1521 ret = check_can_nocow(inode, pos, &write_bytes);
1522 if (ret > 0) {
1523 only_release_metadata = true;
				/*
				 * Our prealloc extent may be smaller than
				 * write_bytes, so scale down.
				 */
1528 num_pages = DIV_ROUND_UP(write_bytes + offset,
1529 PAGE_CACHE_SIZE);
1530 reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
1531 ret = 0;
1532 } else {
1533 ret = -ENOSPC;
1534 }
1535 }
1536
1537 if (ret)
1538 break;
1539
1540 ret = btrfs_delalloc_reserve_metadata(inode, reserve_bytes);
1541 if (ret) {
1542 if (!only_release_metadata)
1543 btrfs_free_reserved_data_space(inode,
1544 reserve_bytes);
1545 else
1546 btrfs_end_write_no_snapshoting(root);
1547 break;
1548 }
1549
1550 release_bytes = reserve_bytes;
1551 need_unlock = false;
1552again:
		/*
		 * This is going to setup the pages array with the number of
		 * pages we want, so we don't really need to worry about the
		 * contents of pages from loop to loop.
		 */
1558 ret = prepare_pages(inode, pages, num_pages,
1559 pos, write_bytes,
1560 force_page_uptodate);
1561 if (ret)
1562 break;
1563
1564 ret = lock_and_cleanup_extent_if_need(inode, pages, num_pages,
1565 pos, &lockstart, &lockend,
1566 &cached_state);
1567 if (ret < 0) {
1568 if (ret == -EAGAIN)
1569 goto again;
1570 break;
1571 } else if (ret > 0) {
1572 need_unlock = true;
1573 ret = 0;
1574 }
1575
1576 copied = btrfs_copy_from_user(pos, num_pages,
1577 write_bytes, pages, i);
1578
		/*
		 * If we have trouble faulting in the pages, fall back to one
		 * page at a time.
		 */
1583 if (copied < write_bytes)
1584 nrptrs = 1;
1585
1586 if (copied == 0) {
1587 force_page_uptodate = true;
1588 dirty_pages = 0;
1589 } else {
1590 force_page_uptodate = false;
1591 dirty_pages = DIV_ROUND_UP(copied + offset,
1592 PAGE_CACHE_SIZE);
1593 }
1594
		/*
		 * If we had a short copy we need to release the excess
		 * delalloc bytes we reserved.  We need to increment
		 * outstanding_extents because the release below will
		 * decrement it, but we still have an outstanding extent for
		 * the chunk we actually managed to copy.
		 */
1602 if (num_pages > dirty_pages) {
1603 release_bytes = (num_pages - dirty_pages) <<
1604 PAGE_CACHE_SHIFT;
1605 if (copied > 0) {
1606 spin_lock(&BTRFS_I(inode)->lock);
1607 BTRFS_I(inode)->outstanding_extents++;
1608 spin_unlock(&BTRFS_I(inode)->lock);
1609 }
1610 if (only_release_metadata)
1611 btrfs_delalloc_release_metadata(inode,
1612 release_bytes);
1613 else
1614 btrfs_delalloc_release_space(inode,
1615 release_bytes);
1616 }
1617
1618 release_bytes = dirty_pages << PAGE_CACHE_SHIFT;
1619
1620 if (copied > 0)
1621 ret = btrfs_dirty_pages(root, inode, pages,
1622 dirty_pages, pos, copied,
1623 NULL);
1624 if (need_unlock)
1625 unlock_extent_cached(&BTRFS_I(inode)->io_tree,
1626 lockstart, lockend, &cached_state,
1627 GFP_NOFS);
1628 if (ret) {
1629 btrfs_drop_pages(pages, num_pages);
1630 break;
1631 }
1632
1633 release_bytes = 0;
1634 if (only_release_metadata)
1635 btrfs_end_write_no_snapshoting(root);
1636
1637 if (only_release_metadata && copied > 0) {
1638 u64 lockstart = round_down(pos, root->sectorsize);
1639 u64 lockend = lockstart +
1640 (dirty_pages << PAGE_CACHE_SHIFT) - 1;
1641
1642 set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
1643 lockend, EXTENT_NORESERVE, NULL,
1644 NULL, GFP_NOFS);
1645 only_release_metadata = false;
1646 }
1647
1648 btrfs_drop_pages(pages, num_pages);
1649
1650 cond_resched();
1651
1652 balance_dirty_pages_ratelimited(inode->i_mapping);
1653 if (dirty_pages < (root->nodesize >> PAGE_CACHE_SHIFT) + 1)
1654 btrfs_btree_balance_dirty(root);
1655
1656 pos += copied;
1657 num_written += copied;
1658 }
1659
1660 kfree(pages);
1661
1662 if (release_bytes) {
1663 if (only_release_metadata) {
1664 btrfs_end_write_no_snapshoting(root);
1665 btrfs_delalloc_release_metadata(inode, release_bytes);
1666 } else {
1667 btrfs_delalloc_release_space(inode, release_bytes);
1668 }
1669 }
1670
1671 return num_written ? num_written : ret;
1672}
1673
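/*
 * O_DIRECT write path.  If the direct write only partially completes, the
 * rest is written through the buffered path and then flushed, so a
 * subsequent direct read sees the data.
 */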
1674static ssize_t __btrfs_direct_write(struct kiocb *iocb,
1675 struct iov_iter *from,
1676 loff_t pos)
1677{
1678 struct file *file = iocb->ki_filp;
1679 struct inode *inode = file_inode(file);
1680 ssize_t written;
1681 ssize_t written_buffered;
1682 loff_t endbyte;
1683 int err;
1684
1685 written = generic_file_direct_write(iocb, from, pos);
1686
1687 if (written < 0 || !iov_iter_count(from))
1688 return written;
1689
1690 pos += written;
1691 written_buffered = __btrfs_buffered_write(file, from, pos);
1692 if (written_buffered < 0) {
1693 err = written_buffered;
1694 goto out;
1695 }
1696
	/*
	 * Ensure all data is persisted.  We want the next direct IO read to
	 * be able to read what was just written.
	 */
1700 endbyte = pos + written_buffered - 1;
1701 err = btrfs_fdatawrite_range(inode, pos, endbyte);
1702 if (err)
1703 goto out;
1704 err = filemap_fdatawait_range(inode->i_mapping, pos, endbyte);
1705 if (err)
1706 goto out;
1707 written += written_buffered;
1708 iocb->ki_pos = pos + written_buffered;
1709 invalidate_mapping_pages(file->f_mapping, pos >> PAGE_CACHE_SHIFT,
1710 endbyte >> PAGE_CACHE_SHIFT);
1711out:
1712 return written ? written : err;
1713}
1714
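/*
 * Update mtime/ctime (and i_version) before a write, unless the inode is
 * flagged NOCMTIME.
 */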
1715static void update_time_for_write(struct inode *inode)
1716{
1717 struct timespec now;
1718
1719 if (IS_NOCMTIME(inode))
1720 return;
1721
1722 now = current_fs_time(inode->i_sb);
1723 if (!timespec_equal(&inode->i_mtime, &now))
1724 inode->i_mtime = now;
1725
1726 if (!timespec_equal(&inode->i_ctime, &now))
1727 inode->i_ctime = now;
1728
1729 if (IS_I_VERSION(inode))
1730 inode_inc_iversion(inode);
1731}
1732
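/*
 * Entry point for writes on btrfs files; dispatches to the direct or
 * buffered write path and handles O_DSYNC semantics.
 */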
1733static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
1734 struct iov_iter *from)
1735{
1736 struct file *file = iocb->ki_filp;
1737 struct inode *inode = file_inode(file);
1738 struct btrfs_root *root = BTRFS_I(inode)->root;
1739 u64 start_pos;
1740 u64 end_pos;
1741 ssize_t num_written = 0;
1742 ssize_t err = 0;
1743 size_t count = iov_iter_count(from);
1744 bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
1745 loff_t pos = iocb->ki_pos;
1746
1747 mutex_lock(&inode->i_mutex);
1748
1749 current->backing_dev_info = inode->i_mapping->backing_dev_info;
1750 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
1751 if (err) {
1752 mutex_unlock(&inode->i_mutex);
1753 goto out;
1754 }
1755
1756 if (count == 0) {
1757 mutex_unlock(&inode->i_mutex);
1758 goto out;
1759 }
1760
1761 iov_iter_truncate(from, count);
1762
1763 err = file_remove_suid(file);
1764 if (err) {
1765 mutex_unlock(&inode->i_mutex);
1766 goto out;
1767 }
1768
	/*
	 * If BTRFS flipped readonly due to some impossible error
	 * (fs_info->fs_state has BTRFS_FS_STATE_ERROR set), we have to stop
	 * this write operation to ensure FS consistency, even though the
	 * file was opened writable.
	 */
1775 if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
1776 mutex_unlock(&inode->i_mutex);
1777 err = -EROFS;
1778 goto out;
1779 }
1780
	/*
	 * We reserve space for updating the inode when we reserve space for
	 * the extent we are going to write, so we will enospc out there.  We
	 * don't need to start yet another transaction to update the inode as
	 * we will update the inode when we finish writing whatever data we
	 * write.
	 */
1787 update_time_for_write(inode);
1788
1789 start_pos = round_down(pos, root->sectorsize);
1790 if (start_pos > i_size_read(inode)) {
		/* Expand the hole to cover the write data, preventing an empty gap. */
1792 end_pos = round_up(pos + count, root->sectorsize);
1793 err = btrfs_cont_expand(inode, i_size_read(inode), end_pos);
1794 if (err) {
1795 mutex_unlock(&inode->i_mutex);
1796 goto out;
1797 }
1798 }
1799
1800 if (sync)
1801 atomic_inc(&BTRFS_I(inode)->sync_writers);
1802
1803 if (file->f_flags & O_DIRECT) {
1804 num_written = __btrfs_direct_write(iocb, from, pos);
1805 } else {
1806 num_written = __btrfs_buffered_write(file, from, pos);
1807 if (num_written > 0)
1808 iocb->ki_pos = pos + num_written;
1809 }
1810
1811 mutex_unlock(&inode->i_mutex);
1812
	/*
	 * We want to make sure fsync finds this change but we haven't joined
	 * a transaction running right now.  Later on, someone is sure to
	 * update the inode and get the real transid recorded.
	 *
	 * We set last_trans now to fs_info->generation + 1; this will either
	 * be one more than the running transaction or the generation used
	 * for the next transaction if there isn't one running right now.
	 *
	 * We also have to set last_sub_trans to the current log transid,
	 * otherwise subsequent syncs to a file that's been synced in this
	 * transaction will appear to have already occurred.
	 */
1829 BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
1830 BTRFS_I(inode)->last_sub_trans = root->log_transid;
1831 if (num_written > 0) {
1832 err = generic_write_sync(file, pos, num_written);
1833 if (err < 0)
1834 num_written = err;
1835 }
1836
1837 if (sync)
1838 atomic_dec(&BTRFS_I(inode)->sync_writers);
1839out:
1840 current->backing_dev_info = NULL;
1841 return num_written ? num_written : err;
1842}
1843
1844int btrfs_release_file(struct inode *inode, struct file *filp)
1845{
1846 if (filp->private_data)
1847 btrfs_ioctl_trans_end(filp);
1848
	/*
	 * ordered_data_close is set by setattr when we are about to truncate
	 * a file from a non-zero size to a zero size.  This tries to flush
	 * down new bytes that may have been written if the application were
	 * using truncate to replace a file in place.
	 */
1854 if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
1855 &BTRFS_I(inode)->runtime_flags))
1856 filemap_flush(inode->i_mapping);
1857 return 0;
1858}
1859
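/*
 * Kick off writeback for the given range, counting ourselves as a sync
 * writer while we do it.
 */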
1860static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end)
1861{
1862 int ret;
1863
1864 atomic_inc(&BTRFS_I(inode)->sync_writers);
1865 ret = btrfs_fdatawrite_range(inode, start, end);
1866 atomic_dec(&BTRFS_I(inode)->sync_writers);
1867
1868 return ret;
1869}
1870
/*
 * fsync call for both files and directories.  This logs the inode into the
 * tree log instead of forcing full commits whenever possible.
 *
 * It needs to wait for ordered extents so that all updates in the metadata
 * btree are up to date for copying to the log.
 *
 * It drops the inode mutex before doing the tree log commit.  This is an
 * important optimization for directories because holding the mutex prevents
 * new operations on the dir while we write to disk.
 */
1882int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
1883{
1884 struct dentry *dentry = file->f_path.dentry;
1885 struct inode *inode = dentry->d_inode;
1886 struct btrfs_root *root = BTRFS_I(inode)->root;
1887 struct btrfs_trans_handle *trans;
1888 struct btrfs_log_ctx ctx;
1889 int ret = 0;
1890 bool full_sync = 0;
1891
1892 trace_btrfs_sync_file(file, datasync);
1893
	/*
	 * We write the dirty pages in the range and wait until they complete
	 * outside of the ->i_mutex, so the dirty pages can be flushed by
	 * multiple tasks and performance improves.
	 */
1900 ret = start_ordered_ops(inode, start, end);
1901 if (ret)
1902 return ret;
1903
1904 mutex_lock(&inode->i_mutex);
1905 atomic_inc(&root->log_batch);
1906 full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
1907 &BTRFS_I(inode)->runtime_flags);

	/*
	 * We might have had more pages made dirty after calling
	 * start_ordered_ops and before acquiring the inode's i_mutex.
	 */
	if (full_sync) {
		/*
		 * For a full sync, we need to make sure any ordered
		 * operations start and finish before we start logging the
		 * inode, so that all extents are persisted and the
		 * respective file extent items are in the fs/subvol btree.
		 */
1919 ret = btrfs_wait_ordered_range(inode, start, end - start + 1);
1920 } else {
		/*
		 * Start any new ordered operations before starting to log
		 * the inode; we will wait for them to finish in
		 * btrfs_sync_log().
		 *
		 * Right before acquiring the inode's mutex we might have new
		 * writes dirtying pages, which won't immediately start the
		 * respective ordered operations - that is done through the
		 * fill_delalloc callbacks invoked from the writepage and
		 * writepages address space operations.  So make sure we
		 * start all ordered operations before starting to log our
		 * inode, otherwise writeback kicking in while we log could
		 * add extent maps to the modified list after we copied it,
		 * and we would miss them in the log.
		 */
1953 ret = start_ordered_ops(inode, start, end);
1954 }
1955 if (ret) {
1956 mutex_unlock(&inode->i_mutex);
1957 goto out;
1958 }
1959 atomic_inc(&root->log_batch);
1960
	/*
	 * Check the transaction that last modified this inode.  If
	 * last_trans is zero the inode was never modified, so there is
	 * nothing to sync.
	 */
1965 if (!BTRFS_I(inode)->last_trans) {
1966 mutex_unlock(&inode->i_mutex);
1967 goto out;
1968 }
1969
	/*
	 * If the last transaction that changed this file was before the
	 * current transaction, we can bail out now without any syncing.
	 */
1975 smp_mb();
1976 if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
1977 BTRFS_I(inode)->last_trans <=
1978 root->fs_info->last_trans_committed) {
1979 BTRFS_I(inode)->last_trans = 0;

		/*
		 * We've had everything committed since the last time we were
		 * modified, so clear this flag in case it was set for
		 * whatever reason; it's no longer relevant.
		 */
1986 clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
1987 &BTRFS_I(inode)->runtime_flags);
1988 mutex_unlock(&inode->i_mutex);
1989 goto out;
1990 }
1991
	/*
	 * Ok, we haven't committed the transaction yet, let's do a commit.
	 */
1995 if (file->private_data)
1996 btrfs_ioctl_trans_end(file);
1997
	/*
	 * We use start here because we will need to wait on the IO to
	 * complete in btrfs_sync_log, which could require joining a
	 * transaction (for example checking cross references in the nocow
	 * path).  If we used join here we could get into a situation where
	 * we're waiting on IO to happen that is blocked on a transaction
	 * trying to commit.  With start we increment the extwriter counter,
	 * so we wait for all extwriters to exit before we start blocking
	 * joiners.
	 */
2009 trans = btrfs_start_transaction(root, 0);
2010 if (IS_ERR(trans)) {
2011 ret = PTR_ERR(trans);
2012 mutex_unlock(&inode->i_mutex);
2013 goto out;
2014 }
2015 trans->sync = true;
2016
2017 btrfs_init_log_ctx(&ctx);
2018
2019 ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx);
2020 if (ret < 0) {
		/* Fall through and commit/free the transaction. */
2022 ret = 1;
2023 }
2024
	/*
	 * We've logged all the items and now have a consistent version of
	 * the file in the log.  It is possible that someone will come in
	 * and modify the file, but that's fine because the log is
	 * consistent on disk and we have references to all of the file's
	 * extents.
	 *
	 * It is possible that someone will come in and log the file again,
	 * but that will end up using the synchronization inside
	 * btrfs_sync_log to keep things safe.
	 */
2035 mutex_unlock(&inode->i_mutex);
2036
	/*
	 * If any of the ordered extents had an error, just return it to
	 * user space, so that the application knows some writes didn't
	 * succeed and can take proper action (e.g. retry).  Blindly
	 * committing the transaction in this case would fool userspace into
	 * thinking everything was successful.  Errors from the ordered
	 * operations are indicated by ctx.io_err.
	 */
2050 if (ctx.io_err) {
2051 btrfs_end_transaction(trans, root);
2052 ret = ctx.io_err;
2053 goto out;
2054 }
2055
2056 if (ret != BTRFS_NO_LOG_SYNC) {
2057 if (!ret) {
2058 ret = btrfs_sync_log(trans, root, &ctx);
2059 if (!ret) {
2060 ret = btrfs_end_transaction(trans, root);
2061 goto out;
2062 }
2063 }
2064 if (!full_sync) {
2065 ret = btrfs_wait_ordered_range(inode, start,
2066 end - start + 1);
2067 if (ret) {
2068 btrfs_end_transaction(trans, root);
2069 goto out;
2070 }
2071 }
2072 ret = btrfs_commit_transaction(trans, root);
2073 } else {
2074 ret = btrfs_end_transaction(trans, root);
2075 }
2076out:
2077 return ret > 0 ? -EIO : ret;
2078}
2079
2080static const struct vm_operations_struct btrfs_file_vm_ops = {
2081 .fault = filemap_fault,
2082 .map_pages = filemap_map_pages,
2083 .page_mkwrite = btrfs_page_mkwrite,
2084 .remap_pages = generic_file_remap_pages,
2085};
2086
2087static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
2088{
2089 struct address_space *mapping = filp->f_mapping;
2090
2091 if (!mapping->a_ops->readpage)
2092 return -ENOEXEC;
2093
2094 file_accessed(filp);
2095 vma->vm_ops = &btrfs_file_vm_ops;
2096
2097 return 0;
2098}
2099
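/*
 * Check whether the file extent item at @slot is a hole (disk_bytenr == 0)
 * that is directly adjacent to the range [start, end), so it can be merged
 * with the hole we are about to insert.
 */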
2100static int hole_mergeable(struct inode *inode, struct extent_buffer *leaf,
2101 int slot, u64 start, u64 end)
2102{
2103 struct btrfs_file_extent_item *fi;
2104 struct btrfs_key key;
2105
2106 if (slot < 0 || slot >= btrfs_header_nritems(leaf))
2107 return 0;
2108
2109 btrfs_item_key_to_cpu(leaf, &key, slot);
2110 if (key.objectid != btrfs_ino(inode) ||
2111 key.type != BTRFS_EXTENT_DATA_KEY)
2112 return 0;
2113
2114 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
2115
2116 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
2117 return 0;
2118
2119 if (btrfs_file_extent_disk_bytenr(leaf, fi))
2120 return 0;
2121
2122 if (key.offset == end)
2123 return 1;
2124 if (key.offset + btrfs_file_extent_num_bytes(leaf, fi) == start)
2125 return 1;
2126 return 0;
2127}
2128
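/*
 * Insert (or merge) a file extent item describing a hole covering the range
 * [offset, end) (skipped when the NO_HOLES feature is enabled) and update
 * the extent map cache to match.
 */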
2129static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
2130 struct btrfs_path *path, u64 offset, u64 end)
2131{
2132 struct btrfs_root *root = BTRFS_I(inode)->root;
2133 struct extent_buffer *leaf;
2134 struct btrfs_file_extent_item *fi;
2135 struct extent_map *hole_em;
2136 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2137 struct btrfs_key key;
2138 int ret;
2139
2140 if (btrfs_fs_incompat(root->fs_info, NO_HOLES))
2141 goto out;
2142
2143 key.objectid = btrfs_ino(inode);
2144 key.type = BTRFS_EXTENT_DATA_KEY;
2145 key.offset = offset;
2146
2147 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2148 if (ret < 0)
2149 return ret;
2150 BUG_ON(!ret);
2151
2152 leaf = path->nodes[0];
2153 if (hole_mergeable(inode, leaf, path->slots[0]-1, offset, end)) {
2154 u64 num_bytes;
2155
2156 path->slots[0]--;
2157 fi = btrfs_item_ptr(leaf, path->slots[0],
2158 struct btrfs_file_extent_item);
2159 num_bytes = btrfs_file_extent_num_bytes(leaf, fi) +
2160 end - offset;
2161 btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
2162 btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
2163 btrfs_set_file_extent_offset(leaf, fi, 0);
2164 btrfs_mark_buffer_dirty(leaf);
2165 goto out;
2166 }
2167
2168 if (hole_mergeable(inode, leaf, path->slots[0], offset, end)) {
2169 u64 num_bytes;
2170
2171 key.offset = offset;
2172 btrfs_set_item_key_safe(root, path, &key);
2173 fi = btrfs_item_ptr(leaf, path->slots[0],
2174 struct btrfs_file_extent_item);
2175 num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end -
2176 offset;
2177 btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
2178 btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
2179 btrfs_set_file_extent_offset(leaf, fi, 0);
2180 btrfs_mark_buffer_dirty(leaf);
2181 goto out;
2182 }
2183 btrfs_release_path(path);
2184
2185 ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset,
2186 0, 0, end - offset, 0, end - offset,
2187 0, 0, 0);
2188 if (ret)
2189 return ret;
2190
2191out:
2192 btrfs_release_path(path);
2193
2194 hole_em = alloc_extent_map();
2195 if (!hole_em) {
2196 btrfs_drop_extent_cache(inode, offset, end - 1, 0);
2197 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
2198 &BTRFS_I(inode)->runtime_flags);
2199 } else {
2200 hole_em->start = offset;
2201 hole_em->len = end - offset;
2202 hole_em->ram_bytes = hole_em->len;
2203 hole_em->orig_start = offset;
2204
2205 hole_em->block_start = EXTENT_MAP_HOLE;
2206 hole_em->block_len = 0;
2207 hole_em->orig_block_len = 0;
2208 hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
2209 hole_em->compress_type = BTRFS_COMPRESS_NONE;
2210 hole_em->generation = trans->transid;
2211
2212 do {
2213 btrfs_drop_extent_cache(inode, offset, end - 1, 0);
2214 write_lock(&em_tree->lock);
2215 ret = add_extent_mapping(em_tree, hole_em, 1);
2216 write_unlock(&em_tree->lock);
2217 } while (ret == -EEXIST);
2218 free_extent_map(hole_em);
2219 if (ret)
2220 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
2221 &BTRFS_I(inode)->runtime_flags);
2222 }
2223
2224 return 0;
2225}
2226
/*
 * Find a hole extent on the given inode and change start/len to point to
 * the end of the hole extent (a hole extent is one with
 * em->start <= start and em->start + em->len > start).  When a hole extent
 * is found, return 1 and modify start/len.
 */
2233static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len)
2234{
2235 struct extent_map *em;
2236 int ret = 0;
2237
2238 em = btrfs_get_extent(inode, NULL, 0, *start, *len, 0);
2239 if (IS_ERR_OR_NULL(em)) {
2240 if (!em)
2241 ret = -ENOMEM;
2242 else
2243 ret = PTR_ERR(em);
2244 return ret;
2245 }
2246
	/* Hole or vacuum extent (the latter only exists in no-holes mode). */
2248 if (em->block_start == EXTENT_MAP_HOLE) {
2249 ret = 1;
2250 *len = em->start + em->len > *start + *len ?
2251 0 : *start + *len - em->start - em->len;
2252 *start = em->start + em->len;
2253 }
2254 free_extent_map(em);
2255 return ret;
2256}
2257
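/*
 * Implement FALLOC_FL_PUNCH_HOLE: zero out any partial pages at the edges
 * of the range, drop the file extent items fully covered by the hole and
 * replace them with hole extent items (unless the NO_HOLES feature is
 * enabled).
 */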
2258static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2259{
2260 struct btrfs_root *root = BTRFS_I(inode)->root;
2261 struct extent_state *cached_state = NULL;
2262 struct btrfs_path *path;
2263 struct btrfs_block_rsv *rsv;
2264 struct btrfs_trans_handle *trans;
2265 u64 lockstart;
2266 u64 lockend;
2267 u64 tail_start;
2268 u64 tail_len;
2269 u64 orig_start = offset;
2270 u64 cur_offset;
2271 u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
2272 u64 drop_end;
2273 int ret = 0;
2274 int err = 0;
2275 int rsv_count;
2276 bool same_page;
2277 bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
2278 u64 ino_size;
2279
2280 ret = btrfs_wait_ordered_range(inode, offset, len);
2281 if (ret)
2282 return ret;
2283
2284 mutex_lock(&inode->i_mutex);
2285 ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE);
2286 ret = find_first_non_hole(inode, &offset, &len);
2287 if (ret < 0)
2288 goto out_only_mutex;
2289 if (ret && !len) {
2290
2291 ret = 0;
2292 goto out_only_mutex;
2293 }
2294
2295 lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize);
2296 lockend = round_down(offset + len,
2297 BTRFS_I(inode)->root->sectorsize) - 1;
2298 same_page = ((offset >> PAGE_CACHE_SHIFT) ==
2299 ((offset + len - 1) >> PAGE_CACHE_SHIFT));
2300
	/*
	 * We needn't truncate any page which is beyond the end of the file
	 * because we are sure there is no data there.
	 *
	 * Only do this if we are in the same page and we aren't doing the
	 * entire page.
	 */
2309 if (same_page && len < PAGE_CACHE_SIZE) {
2310 if (offset < ino_size)
2311 ret = btrfs_truncate_page(inode, offset, len, 0);
2312 goto out_only_mutex;
2313 }
2314
2315
2316 if (offset < ino_size) {
2317 ret = btrfs_truncate_page(inode, offset, 0, 0);
2318 if (ret) {
2319 mutex_unlock(&inode->i_mutex);
2320 return ret;
2321 }
2322 }
2323
	/*
	 * Check the aligned pages after the first unaligned page; if
	 * offset != orig_start, the first unaligned page and several
	 * following pages are already in holes, so the extra check can be
	 * skipped.
	 */
2328 if (offset == orig_start) {
2329
2330 len = offset + len - lockstart;
2331 offset = lockstart;
2332 ret = find_first_non_hole(inode, &offset, &len);
2333 if (ret < 0)
2334 goto out_only_mutex;
2335 if (ret && !len) {
2336 ret = 0;
2337 goto out_only_mutex;
2338 }
2339 lockstart = offset;
2340 }
2341
2342
2343 tail_start = lockend + 1;
2344 tail_len = offset + len - tail_start;
2345 if (tail_len) {
2346 ret = find_first_non_hole(inode, &tail_start, &tail_len);
2347 if (unlikely(ret < 0))
2348 goto out_only_mutex;
2349 if (!ret) {
2350
2351 if (tail_start + tail_len < ino_size) {
2352 ret = btrfs_truncate_page(inode,
2353 tail_start + tail_len, 0, 1);
2354 if (ret)
2355 goto out_only_mutex;
2356 }
2357 }
2358 }
2359
2360 if (lockend < lockstart) {
2361 mutex_unlock(&inode->i_mutex);
2362 return 0;
2363 }
2364
2365 while (1) {
2366 struct btrfs_ordered_extent *ordered;
2367
2368 truncate_pagecache_range(inode, lockstart, lockend);
2369
2370 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
2371 0, &cached_state);
2372 ordered = btrfs_lookup_first_ordered_extent(inode, lockend);
2373
		/*
		 * We need to make sure we have no ordered extents in this
		 * range and nobody raced in and read a page in this range.
		 * If we did, we need to try again.
		 */
2379 if ((!ordered ||
2380 (ordered->file_offset + ordered->len <= lockstart ||
2381 ordered->file_offset > lockend)) &&
2382 !btrfs_page_exists_in_range(inode, lockstart, lockend)) {
2383 if (ordered)
2384 btrfs_put_ordered_extent(ordered);
2385 break;
2386 }
2387 if (ordered)
2388 btrfs_put_ordered_extent(ordered);
2389 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
2390 lockend, &cached_state, GFP_NOFS);
2391 ret = btrfs_wait_ordered_range(inode, lockstart,
2392 lockend - lockstart + 1);
2393 if (ret) {
2394 mutex_unlock(&inode->i_mutex);
2395 return ret;
2396 }
2397 }
2398
2399 path = btrfs_alloc_path();
2400 if (!path) {
2401 ret = -ENOMEM;
2402 goto out;
2403 }
2404
2405 rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
2406 if (!rsv) {
2407 ret = -ENOMEM;
2408 goto out_free;
2409 }
2410 rsv->size = btrfs_calc_trunc_metadata_size(root, 1);
2411 rsv->failfast = 1;
2412
	/*
	 * 1 - update the inode
	 * 1 - removing the extents in the range
	 * 1 - adding the hole extent if no_holes isn't set
	 */
2418 rsv_count = no_holes ? 2 : 3;
2419 trans = btrfs_start_transaction(root, rsv_count);
2420 if (IS_ERR(trans)) {
2421 err = PTR_ERR(trans);
2422 goto out_free;
2423 }
2424
2425 ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv,
2426 min_size);
2427 BUG_ON(ret);
2428 trans->block_rsv = rsv;
2429
2430 cur_offset = lockstart;
2431 len = lockend - cur_offset;
	/*
	 * Drop the extents in the locked range a chunk at a time.  When the
	 * reservation runs out (-ENOSPC), fill in holes for what was
	 * processed so far, end the transaction and start a fresh one with a
	 * refilled reservation before continuing.
	 */
	while (cur_offset < lockend) {
		ret = __btrfs_drop_extents(trans, root, inode, path,
					   cur_offset, lockend + 1,
					   &drop_end, 1, 0, 0, NULL);
		if (ret != -ENOSPC)
			break;

		trans->block_rsv = &root->fs_info->trans_block_rsv;

		if (cur_offset < ino_size) {
			ret = fill_holes(trans, inode, path, cur_offset,
					 drop_end);
			if (ret) {
				err = ret;
				break;
			}
		}

		cur_offset = drop_end;

		ret = btrfs_update_inode(trans, root, inode);
		if (ret) {
			err = ret;
			break;
		}

		btrfs_end_transaction(trans, root);
		btrfs_btree_balance_dirty(root);

		trans = btrfs_start_transaction(root, rsv_count);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			trans = NULL;
			break;
		}

		ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv,
					      rsv, min_size);
		BUG_ON(ret);	/* shouldn't happen */
		trans->block_rsv = rsv;

		ret = find_first_non_hole(inode, &cur_offset, &len);
		if (unlikely(ret < 0))
			break;
		if (ret && !len) {
			ret = 0;
			break;
		}
	}

	if (ret) {
		err = ret;
		goto out_trans;
	}

	trans->block_rsv = &root->fs_info->trans_block_rsv;

	/*
	 * Don't insert a file hole extent item for a range beyond eof
	 * (it would be useless) or for a zero length range (when
	 * cur_offset == drop_end).
	 */
	if (cur_offset < ino_size && cur_offset < drop_end) {
		ret = fill_holes(trans, inode, path, cur_offset, drop_end);
		if (ret) {
			err = ret;
			goto out_trans;
		}
	}

out_trans:
	if (!trans)
		goto out_free;

	inode_inc_iversion(inode);
	inode->i_mtime = inode->i_ctime = CURRENT_TIME;

	trans->block_rsv = &root->fs_info->trans_block_rsv;
	ret = btrfs_update_inode(trans, root, inode);
	btrfs_end_transaction(trans, root);
	btrfs_btree_balance_dirty(root);
out_free:
	btrfs_free_path(path);
	btrfs_free_block_rsv(root, rsv);
out:
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			     &cached_state, GFP_NOFS);
out_only_mutex:
	mutex_unlock(&inode->i_mutex);
	if (ret && !err)
		err = ret;
	return err;
}
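
/*
 * Example (userspace, illustrative only; not part of this file): the hole
 * punching path above is reached through fallocate(2) with
 * FALLOC_FL_PUNCH_HOLE, which btrfs_fallocate() below forwards to
 * btrfs_punch_hole():
 *
 *	fd = open("/mnt/btrfs/file", O_RDWR);
 *	fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 4096, 8192);
 *
 * FALLOC_FL_PUNCH_HOLE must be combined with FALLOC_FL_KEEP_SIZE, so the
 * file size never changes on this path.
 */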

static long btrfs_fallocate(struct file *file, int mode,
			    loff_t offset, loff_t len)
{
	struct inode *inode = file_inode(file);
	struct extent_state *cached_state = NULL;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 cur_offset;
	u64 last_byte;
	u64 alloc_start;
	u64 alloc_end;
	u64 alloc_hint = 0;
	u64 locked_end;
	struct extent_map *em;
	int blocksize = BTRFS_I(inode)->root->sectorsize;
	int ret;

	alloc_start = round_down(offset, blocksize);
	alloc_end = round_up(offset + len, blocksize);

	/* Make sure we aren't being given an unsupported mode. */
	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
		return -EOPNOTSUPP;

	if (mode & FALLOC_FL_PUNCH_HOLE)
		return btrfs_punch_hole(inode, offset, len);

	/*
	 * Make sure we have enough space before we do the
	 * allocation.
	 */
	ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
	if (ret)
		return ret;
	if (root->fs_info->quota_enabled) {
		ret = btrfs_qgroup_reserve(root, alloc_end - alloc_start);
		if (ret)
			goto out_reserve_fail;
	}

	mutex_lock(&inode->i_mutex);
	ret = inode_newsize_ok(inode, alloc_end);
	if (ret)
		goto out;

	if (alloc_start > inode->i_size) {
		ret = btrfs_cont_expand(inode, i_size_read(inode),
					alloc_start);
		if (ret)
			goto out;
	} else {
		/*
		 * If we are fallocating from the end of the file onward we
		 * need to zero out the end of the page if i_size lands in
		 * the middle of a page.
		 */
		ret = btrfs_truncate_page(inode, inode->i_size, 0, 0);
		if (ret)
			goto out;
	}

	/*
	 * Wait for ordered IO before we have any locks.  We'll loop again
	 * below with the locks held.
	 */
	ret = btrfs_wait_ordered_range(inode, alloc_start,
				       alloc_end - alloc_start);
	if (ret)
		goto out;

	locked_end = alloc_end - 1;
	while (1) {
		struct btrfs_ordered_extent *ordered;

		/*
		 * The extent lock is ordered inside the running
		 * transaction.
		 */
		lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
				 locked_end, 0, &cached_state);
		ordered = btrfs_lookup_first_ordered_extent(inode,
							    alloc_end - 1);
		if (ordered &&
		    ordered->file_offset + ordered->len > alloc_start &&
		    ordered->file_offset < alloc_end) {
			btrfs_put_ordered_extent(ordered);
			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
					     alloc_start, locked_end,
					     &cached_state, GFP_NOFS);
			/*
			 * We can't wait on the range with the transaction
			 * running or with the extent lock held.
			 */
			ret = btrfs_wait_ordered_range(inode, alloc_start,
						       alloc_end - alloc_start);
			if (ret)
				goto out;
		} else {
			if (ordered)
				btrfs_put_ordered_extent(ordered);
			break;
		}
	}

	cur_offset = alloc_start;
	while (1) {
		u64 actual_end;

		em = btrfs_get_extent(inode, NULL, 0, cur_offset,
				      alloc_end - cur_offset, 0);
		if (IS_ERR_OR_NULL(em)) {
			if (!em)
				ret = -ENOMEM;
			else
				ret = PTR_ERR(em);
			break;
		}
		last_byte = min(extent_map_end(em), alloc_end);
		actual_end = min_t(u64, extent_map_end(em), offset + len);
		last_byte = ALIGN(last_byte, blocksize);

		if (em->block_start == EXTENT_MAP_HOLE ||
		    (cur_offset >= inode->i_size &&
		     !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
			ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
							last_byte - cur_offset,
							1 << inode->i_blkbits,
							offset + len,
							&alloc_hint);

			if (ret < 0) {
				free_extent_map(em);
				break;
			}
		} else if (actual_end > inode->i_size &&
			   !(mode & FALLOC_FL_KEEP_SIZE)) {
			/*
			 * We didn't need to allocate any more space, but we
			 * still extended the size of the file so we need to
			 * update i_size and the inode item.
			 */
			inode->i_ctime = CURRENT_TIME;
			i_size_write(inode, actual_end);
			btrfs_ordered_update_i_size(inode, actual_end, NULL);
		}
		free_extent_map(em);

		cur_offset = last_byte;
		if (cur_offset >= alloc_end) {
			ret = 0;
			break;
		}
	}
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
			     &cached_state, GFP_NOFS);
out:
	mutex_unlock(&inode->i_mutex);
	if (root->fs_info->quota_enabled)
		btrfs_qgroup_free(root, alloc_end - alloc_start);
out_reserve_fail:
	/* Let go of our reservation. */
	btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
	return ret;
}

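/*
 * Example (userspace, illustrative only; not part of this file):
 * find_desired_extent() below backs the SEEK_DATA/SEEK_HOLE cases of
 * lseek(2) on btrfs, e.g.:
 *
 *	off_t data = lseek(fd, 0, SEEK_DATA);     // first non-hole offset >= 0
 *	off_t hole = lseek(fd, data, SEEK_HOLE);  // end of that data region
 *
 * Both calls fail with ENXIO when the requested offset is at or beyond the
 * end of the file.
 */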
static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_map *em = NULL;
	struct extent_state *cached_state = NULL;
	u64 lockstart;
	u64 lockend;
	u64 start;
	u64 len;
	int ret = 0;

	if (inode->i_size == 0)
		return -ENXIO;

	/*
	 * *offset can be negative, in which case we start looking for
	 * DATA/HOLE from the very start of the file.
	 */
	start = max_t(loff_t, 0, *offset);

	lockstart = round_down(start, root->sectorsize);
	lockend = round_up(i_size_read(inode), root->sectorsize);
	if (lockend <= lockstart)
		lockend = lockstart + root->sectorsize;
	lockend--;
	len = lockend - lockstart + 1;

	lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0,
			 &cached_state);

	while (start < inode->i_size) {
		em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0);
		if (IS_ERR(em)) {
			ret = PTR_ERR(em);
			em = NULL;
			break;
		}

		if (whence == SEEK_HOLE &&
		    (em->block_start == EXTENT_MAP_HOLE ||
		     test_bit(EXTENT_FLAG_PREALLOC, &em->flags)))
			break;
		else if (whence == SEEK_DATA &&
			 (em->block_start != EXTENT_MAP_HOLE &&
			  !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)))
			break;

		start = em->start + em->len;
		free_extent_map(em);
		em = NULL;
		cond_resched();
	}
	free_extent_map(em);
	if (!ret) {
		if (whence == SEEK_DATA && start >= inode->i_size)
			ret = -ENXIO;
		else
			*offset = min_t(loff_t, start, inode->i_size);
	}
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			     &cached_state, GFP_NOFS);
	return ret;
}

static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	int ret;

	mutex_lock(&inode->i_mutex);
	switch (whence) {
	case SEEK_END:
	case SEEK_CUR:
		offset = generic_file_llseek(file, offset, whence);
		goto out;
	case SEEK_DATA:
	case SEEK_HOLE:
		if (offset >= i_size_read(inode)) {
			mutex_unlock(&inode->i_mutex);
			return -ENXIO;
		}

		ret = find_desired_extent(inode, &offset, whence);
		if (ret) {
			mutex_unlock(&inode->i_mutex);
			return ret;
		}
	}

	offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
out:
	mutex_unlock(&inode->i_mutex);
	return offset;
}

const struct file_operations btrfs_file_operations = {
	.llseek		= btrfs_file_llseek,
	.read		= new_sync_read,
	.write		= new_sync_write,
	.read_iter	= generic_file_read_iter,
	.splice_read	= generic_file_splice_read,
	.write_iter	= btrfs_file_write_iter,
	.mmap		= btrfs_file_mmap,
	.open		= generic_file_open,
	.release	= btrfs_release_file,
	.fsync		= btrfs_sync_file,
	.fallocate	= btrfs_fallocate,
	.unlocked_ioctl	= btrfs_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= btrfs_ioctl,
#endif
};

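/*
 * Slab cache setup/teardown for the auto defrag records used earlier in this
 * file.  These are expected to be called once from the btrfs module init and
 * exit paths (init_btrfs_fs()/exit_btrfs_fs() in super.c), not per mount.
 */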
void btrfs_auto_defrag_exit(void)
{
	if (btrfs_inode_defrag_cachep)
		kmem_cache_destroy(btrfs_inode_defrag_cachep);
}

int btrfs_auto_defrag_init(void)
{
	btrfs_inode_defrag_cachep = kmem_cache_create("btrfs_inode_defrag",
					sizeof(struct inode_defrag), 0,
					SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
					NULL);
	if (!btrfs_inode_defrag_cachep)
		return -ENOMEM;

	return 0;
}

int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end)
{
	int ret;

	/*
	 * With compression, the first filemap_fdatawrite_range() call may
	 * find and lock dirty pages, set up async (compressed) extents and
	 * return with the range still locked but with no pages actually
	 * marked for writeback yet.  A plain filemap_write_and_wait_range()
	 * would therefore return before any real writeback has started.
	 * Calling filemap_fdatawrite_range() a second time makes us wait on
	 * the page locks, which are only released once the async extents
	 * have been set up for writeback, so when it returns writeback has
	 * really been started for the whole range.  The second call is only
	 * needed when the inode has async extents, which is what the
	 * BTRFS_INODE_HAS_ASYNC_EXTENT flag tracks.
	 */
	ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
	if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
			     &BTRFS_I(inode)->runtime_flags))
		ret = filemap_fdatawrite_range(inode->i_mapping, start, end);

	return ret;
}
