/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/mpage.h>
#include <linux/falloc.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/compat.h>
#include <linux/slab.h>
#include <linux/btrfs.h>
#include <linux/uio.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "print-tree.h"
#include "tree-log.h"
#include "locking.h"
#include "volumes.h"
#include "qgroup.h"
#include "compression.h"

static struct kmem_cache *btrfs_inode_defrag_cachep;

/*
 * When auto defrag is enabled we queue up these defrag structs to remember
 * which inodes need defragging passes.
 */
struct inode_defrag {
	struct rb_node rb_node;
	/* inode objectid */
	u64 ino;
	/*
	 * transid where the defrag was added, we search for extents newer
	 * than this
	 */
	u64 transid;

	/* root objectid */
	u64 root;

	/* last offset we were able to defrag */
	u64 last_offset;

	/* if we've wrapped around back to zero once already */
	int cycled;
};

static int __compare_inode_defrag(struct inode_defrag *defrag1,
				  struct inode_defrag *defrag2)
{
	if (defrag1->root > defrag2->root)
		return 1;
	else if (defrag1->root < defrag2->root)
		return -1;
	else if (defrag1->ino > defrag2->ino)
		return 1;
	else if (defrag1->ino < defrag2->ino)
		return -1;
	else
		return 0;
}

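/*
 * Illustrative note (added for clarity; not in the original source): the
 * comparison above defines a total order keyed on (root, ino), so e.g.
 * (root=5, ino=260) sorts before (root=5, ino=261), which sorts before
 * (root=7, ino=1).  The defrag rbtree is therefore grouped by subvolume
 * first, and by inode number within a subvolume.
 */
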
/*
 * Pop a record for an inode into the defrag tree.  The lock must be
 * held already.
 *
 * If you're inserting a record for an older transid than an existing
 * record, the transid already in the tree is lowered.
 *
 * If an existing record is found, the defrag item you pass in is freed
 * by the caller.
 */
static int __btrfs_add_inode_defrag(struct btrfs_inode *inode,
				    struct inode_defrag *defrag)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
	struct inode_defrag *entry;
	struct rb_node **p;
	struct rb_node *parent = NULL;
	int ret;

	p = &fs_info->defrag_inodes.rb_node;
	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct inode_defrag, rb_node);

		ret = __compare_inode_defrag(defrag, entry);
		if (ret < 0)
			p = &parent->rb_left;
		else if (ret > 0)
			p = &parent->rb_right;
		else {
			/*
			 * If we're reinserting an entry for an old defrag run,
			 * make sure to lower the transid of our existing
			 * record.
			 */
			if (defrag->transid < entry->transid)
				entry->transid = defrag->transid;
			if (defrag->last_offset > entry->last_offset)
				entry->last_offset = defrag->last_offset;
			return -EEXIST;
		}
	}
	set_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags);
	rb_link_node(&defrag->rb_node, parent, p);
	rb_insert_color(&defrag->rb_node, &fs_info->defrag_inodes);
	return 0;
}

static inline int __need_auto_defrag(struct btrfs_fs_info *fs_info)
{
	if (!btrfs_test_opt(fs_info, AUTO_DEFRAG))
		return 0;

	if (btrfs_fs_closing(fs_info))
		return 0;

	return 1;
}

/*
 * Insert a defrag record for this inode if auto defrag is enabled.
 */
int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
			   struct btrfs_inode *inode)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
	struct btrfs_root *root = inode->root;
	struct inode_defrag *defrag;
	u64 transid;
	int ret;

	if (!__need_auto_defrag(fs_info))
		return 0;

	if (test_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags))
		return 0;

	if (trans)
		transid = trans->transid;
	else
		transid = inode->root->last_trans;

	defrag = kmem_cache_zalloc(btrfs_inode_defrag_cachep, GFP_NOFS);
	if (!defrag)
		return -ENOMEM;

	defrag->ino = btrfs_ino(inode);
	defrag->transid = transid;
	defrag->root = root->root_key.objectid;

	spin_lock(&fs_info->defrag_inodes_lock);
	if (!test_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags)) {
		/*
		 * If we set the IN_DEFRAG flag and then evict the inode from
		 * memory, a re-read of that inode won't have the flag set, so
		 * we may still find an existing record in the tree.
		 * __btrfs_add_inode_defrag() reports that via -EEXIST, in
		 * which case we free our copy here.
		 */
		ret = __btrfs_add_inode_defrag(inode, defrag);
		if (ret)
			kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
	} else {
		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
	}
	spin_unlock(&fs_info->defrag_inodes_lock);
	return 0;
}

/*
 * Requeue the defrag object.  If there is a defrag object that points to
 * the same inode in the tree, we will merge them together (by
 * __btrfs_add_inode_defrag()) and free the one that we want to requeue.
 */
static void btrfs_requeue_inode_defrag(struct btrfs_inode *inode,
				       struct inode_defrag *defrag)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
	int ret;

	if (!__need_auto_defrag(fs_info))
		goto out;

	/*
	 * Here we don't check the IN_DEFRAG flag, because we need to merge
	 * the requeued record with any record already in the tree.
	 */
	spin_lock(&fs_info->defrag_inodes_lock);
	ret = __btrfs_add_inode_defrag(inode, defrag);
	spin_unlock(&fs_info->defrag_inodes_lock);
	if (ret)
		goto out;
	return;
out:
	kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
}

/*
 * Pick the defraggable inode that we want; if it doesn't exist, we will
 * get the next one.
 */
static struct inode_defrag *
btrfs_pick_defrag_inode(struct btrfs_fs_info *fs_info, u64 root, u64 ino)
{
	struct inode_defrag *entry = NULL;
	struct inode_defrag tmp;
	struct rb_node *p;
	struct rb_node *parent = NULL;
	int ret;

	tmp.ino = ino;
	tmp.root = root;

	spin_lock(&fs_info->defrag_inodes_lock);
	p = fs_info->defrag_inodes.rb_node;
	while (p) {
		parent = p;
		entry = rb_entry(parent, struct inode_defrag, rb_node);

		ret = __compare_inode_defrag(&tmp, entry);
		if (ret < 0)
			p = parent->rb_left;
		else if (ret > 0)
			p = parent->rb_right;
		else
			goto out;
	}

	if (parent && __compare_inode_defrag(&tmp, entry) > 0) {
		parent = rb_next(parent);
		if (parent)
			entry = rb_entry(parent, struct inode_defrag, rb_node);
		else
			entry = NULL;
	}
out:
	if (entry)
		rb_erase(parent, &fs_info->defrag_inodes);
	spin_unlock(&fs_info->defrag_inodes_lock);
	return entry;
}

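/*
 * Usage note (added for clarity; not in the original source): passing
 * root = 0 and ino = 0 makes the lookup above always walk left, ending at
 * the leftmost record, so btrfs_run_defrag_inodes() below can iterate the
 * whole tree by restarting each pick at (defrag->root, defrag->ino + 1).
 */
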
void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info)
{
	struct inode_defrag *defrag;
	struct rb_node *node;

	spin_lock(&fs_info->defrag_inodes_lock);
	node = rb_first(&fs_info->defrag_inodes);
	while (node) {
		rb_erase(node, &fs_info->defrag_inodes);
		defrag = rb_entry(node, struct inode_defrag, rb_node);
		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);

		cond_resched_lock(&fs_info->defrag_inodes_lock);

		node = rb_first(&fs_info->defrag_inodes);
	}
	spin_unlock(&fs_info->defrag_inodes_lock);
}

#define BTRFS_DEFRAG_BATCH	1024

static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
				    struct inode_defrag *defrag)
{
	struct btrfs_root *inode_root;
	struct inode *inode;
	struct btrfs_key key;
	struct btrfs_ioctl_defrag_range_args range;
	int num_defrag;
	int index;
	int ret;

	/* get the inode */
	key.objectid = defrag->root;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	index = srcu_read_lock(&fs_info->subvol_srcu);

	inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
	if (IS_ERR(inode_root)) {
		ret = PTR_ERR(inode_root);
		goto cleanup;
	}

	key.objectid = defrag->ino;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;
	inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
	if (IS_ERR(inode)) {
		ret = PTR_ERR(inode);
		goto cleanup;
	}
	srcu_read_unlock(&fs_info->subvol_srcu, index);

	/* do a chunk of defrag */
	clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
	memset(&range, 0, sizeof(range));
	range.len = (u64)-1;
	range.start = defrag->last_offset;

	sb_start_write(fs_info->sb);
	num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
				       BTRFS_DEFRAG_BATCH);
	sb_end_write(fs_info->sb);
	/*
	 * if we filled the whole defrag batch, there must be more work to
	 * do.  Queue this defrag again.
	 */
	if (num_defrag == BTRFS_DEFRAG_BATCH) {
		defrag->last_offset = range.start;
		btrfs_requeue_inode_defrag(BTRFS_I(inode), defrag);
	} else if (defrag->last_offset && !defrag->cycled) {
		/*
		 * we didn't fill our defrag batch, but we didn't start at
		 * zero.  Make sure we loop around to the start of the file.
		 */
		defrag->last_offset = 0;
		defrag->cycled = 1;
		btrfs_requeue_inode_defrag(BTRFS_I(inode), defrag);
	} else {
		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
	}

	iput(inode);
	return 0;
cleanup:
	srcu_read_unlock(&fs_info->subvol_srcu, index);
	kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
	return ret;
}

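/*
 * Illustrative note (added for clarity; not in the original source): with
 * BTRFS_DEFRAG_BATCH == 1024, an inode needing more than 1024 defragged
 * pages is processed incrementally: each pass records how far it got in
 * defrag->last_offset and requeues itself, and a pass that started in the
 * middle of the file wraps around once (defrag->cycled) to catch the
 * beginning before the record is finally freed.
 */
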
/*
 * Run through the list of inodes in the FS that need defragging.
 */
int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
{
	struct inode_defrag *defrag;
	u64 first_ino = 0;
	u64 root_objectid = 0;

	atomic_inc(&fs_info->defrag_running);
	while (1) {
		/* Pause the auto defragger. */
		if (test_bit(BTRFS_FS_STATE_REMOUNTING,
			     &fs_info->fs_state))
			break;

		if (!__need_auto_defrag(fs_info))
			break;

		/* find an inode to defrag */
		defrag = btrfs_pick_defrag_inode(fs_info, root_objectid,
						 first_ino);
		if (!defrag) {
			if (root_objectid || first_ino) {
				root_objectid = 0;
				first_ino = 0;
				continue;
			} else {
				break;
			}
		}

		first_ino = defrag->ino + 1;
		root_objectid = defrag->root;

		__btrfs_run_defrag_inode(fs_info, defrag);
	}
	atomic_dec(&fs_info->defrag_running);

	/*
	 * During unmount, we use the transaction_wait queue to wait for the
	 * defragger to stop.
	 */
	wake_up(&fs_info->transaction_wait);
	return 0;
}

/*
 * Simple helper to fault in pages and copy.  This should go away and be
 * replaced with calls into generic code.
 */
static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
					 struct page **prepared_pages,
					 struct iov_iter *i)
{
	size_t copied = 0;
	size_t total_copied = 0;
	int pg = 0;
	int offset = pos & (PAGE_SIZE - 1);

	while (write_bytes > 0) {
		size_t count = min_t(size_t,
				     PAGE_SIZE - offset, write_bytes);
		struct page *page = prepared_pages[pg];
		/*
		 * Copy data from userspace to the current page
		 */
		copied = iov_iter_copy_from_user_atomic(page, i, offset, count);

		/* Flush processor's dcache for this page */
		flush_dcache_page(page);

		/*
		 * if we get a partial write, we can end up with
		 * partially up to date pages.  These add
		 * a lot of complexity, so make sure they don't
		 * happen by forcing this copy to be retried.
		 *
		 * The rest of the btrfs_file_write code will fall
		 * back to page at a time copies after we return 0.
		 */
		if (!PageUptodate(page) && copied < count)
			copied = 0;

		iov_iter_advance(i, copied);
		write_bytes -= copied;
		total_copied += copied;

		/* Return to btrfs_file_write_iter to fault page */
		if (unlikely(copied == 0))
			break;

		if (copied < PAGE_SIZE - offset) {
			offset += copied;
		} else {
			pg++;
			offset = 0;
		}
	}
	return total_copied;
}

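/*
 * Worked example (added for clarity; not in the original source), assuming
 * 4096-byte pages: copying 5000 bytes at pos 6000 starts at offset
 * 6000 & 4095 = 1904 into prepared_pages[0].  The first iteration copies
 * min(4096 - 1904, 5000) = 2192 bytes, the second copies the remaining
 * 2808 bytes into prepared_pages[1] starting at offset 0, and the loop
 * exits with total_copied = 5000.
 */
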
/*
 * Unlock the pages after btrfs_file_write() is done with them.
 */
static void btrfs_drop_pages(struct page **pages, size_t num_pages)
{
	size_t i;
	for (i = 0; i < num_pages; i++) {
		/*
		 * The page checked bit is some magic around finding pages
		 * that have been modified without going through
		 * btrfs_set_page_dirty(); clear it here.  There is no need
		 * to mark the pages accessed, as find_or_create_page() did
		 * that in prepare_pages().
		 */
		ClearPageChecked(pages[i]);
		unlock_page(pages[i]);
		put_page(pages[i]);
	}
}

static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
					 const u64 start,
					 const u64 len,
					 struct extent_state **cached_state)
{
	u64 search_start = start;
	const u64 end = start + len - 1;

	while (search_start < end) {
		const u64 search_len = end - search_start + 1;
		struct extent_map *em;
		u64 em_len;
		int ret = 0;

		em = btrfs_get_extent(inode, NULL, 0, search_start,
				      search_len, 0);
		if (IS_ERR(em))
			return PTR_ERR(em);

		if (em->block_start != EXTENT_MAP_HOLE)
			goto next;

		em_len = em->len;
		if (em->start < search_start)
			em_len -= search_start - em->start;
		if (em_len > search_len)
			em_len = search_len;

		ret = set_extent_bit(&inode->io_tree, search_start,
				     search_start + em_len - 1,
				     EXTENT_DELALLOC_NEW,
				     NULL, cached_state, GFP_NOFS);
next:
		search_start = extent_map_end(em);
		free_extent_map(em);
		if (ret)
			return ret;
	}
	return 0;
}

/*
 * After btrfs_copy_from_user(), pages need to be dirtied and we need to
 * make sure holes are created between the current EOF and the start of
 * any next extents (if required).
 *
 * This also makes the decision about creating an inline extent vs doing
 * real data extents, marking pages dirty and delalloc as required.
 */
int btrfs_dirty_pages(struct inode *inode, struct page **pages,
		      size_t num_pages, loff_t pos, size_t write_bytes,
		      struct extent_state **cached)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	int err = 0;
	int i;
	u64 num_bytes;
	u64 start_pos;
	u64 end_of_last_block;
	u64 end_pos = pos + write_bytes;
	loff_t isize = i_size_read(inode);
	unsigned int extra_bits = 0;

	start_pos = pos & ~((u64) fs_info->sectorsize - 1);
	num_bytes = round_up(write_bytes + pos - start_pos,
			     fs_info->sectorsize);

	end_of_last_block = start_pos + num_bytes - 1;

	if (!btrfs_is_free_space_inode(BTRFS_I(inode))) {
		if (start_pos >= isize &&
		    !(BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)) {
			/*
			 * There can't be any extents following eof in this
			 * case, so just set the delalloc new bit for the
			 * range directly.
			 */
			extra_bits |= EXTENT_DELALLOC_NEW;
		} else {
			err = btrfs_find_new_delalloc_bytes(BTRFS_I(inode),
							    start_pos,
							    num_bytes, cached);
			if (err)
				return err;
		}
	}

	err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
					extra_bits, cached, 0);
	if (err)
		return err;

	for (i = 0; i < num_pages; i++) {
		struct page *p = pages[i];
		SetPageUptodate(p);
		ClearPageChecked(p);
		set_page_dirty(p);
	}

	/*
	 * We've only changed i_size in ram, and we haven't updated the disk
	 * i_size.  There is no need to log the inode at this time.
	 */
	if (end_pos > isize)
		i_size_write(inode, end_pos);
	return 0;
}

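/*
 * Worked example (added for clarity; not in the original source): with a
 * 4096-byte sectorsize, a 100-byte write at pos 5000 gives
 * start_pos = 5000 & ~4095 = 4096 and
 * num_bytes = round_up(100 + 5000 - 4096, 4096) = 4096, so the delalloc
 * range covers exactly the one sector [4096, 8191] the write dirtied.
 */
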
/*
 * This drops all the extents in the cache that intersect the range
 * [start, end].  Existing extents are split as required.
 */
void btrfs_drop_extent_cache(struct btrfs_inode *inode, u64 start, u64 end,
			     int skip_pinned)
{
	struct extent_map *em;
	struct extent_map *split = NULL;
	struct extent_map *split2 = NULL;
	struct extent_map_tree *em_tree = &inode->extent_tree;
	u64 len = end - start + 1;
	u64 gen;
	int ret;
	int testend = 1;
	unsigned long flags;
	int compressed = 0;
	bool modified;

	WARN_ON(end < start);
	if (end == (u64)-1) {
		len = (u64)-1;
		testend = 0;
	}
	while (1) {
		int no_splits = 0;

		modified = false;
		if (!split)
			split = alloc_extent_map();
		if (!split2)
			split2 = alloc_extent_map();
		if (!split || !split2)
			no_splits = 1;

		write_lock(&em_tree->lock);
		em = lookup_extent_mapping(em_tree, start, len);
		if (!em) {
			write_unlock(&em_tree->lock);
			break;
		}
		flags = em->flags;
		gen = em->generation;
		if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
			if (testend && em->start + em->len >= start + len) {
				free_extent_map(em);
				write_unlock(&em_tree->lock);
				break;
			}
			start = em->start + em->len;
			if (testend)
				len = start + len - (em->start + em->len);
			free_extent_map(em);
			write_unlock(&em_tree->lock);
			continue;
		}
		compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
		clear_bit(EXTENT_FLAG_LOGGING, &flags);
		modified = !list_empty(&em->list);
		if (no_splits)
			goto next;

		if (em->start < start) {
			split->start = em->start;
			split->len = start - em->start;

			if (em->block_start < EXTENT_MAP_LAST_BYTE) {
				split->orig_start = em->orig_start;
				split->block_start = em->block_start;

				if (compressed)
					split->block_len = em->block_len;
				else
					split->block_len = split->len;
				split->orig_block_len = max(split->block_len,
						em->orig_block_len);
				split->ram_bytes = em->ram_bytes;
			} else {
				split->orig_start = split->start;
				split->block_len = 0;
				split->block_start = em->block_start;
				split->orig_block_len = 0;
				split->ram_bytes = split->len;
			}

			split->generation = gen;
			split->bdev = em->bdev;
			split->flags = flags;
			split->compress_type = em->compress_type;
			replace_extent_mapping(em_tree, em, split, modified);
			free_extent_map(split);
			split = split2;
			split2 = NULL;
		}
		if (testend && em->start + em->len > start + len) {
			u64 diff = start + len - em->start;

			split->start = start + len;
			split->len = em->start + em->len - (start + len);
			split->bdev = em->bdev;
			split->flags = flags;
			split->compress_type = em->compress_type;
			split->generation = gen;

			if (em->block_start < EXTENT_MAP_LAST_BYTE) {
				split->orig_block_len = max(em->block_len,
						    em->orig_block_len);

				split->ram_bytes = em->ram_bytes;
				if (compressed) {
					split->block_len = em->block_len;
					split->block_start = em->block_start;
					split->orig_start = em->orig_start;
				} else {
					split->block_len = split->len;
					split->block_start = em->block_start
							     + diff;
					split->orig_start = em->orig_start;
				}
			} else {
				split->ram_bytes = split->len;
				split->orig_start = split->start;
				split->block_len = 0;
				split->block_start = em->block_start;
				split->orig_block_len = 0;
			}

			if (extent_map_in_tree(em)) {
				replace_extent_mapping(em_tree, em, split,
						       modified);
			} else {
				ret = add_extent_mapping(em_tree, split,
							 modified);
				ASSERT(ret == 0); /* Logic error */
			}
			free_extent_map(split);
			split = NULL;
		}
next:
		if (extent_map_in_tree(em))
			remove_extent_mapping(em_tree, em);
		write_unlock(&em_tree->lock);

		/* once for us */
		free_extent_map(em);
		/* once for the tree */
		free_extent_map(em);
	}
	if (split)
		free_extent_map(split);
	if (split2)
		free_extent_map(split2);
}

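/*
 * Illustrative example (added for clarity; not in the original source):
 * dropping the range [8192, 12287] from a cached mapping that covers
 * [4096, 16383] exercises both split paths above: the front split keeps
 * [4096, 8191], the back split keeps [12288, 16383], and the original
 * mapping is removed from the tree.
 */
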
/*
 * This is very complex, but the basic idea is to drop all extents in the
 * range start - end.
 *
 * If an extent intersects the range but is not entirely inside the range
 * it is either truncated or split.  Anything entirely inside the range
 * is deleted from the tree.
 */
int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root, struct inode *inode,
			 struct btrfs_path *path, u64 start, u64 end,
			 u64 *drop_end, int drop_cache,
			 int replace_extent,
			 u32 extent_item_size,
			 int *key_inserted)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	struct btrfs_key new_key;
	u64 ino = btrfs_ino(BTRFS_I(inode));
	u64 search_start = start;
	u64 disk_bytenr = 0;
	u64 num_bytes = 0;
	u64 extent_offset = 0;
	u64 extent_end = 0;
	u64 last_end = start;
	int del_nr = 0;
	int del_slot = 0;
	int extent_type;
	int recow;
	int ret;
	int modify_tree = -1;
	int update_refs;
	int found = 0;
	int leafs_visited = 0;

	if (drop_cache)
		btrfs_drop_extent_cache(BTRFS_I(inode), start, end - 1, 0);

	if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent)
		modify_tree = 0;

	update_refs = (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
		       root == fs_info->tree_root);
	while (1) {
		recow = 0;
		ret = btrfs_lookup_file_extent(trans, root, path, ino,
					       search_start, modify_tree);
		if (ret < 0)
			break;
		if (ret > 0 && path->slots[0] > 0 && search_start == start) {
			leaf = path->nodes[0];
			btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
			if (key.objectid == ino &&
			    key.type == BTRFS_EXTENT_DATA_KEY)
				path->slots[0]--;
		}
		ret = 0;
		leafs_visited++;
next_slot:
		leaf = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			BUG_ON(del_nr > 0);
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				break;
			if (ret > 0) {
				ret = 0;
				break;
			}
			leafs_visited++;
			leaf = path->nodes[0];
			recow = 1;
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

		if (key.objectid > ino)
			break;
		if (WARN_ON_ONCE(key.objectid < ino) ||
		    key.type < BTRFS_EXTENT_DATA_KEY) {
			ASSERT(del_nr == 0);
			path->slots[0]++;
			goto next_slot;
		}
		if (key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
			break;

		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		extent_type = btrfs_file_extent_type(leaf, fi);

		if (extent_type == BTRFS_FILE_EXTENT_REG ||
		    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
			disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
			num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
			extent_offset = btrfs_file_extent_offset(leaf, fi);
			extent_end = key.offset +
				btrfs_file_extent_num_bytes(leaf, fi);
		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
			extent_end = key.offset +
				btrfs_file_extent_inline_len(leaf,
						     path->slots[0], fi);
		} else {
			/* can't happen */
			BUG();
		}

		/*
		 * Don't skip extent items representing 0 byte lengths.  They
		 * used to be created (bug) if while punching holes we hit
		 * -ENOSPC condition.  So if we find one here, just ensure we
		 * delete it, otherwise we would insert a new file extent item
		 * with the same key (offset) as that 0 bytes length file
		 * extent item.
		 */
		if (extent_end == key.offset && extent_end >= search_start) {
			last_end = extent_end;
			goto delete_extent_item;
		}

		if (extent_end <= search_start) {
			path->slots[0]++;
			goto next_slot;
		}

		found = 1;
		search_start = max(key.offset, start);
		if (recow || !modify_tree) {
			modify_tree = -1;
			btrfs_release_path(path);
			continue;
		}

		/*
		 *     | - range to drop - |
		 *  | -------- extent -------- |
		 */
		if (start > key.offset && end < extent_end) {
			BUG_ON(del_nr > 0);
			if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
				ret = -EOPNOTSUPP;
				break;
			}

			memcpy(&new_key, &key, sizeof(new_key));
			new_key.offset = start;
			ret = btrfs_duplicate_item(trans, root, path,
						   &new_key);
			if (ret == -EAGAIN) {
				btrfs_release_path(path);
				continue;
			}
			if (ret < 0)
				break;

			leaf = path->nodes[0];
			fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
					    struct btrfs_file_extent_item);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							start - key.offset);

			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);

			extent_offset += start - key.offset;
			btrfs_set_file_extent_offset(leaf, fi, extent_offset);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							extent_end - start);
			btrfs_mark_buffer_dirty(leaf);

			if (update_refs && disk_bytenr > 0) {
				ret = btrfs_inc_extent_ref(trans, root,
						disk_bytenr, num_bytes, 0,
						root->root_key.objectid,
						new_key.objectid,
						start - extent_offset);
				BUG_ON(ret); /* -ENOMEM */
			}
			key.offset = start;
		}
		/*
		 * From here on out we will have actually dropped something,
		 * so last_end can be updated.
		 */
		last_end = extent_end;

		/*
		 *  | ---- range to drop ----- |
		 *      | -------- extent -------- |
		 */
		if (start <= key.offset && end < extent_end) {
			if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
				ret = -EOPNOTSUPP;
				break;
			}

			memcpy(&new_key, &key, sizeof(new_key));
			new_key.offset = end;
			btrfs_set_item_key_safe(fs_info, path, &new_key);

			extent_offset += end - key.offset;
			btrfs_set_file_extent_offset(leaf, fi, extent_offset);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							extent_end - end);
			btrfs_mark_buffer_dirty(leaf);
			if (update_refs && disk_bytenr > 0)
				inode_sub_bytes(inode, end - key.offset);
			break;
		}

		search_start = extent_end;
		/*
		 *       | ---- range to drop ----- |
		 *  | -------- extent -------- |
		 */
		if (start > key.offset && end >= extent_end) {
			BUG_ON(del_nr > 0);
			if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
				ret = -EOPNOTSUPP;
				break;
			}

			btrfs_set_file_extent_num_bytes(leaf, fi,
							start - key.offset);
			btrfs_mark_buffer_dirty(leaf);
			if (update_refs && disk_bytenr > 0)
				inode_sub_bytes(inode, extent_end - start);
			if (end == extent_end)
				break;

			path->slots[0]++;
			goto next_slot;
		}

		/*
		 *  | ---- range to drop ----- |
		 *    | ------ extent ------ |
		 */
		if (start <= key.offset && end >= extent_end) {
delete_extent_item:
			if (del_nr == 0) {
				del_slot = path->slots[0];
				del_nr = 1;
			} else {
				BUG_ON(del_slot + del_nr != path->slots[0]);
				del_nr++;
			}

			if (update_refs &&
			    extent_type == BTRFS_FILE_EXTENT_INLINE) {
				inode_sub_bytes(inode,
						extent_end - key.offset);
				extent_end = ALIGN(extent_end,
						   fs_info->sectorsize);
			} else if (update_refs && disk_bytenr > 0) {
				ret = btrfs_free_extent(trans, root,
						disk_bytenr, num_bytes, 0,
						root->root_key.objectid,
						key.objectid, key.offset -
						extent_offset);
				BUG_ON(ret); /* -ENOMEM */
				inode_sub_bytes(inode,
						extent_end - key.offset);
			}

			if (end == extent_end)
				break;

			if (path->slots[0] + 1 < btrfs_header_nritems(leaf)) {
				path->slots[0]++;
				goto next_slot;
			}

			ret = btrfs_del_items(trans, root, path, del_slot,
					      del_nr);
			if (ret) {
				btrfs_abort_transaction(trans, ret);
				break;
			}

			del_nr = 0;
			del_slot = 0;

			btrfs_release_path(path);
			continue;
		}

		BUG_ON(1);
	}

	if (!ret && del_nr > 0) {
		/*
		 * Set path->slots[0] to first slot, so that after the delete,
		 * if items are moved off from our leaf to its immediate left
		 * or right neighbor leaves, we end up with a correct and
		 * adjusted path->slots[0] for our insertion (if
		 * replace_extent != 0).
		 */
		path->slots[0] = del_slot;
		ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
		if (ret)
			btrfs_abort_transaction(trans, ret);
	}

	leaf = path->nodes[0];
	/*
	 * If btrfs_del_items() was called, it might have deleted a leaf, in
	 * which case it unlocked our path, so check path->locks[0] matches a
	 * write lock.
	 */
	if (!ret && replace_extent && leafs_visited == 1 &&
	    (path->locks[0] == BTRFS_WRITE_LOCK_BLOCKING ||
	     path->locks[0] == BTRFS_WRITE_LOCK) &&
	    btrfs_leaf_free_space(fs_info, leaf) >=
	    sizeof(struct btrfs_item) + extent_item_size) {

		key.objectid = ino;
		key.type = BTRFS_EXTENT_DATA_KEY;
		key.offset = start;
		if (!del_nr && path->slots[0] < btrfs_header_nritems(leaf)) {
			struct btrfs_key slot_key;

			btrfs_item_key_to_cpu(leaf, &slot_key, path->slots[0]);
			if (btrfs_comp_cpu_keys(&key, &slot_key) > 0)
				path->slots[0]++;
		}
		setup_items_for_insert(root, path, &key,
				       &extent_item_size,
				       extent_item_size,
				       sizeof(struct btrfs_item) +
				       extent_item_size, 1);
		*key_inserted = 1;
	}

	if (!replace_extent || !(*key_inserted))
		btrfs_release_path(path);
	if (drop_end)
		*drop_end = found ? min(end, last_end) : end;
	return ret;
}

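/*
 * Worked example (added for clarity; not in the original source): dropping
 * the range [4096, 8191] from a single file extent item covering
 * [0, 16383] first hits the "extent straddles both ends" case: the item
 * is duplicated, the front copy is trimmed to [0, 4095] and the back copy
 * is re-offset to start at 4096; the same pass then re-keys the back copy
 * to begin at 8192, so only [8192, 16383] survives and *drop_end ends up
 * at 8192.
 */
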
int btrfs_drop_extents(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct inode *inode, u64 start,
		       u64 end, int drop_cache)
{
	struct btrfs_path *path;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL,
				   drop_cache, 0, 0, NULL);
	btrfs_free_path(path);
	return ret;
}

static int extent_mergeable(struct extent_buffer *leaf, int slot,
			    u64 objectid, u64 bytenr, u64 orig_offset,
			    u64 *start, u64 *end)
{
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 extent_end;

	if (slot < 0 || slot >= btrfs_header_nritems(leaf))
		return 0;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
		return 0;

	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
	if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG ||
	    btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr ||
	    btrfs_file_extent_offset(leaf, fi) != key.offset - orig_offset ||
	    btrfs_file_extent_compression(leaf, fi) ||
	    btrfs_file_extent_encryption(leaf, fi) ||
	    btrfs_file_extent_other_encoding(leaf, fi))
		return 0;

	extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
	if ((*start && *start != key.offset) || (*end && *end != extent_end))
		return 0;

	*start = key.offset;
	*end = extent_end;
	return 1;
}

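/*
 * Usage note (added for clarity; not in the original source):
 * extent_mergeable() answers whether the item in 'slot' is a plain
 * (uncompressed, unencrypted) regular extent backed by the same disk
 * extent 'bytenr' with a file offset consistent with 'orig_offset'.
 * btrfs_mark_extent_written() below uses it on the slots before and after
 * the one being converted, to decide whether the newly written piece can
 * be merged with its neighbors instead of leaving three adjacent items.
 */
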
/*
 * Mark the extent in the range start - end as written.
 *
 * This changes extent type from 'pre-allocated' to 'regular'.  If only
 * part of the extent is marked as written, the extent will be split into
 * two or three.
 */
int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
			      struct btrfs_inode *inode, u64 start, u64 end)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
	struct btrfs_root *root = inode->root;
	struct extent_buffer *leaf;
	struct btrfs_path *path;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	struct btrfs_key new_key;
	u64 bytenr;
	u64 num_bytes;
	u64 extent_end;
	u64 orig_offset;
	u64 other_start;
	u64 other_end;
	u64 split;
	int del_nr = 0;
	int del_slot = 0;
	int recow;
	int ret;
	u64 ino = btrfs_ino(inode);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
again:
	recow = 0;
	split = start;
	key.objectid = ino;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = split;

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0)
		goto out;
	if (ret > 0 && path->slots[0] > 0)
		path->slots[0]--;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	if (key.objectid != ino ||
	    key.type != BTRFS_EXTENT_DATA_KEY) {
		ret = -EINVAL;
		btrfs_abort_transaction(trans, ret);
		goto out;
	}
	fi = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);
	if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_PREALLOC) {
		ret = -EINVAL;
		btrfs_abort_transaction(trans, ret);
		goto out;
	}
	extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
	if (key.offset > start || extent_end < end) {
		ret = -EINVAL;
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
	num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
	orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi);
	memcpy(&new_key, &key, sizeof(new_key));

	if (start == key.offset && end < extent_end) {
		other_start = 0;
		other_end = start;
		if (extent_mergeable(leaf, path->slots[0] - 1,
				     ino, bytenr, orig_offset,
				     &other_start, &other_end)) {
			new_key.offset = end;
			btrfs_set_item_key_safe(fs_info, path, &new_key);
			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);
			btrfs_set_file_extent_generation(leaf, fi,
							 trans->transid);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							extent_end - end);
			btrfs_set_file_extent_offset(leaf, fi,
						     end - orig_offset);
			fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
					    struct btrfs_file_extent_item);
			btrfs_set_file_extent_generation(leaf, fi,
							 trans->transid);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							end - other_start);
			btrfs_mark_buffer_dirty(leaf);
			goto out;
		}
	}

	if (start > key.offset && end == extent_end) {
		other_start = end;
		other_end = 0;
		if (extent_mergeable(leaf, path->slots[0] + 1,
				     ino, bytenr, orig_offset,
				     &other_start, &other_end)) {
			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							start - key.offset);
			btrfs_set_file_extent_generation(leaf, fi,
							 trans->transid);
			path->slots[0]++;
			new_key.offset = start;
			btrfs_set_item_key_safe(fs_info, path, &new_key);

			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);
			btrfs_set_file_extent_generation(leaf, fi,
							 trans->transid);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							other_end - start);
			btrfs_set_file_extent_offset(leaf, fi,
						     start - orig_offset);
			btrfs_mark_buffer_dirty(leaf);
			goto out;
		}
	}

	while (start > key.offset || end < extent_end) {
		if (key.offset == start)
			split = end;

		new_key.offset = split;
		ret = btrfs_duplicate_item(trans, root, path, &new_key);
		if (ret == -EAGAIN) {
			btrfs_release_path(path);
			goto again;
		}
		if (ret < 0) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}

		leaf = path->nodes[0];
		fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
				    struct btrfs_file_extent_item);
		btrfs_set_file_extent_generation(leaf, fi, trans->transid);
		btrfs_set_file_extent_num_bytes(leaf, fi,
						split - key.offset);

		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);

		btrfs_set_file_extent_generation(leaf, fi, trans->transid);
		btrfs_set_file_extent_offset(leaf, fi, split - orig_offset);
		btrfs_set_file_extent_num_bytes(leaf, fi,
						extent_end - split);
		btrfs_mark_buffer_dirty(leaf);

		ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes,
					   0, root->root_key.objectid,
					   ino, orig_offset);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}

		if (split == start) {
			key.offset = start;
		} else {
			if (start != key.offset) {
				ret = -EINVAL;
				btrfs_abort_transaction(trans, ret);
				goto out;
			}
			path->slots[0]--;
			extent_end = end;
		}
		recow = 1;
	}

	other_start = end;
	other_end = 0;
	if (extent_mergeable(leaf, path->slots[0] + 1,
			     ino, bytenr, orig_offset,
			     &other_start, &other_end)) {
		if (recow) {
			btrfs_release_path(path);
			goto again;
		}
		extent_end = other_end;
		del_slot = path->slots[0] + 1;
		del_nr++;
		ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
					0, root->root_key.objectid,
					ino, orig_offset);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
	}
	other_start = 0;
	other_end = start;
	if (extent_mergeable(leaf, path->slots[0] - 1,
			     ino, bytenr, orig_offset,
			     &other_start, &other_end)) {
		if (recow) {
			btrfs_release_path(path);
			goto again;
		}
		key.offset = other_start;
		del_slot = path->slots[0];
		del_nr++;
		ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
					0, root->root_key.objectid,
					ino, orig_offset);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
	}
	if (del_nr == 0) {
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		btrfs_set_file_extent_type(leaf, fi,
					   BTRFS_FILE_EXTENT_REG);
		btrfs_set_file_extent_generation(leaf, fi, trans->transid);
		btrfs_mark_buffer_dirty(leaf);
	} else {
		fi = btrfs_item_ptr(leaf, del_slot - 1,
				    struct btrfs_file_extent_item);
		btrfs_set_file_extent_type(leaf, fi,
					   BTRFS_FILE_EXTENT_REG);
		btrfs_set_file_extent_generation(leaf, fi, trans->transid);
		btrfs_set_file_extent_num_bytes(leaf, fi,
						extent_end - key.offset);
		btrfs_mark_buffer_dirty(leaf);

		ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
		if (ret < 0) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
	}
out:
	btrfs_free_path(path);
	return 0;
}

/*
 * On error we return an unlocked page and the error value.
 * On success we return a locked page and 0.
 */
static int prepare_uptodate_page(struct inode *inode,
				 struct page *page, u64 pos,
				 bool force_uptodate)
{
	int ret = 0;

	if (((pos & (PAGE_SIZE - 1)) || force_uptodate) &&
	    !PageUptodate(page)) {
		ret = btrfs_readpage(NULL, page);
		if (ret)
			return ret;
		lock_page(page);
		if (!PageUptodate(page)) {
			unlock_page(page);
			return -EIO;
		}
		if (page->mapping != inode->i_mapping) {
			unlock_page(page);
			return -EAGAIN;
		}
	}
	return 0;
}

/*
 * This just gets pages into the page cache and locks them down.
 */
static noinline int prepare_pages(struct inode *inode, struct page **pages,
				  size_t num_pages, loff_t pos,
				  size_t write_bytes, bool force_uptodate)
{
	int i;
	unsigned long index = pos >> PAGE_SHIFT;
	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
	int err = 0;
	int faili;

	for (i = 0; i < num_pages; i++) {
again:
		pages[i] = find_or_create_page(inode->i_mapping, index + i,
					       mask | __GFP_WRITE);
		if (!pages[i]) {
			faili = i - 1;
			err = -ENOMEM;
			goto fail;
		}

		if (i == 0)
			err = prepare_uptodate_page(inode, pages[i], pos,
						    force_uptodate);
		if (!err && i == num_pages - 1)
			err = prepare_uptodate_page(inode, pages[i],
						    pos + write_bytes, false);
		if (err) {
			put_page(pages[i]);
			if (err == -EAGAIN) {
				err = 0;
				goto again;
			}
			faili = i - 1;
			goto fail;
		}
		wait_on_page_writeback(pages[i]);
	}

	return 0;
fail:
	while (faili >= 0) {
		unlock_page(pages[faili]);
		put_page(pages[faili]);
		faili--;
	}
	return err;
}

/*
 * This function locks the extent and properly waits for data=ordered
 * extents to finish before allowing the pages to be modified, if needed.
 *
 * The return value:
 * 1 - the extent is locked
 * 0 - the extent is not locked, and everything is OK
 * -EAGAIN - need to re-prepare the pages
 * any other value < 0 - something went wrong
 */
static noinline int
lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
				size_t num_pages, loff_t pos,
				size_t write_bytes,
				u64 *lockstart, u64 *lockend,
				struct extent_state **cached_state)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
	u64 start_pos;
	u64 last_pos;
	int i;
	int ret = 0;

	start_pos = round_down(pos, fs_info->sectorsize);
	last_pos = start_pos
		+ round_up(pos + write_bytes - start_pos,
			   fs_info->sectorsize) - 1;

	if (start_pos < inode->vfs_inode.i_size) {
		struct btrfs_ordered_extent *ordered;

		lock_extent_bits(&inode->io_tree, start_pos, last_pos,
				 cached_state);
		ordered = btrfs_lookup_ordered_range(inode, start_pos,
						     last_pos - start_pos + 1);
		if (ordered &&
		    ordered->file_offset + ordered->len > start_pos &&
		    ordered->file_offset <= last_pos) {
			unlock_extent_cached(&inode->io_tree, start_pos,
					last_pos, cached_state, GFP_NOFS);
			for (i = 0; i < num_pages; i++) {
				unlock_page(pages[i]);
				put_page(pages[i]);
			}
			btrfs_start_ordered_extent(&inode->vfs_inode,
					ordered, 1);
			btrfs_put_ordered_extent(ordered);
			return -EAGAIN;
		}
		if (ordered)
			btrfs_put_ordered_extent(ordered);
		clear_extent_bit(&inode->io_tree, start_pos, last_pos,
				 EXTENT_DIRTY | EXTENT_DELALLOC |
				 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
				 0, 0, cached_state, GFP_NOFS);
		*lockstart = start_pos;
		*lockend = last_pos;
		ret = 1;
	}

	for (i = 0; i < num_pages; i++) {
		if (clear_page_dirty_for_io(pages[i]))
			account_page_redirty(pages[i]);
		set_page_extent_mapped(pages[i]);
		WARN_ON(!PageLocked(pages[i]));
	}

	return ret;
}

static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
				    size_t *write_bytes)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
	struct btrfs_root *root = inode->root;
	struct btrfs_ordered_extent *ordered;
	u64 lockstart, lockend;
	u64 num_bytes;
	int ret;

	ret = btrfs_start_write_no_snapshotting(root);
	if (!ret)
		return -ENOSPC;

	lockstart = round_down(pos, fs_info->sectorsize);
	lockend = round_up(pos + *write_bytes,
			   fs_info->sectorsize) - 1;

	while (1) {
		lock_extent(&inode->io_tree, lockstart, lockend);
		ordered = btrfs_lookup_ordered_range(inode, lockstart,
						     lockend - lockstart + 1);
		if (!ordered)
			break;
		unlock_extent(&inode->io_tree, lockstart, lockend);
		btrfs_start_ordered_extent(&inode->vfs_inode, ordered, 1);
		btrfs_put_ordered_extent(ordered);
	}

	num_bytes = lockend - lockstart + 1;
	ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes,
			       NULL, NULL, NULL);
	if (ret <= 0) {
		ret = 0;
		btrfs_end_write_no_snapshotting(root);
	} else {
		*write_bytes = min_t(size_t, *write_bytes,
				     num_bytes - pos + lockstart);
	}

	unlock_extent(&inode->io_tree, lockstart, lockend);

	return ret;
}

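/*
 * Worked example (added for clarity; not in the original source), assuming
 * a 4096-byte sectorsize: for pos = 0 and *write_bytes = 10000 we lock
 * [0, 12287] and ask can_nocow_extent() about 12288 bytes.  If it trims
 * num_bytes down to 8192 (only part of the range is NOCOW-capable),
 * *write_bytes becomes min(10000, 8192 - 0 + 0) = 8192, so only the first
 * 8192 bytes are written in place and the caller loops for the rest.
 */
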
static noinline ssize_t __btrfs_buffered_write(struct file *file,
					       struct iov_iter *i,
					       loff_t pos)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct page **pages = NULL;
	struct extent_state *cached_state = NULL;
	struct extent_changeset *data_reserved = NULL;
	u64 release_bytes = 0;
	u64 lockstart;
	u64 lockend;
	size_t num_written = 0;
	int nrptrs;
	int ret = 0;
	bool only_release_metadata = false;
	bool force_page_uptodate = false;

	nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE),
			PAGE_SIZE / (sizeof(struct page *)));
	nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied);
	nrptrs = max(nrptrs, 8);
	pages = kmalloc_array(nrptrs, sizeof(struct page *), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	while (iov_iter_count(i) > 0) {
		size_t offset = pos & (PAGE_SIZE - 1);
		size_t sector_offset;
		size_t write_bytes = min(iov_iter_count(i),
					 nrptrs * (size_t)PAGE_SIZE -
					 offset);
		size_t num_pages = DIV_ROUND_UP(write_bytes + offset,
						PAGE_SIZE);
		size_t reserve_bytes;
		size_t dirty_pages;
		size_t copied;
		size_t dirty_sectors;
		size_t num_sectors;
		int extents_locked;

		WARN_ON(num_pages > nrptrs);

		/*
		 * Fault pages before locking them in prepare_pages
		 * to avoid recursive lock
		 */
		if (unlikely(iov_iter_fault_in_readable(i, write_bytes))) {
			ret = -EFAULT;
			break;
		}

		sector_offset = pos & (fs_info->sectorsize - 1);
		reserve_bytes = round_up(write_bytes + sector_offset,
					 fs_info->sectorsize);

		extent_changeset_release(data_reserved);
		ret = btrfs_check_data_free_space(inode, &data_reserved, pos,
						  write_bytes);
		if (ret < 0) {
			if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
						      BTRFS_INODE_PREALLOC)) &&
			    check_can_nocow(BTRFS_I(inode), pos,
					    &write_bytes) > 0) {
				/*
				 * For nodata cow case, no need to reserve
				 * data space.
				 */
				only_release_metadata = true;
				/*
				 * our prealloc extent may be smaller than
				 * write_bytes, so scale down.
				 */
				num_pages = DIV_ROUND_UP(write_bytes + offset,
							 PAGE_SIZE);
				reserve_bytes = round_up(write_bytes +
							 sector_offset,
							 fs_info->sectorsize);
			} else {
				break;
			}
		}

		WARN_ON(reserve_bytes == 0);
		ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
						      reserve_bytes);
		if (ret) {
			if (!only_release_metadata)
				btrfs_free_reserved_data_space(inode,
						data_reserved, pos,
						write_bytes);
			else
				btrfs_end_write_no_snapshotting(root);
			break;
		}

		release_bytes = reserve_bytes;
again:
		/*
		 * This is going to setup the pages array with the number of
		 * pages we want, so we don't really need to worry about the
		 * contents of pages from loop to loop
		 */
		ret = prepare_pages(inode, pages, num_pages,
				    pos, write_bytes,
				    force_page_uptodate);
		if (ret) {
			btrfs_delalloc_release_extents(BTRFS_I(inode),
						       reserve_bytes);
			break;
		}

		extents_locked = lock_and_cleanup_extent_if_need(
				BTRFS_I(inode), pages,
				num_pages, pos, write_bytes, &lockstart,
				&lockend, &cached_state);
		if (extents_locked < 0) {
			if (extents_locked == -EAGAIN)
				goto again;
			btrfs_delalloc_release_extents(BTRFS_I(inode),
						       reserve_bytes);
			ret = extents_locked;
			break;
		}

		copied = btrfs_copy_from_user(pos, write_bytes, pages, i);

		num_sectors = BTRFS_BYTES_TO_BLKS(fs_info, reserve_bytes);
		dirty_sectors = round_up(copied + sector_offset,
					 fs_info->sectorsize);
		dirty_sectors = BTRFS_BYTES_TO_BLKS(fs_info, dirty_sectors);

		/*
		 * if we have trouble faulting in the pages, fall
		 * back to one page at a time
		 */
		if (copied < write_bytes)
			nrptrs = 1;

		if (copied == 0) {
			force_page_uptodate = true;
			dirty_sectors = 0;
			dirty_pages = 0;
		} else {
			force_page_uptodate = false;
			dirty_pages = DIV_ROUND_UP(copied + offset,
						   PAGE_SIZE);
		}

		if (num_sectors > dirty_sectors) {
			/* release everything except the sectors we dirtied */
			release_bytes -= dirty_sectors <<
						fs_info->sb->s_blocksize_bits;
			if (only_release_metadata) {
				btrfs_delalloc_release_metadata(BTRFS_I(inode),
								release_bytes);
			} else {
				u64 __pos;

				__pos = round_down(pos,
						   fs_info->sectorsize) +
					(dirty_pages << PAGE_SHIFT);
				btrfs_delalloc_release_space(inode,
						data_reserved, __pos,
						release_bytes);
			}
		}

		release_bytes = round_up(copied + sector_offset,
					 fs_info->sectorsize);

		if (copied > 0)
			ret = btrfs_dirty_pages(inode, pages, dirty_pages,
						pos, copied, NULL);
		if (extents_locked)
			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
					     lockstart, lockend, &cached_state,
					     GFP_NOFS);
		btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
		if (ret) {
			btrfs_drop_pages(pages, num_pages);
			break;
		}

		release_bytes = 0;
		if (only_release_metadata)
			btrfs_end_write_no_snapshotting(root);

		if (only_release_metadata && copied > 0) {
			lockstart = round_down(pos,
					       fs_info->sectorsize);
			lockend = round_up(pos + copied,
					   fs_info->sectorsize) - 1;

			set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
				       lockend, EXTENT_NORESERVE, NULL,
				       NULL, GFP_NOFS);
			only_release_metadata = false;
		}

		btrfs_drop_pages(pages, num_pages);

		cond_resched();

		balance_dirty_pages_ratelimited(inode->i_mapping);
		if (dirty_pages < (fs_info->nodesize >> PAGE_SHIFT) + 1)
			btrfs_btree_balance_dirty(fs_info);

		pos += copied;
		num_written += copied;
	}

	kfree(pages);

	if (release_bytes) {
		if (only_release_metadata) {
			btrfs_end_write_no_snapshotting(root);
			btrfs_delalloc_release_metadata(BTRFS_I(inode),
							release_bytes);
		} else {
			btrfs_delalloc_release_space(inode, data_reserved,
					round_down(pos, fs_info->sectorsize),
					release_bytes);
		}
	}

	extent_changeset_free(data_reserved);
	return num_written ? num_written : ret;
}

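/*
 * Worked example of the reservation bookkeeping above (added for clarity;
 * not in the original source): a 5000-byte write at pos 100 with a
 * 4096-byte sectorsize reserves round_up(5000 + 100, 4096) = 8192 bytes.
 * If the copy faults after 3000 bytes, dirty_sectors becomes
 * round_up(3000 + 100, 4096) / 4096 = 1 while num_sectors is 2, so 4096 of
 * the reserved bytes are released immediately and the loop retries the
 * remainder one page at a time (nrptrs = 1).
 */
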
static ssize_t __btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	loff_t pos = iocb->ki_pos;
	ssize_t written;
	ssize_t written_buffered;
	loff_t endbyte;
	int err;

	written = generic_file_direct_write(iocb, from);

	if (written < 0 || !iov_iter_count(from))
		return written;

	pos += written;
	written_buffered = __btrfs_buffered_write(file, from, pos);
	if (written_buffered < 0) {
		err = written_buffered;
		goto out;
	}
	/*
	 * Ensure all data is persisted.  We want the next direct IO read to
	 * be able to read what was just written.
	 */
	endbyte = pos + written_buffered - 1;
	err = btrfs_fdatawrite_range(inode, pos, endbyte);
	if (err)
		goto out;
	err = filemap_fdatawait_range(inode->i_mapping, pos, endbyte);
	if (err)
		goto out;
	written += written_buffered;
	iocb->ki_pos = pos + written_buffered;
	invalidate_mapping_pages(file->f_mapping, pos >> PAGE_SHIFT,
				 endbyte >> PAGE_SHIFT);
out:
	return written ? written : err;
}

static void update_time_for_write(struct inode *inode)
{
	struct timespec now;

	if (IS_NOCMTIME(inode))
		return;

	now = current_time(inode);
	if (!timespec_equal(&inode->i_mtime, &now))
		inode->i_mtime = now;

	if (!timespec_equal(&inode->i_ctime, &now))
		inode->i_ctime = now;

	if (IS_I_VERSION(inode))
		inode_inc_iversion(inode);
}

static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
				     struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 start_pos;
	u64 end_pos;
	ssize_t num_written = 0;
	bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
	ssize_t err;
	loff_t pos;
	size_t count = iov_iter_count(from);
	loff_t oldsize;
	int clean_page = 0;

	if (!(iocb->ki_flags & IOCB_DIRECT) &&
	    (iocb->ki_flags & IOCB_NOWAIT))
		return -EOPNOTSUPP;

	if (!inode_trylock(inode)) {
		if (iocb->ki_flags & IOCB_NOWAIT)
			return -EAGAIN;
		inode_lock(inode);
	}

	err = generic_write_checks(iocb, from);
	if (err <= 0) {
		inode_unlock(inode);
		return err;
	}

	pos = iocb->ki_pos;
	if (iocb->ki_flags & IOCB_NOWAIT) {
		/*
		 * We will allocate space in case nodatacow is not set,
		 * so bail
		 */
		if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
					       BTRFS_INODE_PREALLOC)) ||
		    check_can_nocow(BTRFS_I(inode), pos, &count) <= 0) {
			inode_unlock(inode);
			return -EAGAIN;
		}
	}

	current->backing_dev_info = inode_to_bdi(inode);
	err = file_remove_privs(file);
	if (err) {
		inode_unlock(inode);
		goto out;
	}

	/*
	 * If BTRFS flips readonly due to some impossible error
	 * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR),
	 * although we have opened a file as writable, we have
	 * to stop this write operation to ensure FS consistency.
	 */
	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
		inode_unlock(inode);
		err = -EROFS;
		goto out;
	}

	/*
	 * We reserve space for updating the inode when we reserve space for
	 * the extent we are going to write, so we will enospc out there.  We
	 * don't need to start yet another transaction to update the inode,
	 * as we will update the inode item straight away before closing the
	 * transaction anyway.
	 */
	update_time_for_write(inode);

	start_pos = round_down(pos, fs_info->sectorsize);
	oldsize = i_size_read(inode);
	if (start_pos > oldsize) {
		/* Expand hole size to cover write data, preventing empty gap */
		end_pos = round_up(pos + count,
				   fs_info->sectorsize);
		err = btrfs_cont_expand(inode, oldsize, end_pos);
		if (err) {
			inode_unlock(inode);
			goto out;
		}
		if (start_pos > round_up(oldsize, fs_info->sectorsize))
			clean_page = 1;
	}

	if (sync)
		atomic_inc(&BTRFS_I(inode)->sync_writers);

	if (iocb->ki_flags & IOCB_DIRECT) {
		num_written = __btrfs_direct_write(iocb, from);
	} else {
		num_written = __btrfs_buffered_write(file, from, pos);
		if (num_written > 0)
			iocb->ki_pos = pos + num_written;
		if (clean_page)
			pagecache_isize_extended(inode, oldsize,
						 i_size_read(inode));
	}

	inode_unlock(inode);

	/*
	 * We also have to set last_sub_trans to the current log transid,
	 * otherwise subsequent syncs to a file that's been synced in this
	 * transaction will appear to have already occurred.
	 */
	spin_lock(&BTRFS_I(inode)->lock);
	BTRFS_I(inode)->last_sub_trans = root->log_transid;
	spin_unlock(&BTRFS_I(inode)->lock);
	if (num_written > 0)
		num_written = generic_write_sync(iocb, num_written);

	if (sync)
		atomic_dec(&BTRFS_I(inode)->sync_writers);
out:
	current->backing_dev_info = NULL;
	return num_written ? num_written : err;
}

int btrfs_release_file(struct inode *inode, struct file *filp)
{
	struct btrfs_file_private *private = filp->private_data;

	if (private && private->trans)
		btrfs_ioctl_trans_end(filp);
	if (private && private->filldir_buf)
		kfree(private->filldir_buf);
	kfree(private);
	filp->private_data = NULL;

	/*
	 * ordered_data_close is set by setattr when we are about to truncate
	 * a file from a non-zero size to a zero size.  This tries to flush
	 * down new bytes that may have been written if the application were
	 * using truncate to replace a file in place.
	 */
	if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
			       &BTRFS_I(inode)->runtime_flags))
		filemap_flush(inode->i_mapping);
	return 0;
}

static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end)
{
	int ret;

	atomic_inc(&BTRFS_I(inode)->sync_writers);
	ret = btrfs_fdatawrite_range(inode, start, end);
	atomic_dec(&BTRFS_I(inode)->sync_writers);

	return ret;
}

/*
 * fsync call for both files and directories.  This logs the inode into
 * the tree log instead of forcing full commits whenever possible.
 *
 * It needs to call filemap_fdatawait() so that all the ordered extent
 * updates in the metadata btree are up to date before copying to the log.
 *
 * It drops the inode mutex before doing the tree log commit.  This is an
 * important optimization for directories because holding the mutex
 * prevents new operations on the dir while we write to disk.
 */
int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_trans_handle *trans;
	struct btrfs_log_ctx ctx;
	int ret = 0, err;
	bool full_sync = false;
	u64 len;

	/*
	 * The range length can be represented by u64; we have to do the
	 * typecasts to avoid signed overflow if it's [0, LLONG_MAX],
	 * eg. from fsync().
	 */
	len = (u64)end - (u64)start + 1;
	trace_btrfs_sync_file(file, datasync);

	btrfs_init_log_ctx(&ctx, inode);

	/*
	 * We write the dirty pages in the range and wait until they complete
	 * out of the ->i_mutex.  That way the dirty pages can be flushed by
	 * multiple tasks, which makes the performance better.
	 */
	ret = start_ordered_ops(inode, start, end);
	if (ret)
		goto out;

	inode_lock(inode);
	atomic_inc(&root->log_batch);
	full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
			     &BTRFS_I(inode)->runtime_flags);
	/*
	 * We might have had more pages made dirty after calling
	 * start_ordered_ops and before acquiring the inode's i_mutex.
	 */
	if (full_sync) {
		/*
		 * For a full sync, we need to make sure any ordered
		 * operations start and finish before we start logging the
		 * inode, so that all extents are persisted and the
		 * respective file extent items are in the fs/subvol btree.
		 */
		ret = btrfs_wait_ordered_range(inode, start, len);
	} else {
		/*
		 * Start any new ordered operations before starting to log
		 * the inode; we will wait for them to finish in
		 * btrfs_sync_log().
		 *
		 * Right before acquiring the inode's mutex we might have had
		 * new writes dirtying pages, and the respective ordered
		 * operations are only created later, when writeback runs the
		 * writepage/writepages address space operations (which
		 * invoke the fill_delalloc callbacks).  Starting them here,
		 * before we log the inode, guarantees the logging code sees
		 * all the extent maps and ordered extents it must wait on,
		 * instead of racing against writeback that starts while we
		 * log.
		 */
		ret = start_ordered_ops(inode, start, end);
	}
	if (ret) {
		inode_unlock(inode);
		goto out;
	}
	atomic_inc(&root->log_batch);

	/*
	 * If the last transaction that changed this file was before the
	 * current transaction and we have the full sync flag set in our
	 * inode, we can bail out now without any syncing.
	 *
	 * Note that we can't bail out if the full sync flag isn't set.  This
	 * is because when the full sync flag is set we start all ordered
	 * extents and wait for them to fully complete - when they complete
	 * they update the inode's last_trans field (through
	 * btrfs_finish_ordered_io() -> btrfs_update_inode_fallback() ->
	 * btrfs_update_inode() -> btrfs_set_inode_last_trans()), so we are
	 * sure last_trans is up to date and can safely do this check.  In
	 * the fast path we only start our ordered extents and don't wait
	 * for them to complete, so at this point their last_trans might
	 * still be less than or equal to fs_info->last_trans_committed even
	 * though data is not yet persisted, and any number of transactions
	 * can start and commit in the meanwhile.
	 */
	smp_mb();
	if (btrfs_inode_in_log(BTRFS_I(inode), fs_info->generation) ||
	    (full_sync && BTRFS_I(inode)->last_trans <=
	     fs_info->last_trans_committed) ||
	    (!btrfs_have_ordered_extents_in_range(inode, start, len) &&
	     BTRFS_I(inode)->last_trans
	     <= fs_info->last_trans_committed)) {
		/*
		 * We've had everything committed since the last time we were
		 * modified, so clear this flag in case it was set for
		 * whatever reason; it's no longer relevant.
		 */
		clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
			  &BTRFS_I(inode)->runtime_flags);
		/*
		 * An ordered extent might have started before and completed
		 * already with io errors, in which case the inode was not
		 * updated and we end up here.  So check the inode's mapping
		 * for any errors that might have happened since we last
		 * checked in fsync.
		 */
		ret = filemap_check_wb_err(inode->i_mapping, file->f_wb_err);
		inode_unlock(inode);
		goto out;
	}

	/*
	 * ok we haven't committed the transaction yet, lets do a commit
	 */
	if (file->private_data)
		btrfs_ioctl_trans_end(file);

	/*
	 * We use start here because we will need to wait on the IO to
	 * complete in btrfs_sync_log, which could require joining a
	 * transaction (for example checking cross references in the nocow
	 * path).  If we use join here we could get into a situation where
	 * we're waiting on IO to happen that is blocked on a transaction
	 * trying to commit.  With start we inc the extwriter counter, so we
	 * wait for all extwriters to exit before we start blocking joiners.
	 */
	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		inode_unlock(inode);
		goto out;
	}
	trans->sync = true;

	ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx);
	if (ret < 0) {
		/* Fallthrough and commit/free transaction. */
		ret = 1;
	}

	/*
	 * We've logged all the items and now have a consistent version of
	 * the file in the log.  It is possible that someone will come in
	 * and modify the file, but that's fine because the log is
	 * consistent on disk, and we have references to all of the file's
	 * extents.
	 *
	 * It is possible that someone will come in and log the file again,
	 * but that will end up using the synchronization inside
	 * btrfs_sync_log to keep things safe.
	 */
	inode_unlock(inode);

	/*
	 * If any of the ordered extents had an error, just return it to
	 * user space, so that the application knows some writes didn't
	 * succeed and can take proper action (retry for e.g.).  Blindly
	 * committing the transaction in this case would fool userspace into
	 * thinking everything was successful.  And we also want to make
	 * sure our log doesn't contain any extra reference for the current
	 * transaction.
	 */
	if (ctx.io_err) {
		btrfs_end_transaction(trans);
		ret = ctx.io_err;
		goto out;
	}

	if (ret != BTRFS_NO_LOG_SYNC) {
		if (!ret) {
			ret = btrfs_sync_log(trans, root, &ctx);
			if (!ret) {
				ret = btrfs_end_transaction(trans);
				goto out;
			}
		}
		if (!full_sync) {
			ret = btrfs_wait_ordered_range(inode, start, len);
			if (ret) {
				btrfs_end_transaction(trans);
				goto out;
			}
		}
		ret = btrfs_commit_transaction(trans);
	} else {
		ret = btrfs_end_transaction(trans);
	}
out:
	ASSERT(list_empty(&ctx.list));
	err = file_check_and_advance_wb_err(file);
	if (!ret)
		ret = err;
	return ret > 0 ? -EIO : ret;
}

static const struct vm_operations_struct btrfs_file_vm_ops = {
	.fault		= filemap_fault,
	.map_pages	= filemap_map_pages,
	.page_mkwrite	= btrfs_page_mkwrite,
};

static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct address_space *mapping = filp->f_mapping;

	if (!mapping->a_ops->readpage)
		return -ENOEXEC;

	file_accessed(filp);
	vma->vm_ops = &btrfs_file_vm_ops;

	return 0;
}

static int hole_mergeable(struct btrfs_inode *inode, struct extent_buffer *leaf,
			  int slot, u64 start, u64 end)
{
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;

	if (slot < 0 || slot >= btrfs_header_nritems(leaf))
		return 0;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	if (key.objectid != btrfs_ino(inode) ||
	    key.type != BTRFS_EXTENT_DATA_KEY)
		return 0;

	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);

	if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
		return 0;

	if (btrfs_file_extent_disk_bytenr(leaf, fi))
		return 0;

	if (key.offset == end)
		return 1;
	if (key.offset + btrfs_file_extent_num_bytes(leaf, fi) == start)
		return 1;
	return 0;
}

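/*
 * Usage note (added for clarity; not in the original source): a "hole"
 * file extent item is a regular extent whose disk_bytenr is 0, which is
 * what the checks above test for.  fill_holes() below uses this helper to
 * grow an adjacent hole item (on either side of [start, end)) instead of
 * inserting a new one, when the NO_HOLES feature is not enabled.
 */
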
static int fill_holes(struct btrfs_trans_handle *trans,
		struct btrfs_inode *inode,
		struct btrfs_path *path, u64 offset, u64 end)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
	struct btrfs_root *root = inode->root;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	struct extent_map *hole_em;
	struct extent_map_tree *em_tree = &inode->extent_tree;
	struct btrfs_key key;
	int ret;

	if (btrfs_fs_incompat(fs_info, NO_HOLES))
		goto out;

	key.objectid = btrfs_ino(inode);
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = offset;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret <= 0) {
		/*
		 * We should have dropped this offset, so if we find it then
		 * something has gone horribly wrong.
		 */
		if (ret == 0)
			ret = -EINVAL;
		return ret;
	}

	leaf = path->nodes[0];
	if (hole_mergeable(inode, leaf, path->slots[0] - 1, offset, end)) {
		u64 num_bytes;

		path->slots[0]--;
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		num_bytes = btrfs_file_extent_num_bytes(leaf, fi) +
			end - offset;
		btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
		btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
		btrfs_set_file_extent_offset(leaf, fi, 0);
		btrfs_mark_buffer_dirty(leaf);
		goto out;
	}

	if (hole_mergeable(inode, leaf, path->slots[0], offset, end)) {
		u64 num_bytes;

		key.offset = offset;
		btrfs_set_item_key_safe(fs_info, path, &key);
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end -
			offset;
		btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
		btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
		btrfs_set_file_extent_offset(leaf, fi, 0);
		btrfs_mark_buffer_dirty(leaf);
		goto out;
	}
	btrfs_release_path(path);

	ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode),
			offset, 0, 0, end - offset, 0, end - offset, 0, 0, 0);
	if (ret)
		return ret;

out:
	btrfs_release_path(path);

	hole_em = alloc_extent_map();
	if (!hole_em) {
		btrfs_drop_extent_cache(inode, offset, end - 1, 0);
		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
	} else {
		hole_em->start = offset;
		hole_em->len = end - offset;
		hole_em->ram_bytes = hole_em->len;
		hole_em->orig_start = offset;

		hole_em->block_start = EXTENT_MAP_HOLE;
		hole_em->block_len = 0;
		hole_em->orig_block_len = 0;
		hole_em->bdev = fs_info->fs_devices->latest_bdev;
		hole_em->compress_type = BTRFS_COMPRESS_NONE;
		hole_em->generation = trans->transid;

		do {
			btrfs_drop_extent_cache(inode, offset, end - 1, 0);
			write_lock(&em_tree->lock);
			ret = add_extent_mapping(em_tree, hole_em, 1);
			write_unlock(&em_tree->lock);
		} while (ret == -EEXIST);
		free_extent_map(hole_em);
		if (ret)
			set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
				&inode->runtime_flags);
	}

	return 0;
}

/*
 * Find a hole extent on the given inode and change start/len to the end
 * of the hole extent (a hole/vacuum extent whose em->start <= start &&
 * em->start + em->len > start).
 * When a hole extent is found, return 1 and modify start/len.
 */
static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_map *em;
	int ret = 0;

	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0,
			      round_down(*start, fs_info->sectorsize),
			      round_up(*len, fs_info->sectorsize), 0);
	if (IS_ERR(em))
		return PTR_ERR(em);

	/* Hole or vacuum extent (the latter only exists in no-holes mode) */
	if (em->block_start == EXTENT_MAP_HOLE) {
		ret = 1;
		*len = em->start + em->len > *start + *len ?
		       0 : *start + *len - em->start - em->len;
		*start = em->start + em->len;
	}
	free_extent_map(em);
	return ret;
}

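/*
 * Punch a hole in the range [offset, offset + len): zero any partial blocks
 * at the edges, drop the full blocks in between and, unless the NO_HOLES
 * feature is enabled, insert explicit hole extent items in their place.
 */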
static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_state *cached_state = NULL;
	struct btrfs_path *path;
	struct btrfs_block_rsv *rsv;
	struct btrfs_trans_handle *trans;
	u64 lockstart;
	u64 lockend;
	u64 tail_start;
	u64 tail_len;
	u64 orig_start = offset;
	u64 cur_offset;
	u64 min_size = btrfs_calc_trans_metadata_size(fs_info, 1);
	u64 drop_end;
	int ret = 0;
	int err = 0;
	unsigned int rsv_count;
	bool same_block;
	bool no_holes = btrfs_fs_incompat(fs_info, NO_HOLES);
	u64 ino_size;
	bool truncated_block = false;
	bool updated_inode = false;

	ret = btrfs_wait_ordered_range(inode, offset, len);
	if (ret)
		return ret;

	inode_lock(inode);
	ino_size = round_up(inode->i_size, fs_info->sectorsize);
	ret = find_first_non_hole(inode, &offset, &len);
	if (ret < 0)
		goto out_only_mutex;
	if (ret && !len) {
		/* Already in a large hole */
		ret = 0;
		goto out_only_mutex;
	}

	lockstart = round_up(offset, btrfs_inode_sectorsize(inode));
	lockend = round_down(offset + len,
			     btrfs_inode_sectorsize(inode)) - 1;
	same_block = (BTRFS_BYTES_TO_BLKS(fs_info, offset))
		== (BTRFS_BYTES_TO_BLKS(fs_info, offset + len - 1));
	/*
	 * We needn't truncate any block which is beyond the end of the file
	 * because we are sure there is no data there.
	 */
	/*
	 * Only do this if we are in the same block and we aren't doing the
	 * entire block.
	 */
	if (same_block && len < fs_info->sectorsize) {
		if (offset < ino_size) {
			truncated_block = true;
			ret = btrfs_truncate_block(inode, offset, len, 0);
		} else {
			ret = 0;
		}
		goto out_only_mutex;
	}

	/* zero back part of the first block */
	if (offset < ino_size) {
		truncated_block = true;
		ret = btrfs_truncate_block(inode, offset, 0, 0);
		if (ret) {
			inode_unlock(inode);
			return ret;
		}
	}

	/* Check the aligned pages after the first unaligned page,
	 * if offset != orig_start, which means the first unaligned page
	 * including several following pages are already in holes,
	 * the extra check can be skipped */
	if (offset == orig_start) {
		/* after truncate page, check hole again */
		len = offset + len - lockstart;
		offset = lockstart;
		ret = find_first_non_hole(inode, &offset, &len);
		if (ret < 0)
			goto out_only_mutex;
		if (ret && !len) {
			ret = 0;
			goto out_only_mutex;
		}
		lockstart = offset;
	}

	/* Check the tail unaligned part is in a hole */
	tail_start = lockend + 1;
	tail_len = offset + len - tail_start;
	if (tail_len) {
		ret = find_first_non_hole(inode, &tail_start, &tail_len);
		if (unlikely(ret < 0))
			goto out_only_mutex;
		if (!ret) {
			/* zero the front end of the last page */
			if (tail_start + tail_len < ino_size) {
				truncated_block = true;
				ret = btrfs_truncate_block(inode,
							tail_start + tail_len,
							0, 1);
				if (ret)
					goto out_only_mutex;
			}
		}
	}

	if (lockend < lockstart) {
		ret = 0;
		goto out_only_mutex;
	}

	while (1) {
		struct btrfs_ordered_extent *ordered;

		truncate_pagecache_range(inode, lockstart, lockend);

		lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
				 &cached_state);
		ordered = btrfs_lookup_first_ordered_extent(inode, lockend);

		/*
		 * We need to make sure we have no ordered extents in this range
		 * and nobody raced in and read a page in this range, if we did
		 * we need to try again.
		 */
		if ((!ordered ||
		    (ordered->file_offset + ordered->len <= lockstart ||
		     ordered->file_offset > lockend)) &&
		     !btrfs_page_exists_in_range(inode, lockstart, lockend)) {
			if (ordered)
				btrfs_put_ordered_extent(ordered);
			break;
		}
		if (ordered)
			btrfs_put_ordered_extent(ordered);
		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
				     lockend, &cached_state, GFP_NOFS);
		ret = btrfs_wait_ordered_range(inode, lockstart,
					       lockend - lockstart + 1);
		if (ret) {
			inode_unlock(inode);
			return ret;
		}
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
	if (!rsv) {
		ret = -ENOMEM;
		goto out_free;
	}
	rsv->size = btrfs_calc_trans_metadata_size(fs_info, 1);
	rsv->failfast = 1;

	/*
	 * 1 - update the inode
	 * 1 - removing the extents in the range
	 * 1 - adding the hole extent if no_holes isn't set
	 */
	rsv_count = no_holes ? 2 : 3;
	trans = btrfs_start_transaction(root, rsv_count);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		goto out_free;
	}

	ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv,
				      min_size, 0);
	BUG_ON(ret);	/* shouldn't happen */
	trans->block_rsv = rsv;

	cur_offset = lockstart;
	len = lockend - cur_offset;
	while (cur_offset < lockend) {
		ret = __btrfs_drop_extents(trans, root, inode, path,
					   cur_offset, lockend + 1,
					   &drop_end, 1, 0, 0, NULL);
		if (ret != -ENOSPC)
			break;

		trans->block_rsv = &fs_info->trans_block_rsv;

		if (cur_offset < drop_end && cur_offset < ino_size) {
			ret = fill_holes(trans, BTRFS_I(inode), path,
					cur_offset, drop_end);
			if (ret) {
				/*
				 * If we failed then we didn't insert our hole
				 * entries in the unreplicated area, so the
				 * transaction is going to be dirty anyway,
				 * so we have to abort it.
				 */
				btrfs_abort_transaction(trans, ret);
				err = ret;
				break;
			}
		}

		cur_offset = drop_end;

		ret = btrfs_update_inode(trans, root, inode);
		if (ret) {
			err = ret;
			break;
		}

		btrfs_end_transaction(trans);
		btrfs_btree_balance_dirty(fs_info);

		trans = btrfs_start_transaction(root, rsv_count);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			trans = NULL;
			break;
		}

		ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv,
					      rsv, min_size, 0);
		BUG_ON(ret);	/* shouldn't happen */
		trans->block_rsv = rsv;

		ret = find_first_non_hole(inode, &cur_offset, &len);
		if (unlikely(ret < 0))
			break;
		if (ret && !len) {
			ret = 0;
			break;
		}
	}

	if (ret) {
		err = ret;
		goto out_trans;
	}

	trans->block_rsv = &fs_info->trans_block_rsv;
	/*
	 * If we are using the NO_HOLES feature we might have had already an
	 * hole that overlaps a part of the region [lockstart, lockend] and
	 * ends at (or beyond) lockend. Since we have no file extent items to
	 * represent holes, drop_end can be less than lockend and so we must
	 * make sure we have an extent map representing the existing hole (the
	 * call to __btrfs_drop_extents() might have dropped an existing extent
	 * map representing the existing hole), otherwise the fast fsync path
	 * will not record the existence of the hole.
	 */
	if (drop_end <= lockend)
		drop_end = lockend + 1;
	/*
	 * Don't insert file hole extent item if it's for a range beyond eof
	 * (because it's useless) or if it represents a 0 bytes range (when
	 * cur_offset == drop_end).
	 */
	if (cur_offset < ino_size && cur_offset < drop_end) {
		ret = fill_holes(trans, BTRFS_I(inode), path,
				cur_offset, drop_end);
		if (ret) {
			/* Same comment as above. */
			btrfs_abort_transaction(trans, ret);
			err = ret;
			goto out_trans;
		}
	}

out_trans:
	if (!trans)
		goto out_free;

	inode_inc_iversion(inode);
	inode->i_mtime = inode->i_ctime = current_time(inode);

	trans->block_rsv = &fs_info->trans_block_rsv;
	ret = btrfs_update_inode(trans, root, inode);
	updated_inode = true;
	btrfs_end_transaction(trans);
	btrfs_btree_balance_dirty(fs_info);
out_free:
	btrfs_free_path(path);
	btrfs_free_block_rsv(fs_info, rsv);
out:
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			     &cached_state, GFP_NOFS);
out_only_mutex:
	if (!updated_inode && truncated_block && !ret && !err) {
		/*
		 * If we only end up zeroing part of a page, we still need to
		 * update the inode item, so that all the time fields are
		 * updated as well as the "existed" flag and the page cache
		 * flushed.
		 */
		trans = btrfs_start_transaction(root, 1);
		if (IS_ERR(trans)) {
			err = PTR_ERR(trans);
		} else {
			err = btrfs_update_inode(trans, root, inode);
			ret = btrfs_end_transaction(trans);
		}
	}
	inode_unlock(inode);
	if (ret && !err)
		err = ret;
	return err;
}

/* Helper structure to record which range is already reserved */
struct falloc_range {
	struct list_head list;
	u64 start;
	u64 len;
};

/*
 * Helper function to add a falloc range
 *
 * Caller should have locked the larger range of extents containing
 * [start, len)
 */
static int add_falloc_range(struct list_head *head, u64 start, u64 len)
{
	struct falloc_range *prev = NULL;
	struct falloc_range *range = NULL;

	if (list_empty(head))
		goto insert;

	/*
	 * As fallocate iterates by bytenr order, we only need to check
	 * the last range.
	 */
	prev = list_entry(head->prev, struct falloc_range, list);
	if (prev->start + prev->len == start) {
		prev->len += len;
		return 0;
	}
insert:
	range = kmalloc(sizeof(*range), GFP_KERNEL);
	if (!range)
		return -ENOMEM;
	range->start = start;
	range->len = len;
	list_add_tail(&range->list, head);
	return 0;
}

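/*
 * Preallocate space for the range [offset, offset + len), reserving qgroup
 * space only for the parts that are holes or lie beyond i_size. With
 * FALLOC_FL_PUNCH_HOLE this delegates to btrfs_punch_hole() instead.
 */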
static long btrfs_fallocate(struct file *file, int mode,
			    loff_t offset, loff_t len)
{
	struct inode *inode = file_inode(file);
	struct extent_state *cached_state = NULL;
	struct extent_changeset *data_reserved = NULL;
	struct falloc_range *range;
	struct falloc_range *tmp;
	struct list_head reserve_list;
	u64 cur_offset;
	u64 last_byte;
	u64 alloc_start;
	u64 alloc_end;
	u64 alloc_hint = 0;
	u64 locked_end;
	u64 actual_end = 0;
	struct extent_map *em;
	int blocksize = btrfs_inode_sectorsize(inode);
	int ret;

	alloc_start = round_down(offset, blocksize);
	alloc_end = round_up(offset + len, blocksize);
	cur_offset = alloc_start;

	/* Make sure we aren't being given some crap mode */
	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
		return -EOPNOTSUPP;

	if (mode & FALLOC_FL_PUNCH_HOLE)
		return btrfs_punch_hole(inode, offset, len);

	/*
	 * Only trigger disk allocation, don't trigger qgroup reserve
	 *
	 * For qgroup space, it will be checked later.
	 */
	ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode),
			alloc_end - alloc_start);
	if (ret < 0)
		return ret;

	inode_lock(inode);

	if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) {
		ret = inode_newsize_ok(inode, offset + len);
		if (ret)
			goto out;
	}

	/*
	 * TODO: Move these two operations after we have checked
	 * accurate reserved space, or fallocate can still fail but
	 * with page truncated or size expanded.
	 *
	 * But that's a minor problem and won't do much harm BTW.
	 */
	if (alloc_start > inode->i_size) {
		ret = btrfs_cont_expand(inode, i_size_read(inode),
					alloc_start);
		if (ret)
			goto out;
	} else if (offset + len > inode->i_size) {
		/*
		 * If we are fallocating from the end of the file onward we
		 * need to zero out the end of the block if i_size lands in
		 * the middle of a block.
		 */
		ret = btrfs_truncate_block(inode, inode->i_size, 0, 0);
		if (ret)
			goto out;
	}

	/*
	 * wait for ordered IO before we have any locks.  We'll loop again
	 * below with the locks held.
	 */
	ret = btrfs_wait_ordered_range(inode, alloc_start,
				       alloc_end - alloc_start);
	if (ret)
		goto out;

	locked_end = alloc_end - 1;
	while (1) {
		struct btrfs_ordered_extent *ordered;

		/* the extent lock is ordered inside the running
		 * transaction
		 */
		lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
				 locked_end, &cached_state);
		ordered = btrfs_lookup_first_ordered_extent(inode,
							    alloc_end - 1);
		if (ordered &&
		    ordered->file_offset + ordered->len > alloc_start &&
		    ordered->file_offset < alloc_end) {
			btrfs_put_ordered_extent(ordered);
			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
					     alloc_start, locked_end,
					     &cached_state, GFP_KERNEL);
			/*
			 * we can't wait on the range with the transaction
			 * running or with the extent lock held
			 */
			ret = btrfs_wait_ordered_range(inode, alloc_start,
						       alloc_end - alloc_start);
			if (ret)
				goto out;
		} else {
			if (ordered)
				btrfs_put_ordered_extent(ordered);
			break;
		}
	}

	/* First, check if we exceed the qgroup limit */
	INIT_LIST_HEAD(&reserve_list);
	while (1) {
		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
				      alloc_end - cur_offset, 0);
		if (IS_ERR(em)) {
			ret = PTR_ERR(em);
			break;
		}
		last_byte = min(extent_map_end(em), alloc_end);
		actual_end = min_t(u64, extent_map_end(em), offset + len);
		last_byte = ALIGN(last_byte, blocksize);
		if (em->block_start == EXTENT_MAP_HOLE ||
		    (cur_offset >= inode->i_size &&
		     !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
			ret = add_falloc_range(&reserve_list, cur_offset,
					       last_byte - cur_offset);
			if (ret < 0) {
				free_extent_map(em);
				break;
			}
			ret = btrfs_qgroup_reserve_data(inode, &data_reserved,
					cur_offset, last_byte - cur_offset);
			if (ret < 0) {
				free_extent_map(em);
				break;
			}
		} else {
			/*
			 * Do not need to reserve unwritten extent for this
			 * range, free reserved data space first, otherwise
			 * it'll result in false ENOSPC error.
			 */
			btrfs_free_reserved_data_space(inode, data_reserved,
					cur_offset, last_byte - cur_offset);
		}
		free_extent_map(em);
		cur_offset = last_byte;
		if (cur_offset >= alloc_end)
			break;
	}

	/*
	 * If ret is still 0, means we're OK to fallocate.
	 * Or just cleanup the list and exit.
	 */
	list_for_each_entry_safe(range, tmp, &reserve_list, list) {
		if (!ret)
			ret = btrfs_prealloc_file_range(inode, mode,
					range->start,
					range->len, i_blocksize(inode),
					offset + len, &alloc_hint);
		else
			btrfs_free_reserved_data_space(inode,
					data_reserved, range->start,
					range->len);
		list_del(&range->list);
		kfree(range);
	}
	if (ret < 0)
		goto out_unlock;

	if (actual_end > inode->i_size &&
	    !(mode & FALLOC_FL_KEEP_SIZE)) {
		struct btrfs_trans_handle *trans;
		struct btrfs_root *root = BTRFS_I(inode)->root;

		/*
		 * We didn't need to allocate any more space, but we
		 * still extended the size of the file so we need to
		 * update i_size and the inode item.
		 */
		trans = btrfs_start_transaction(root, 1);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
		} else {
			inode->i_ctime = current_time(inode);
			i_size_write(inode, actual_end);
			btrfs_ordered_update_i_size(inode, actual_end, NULL);
			ret = btrfs_update_inode(trans, root, inode);
			if (ret)
				btrfs_end_transaction(trans);
			else
				ret = btrfs_end_transaction(trans);
		}
	}
out_unlock:
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
			     &cached_state, GFP_KERNEL);
out:
	inode_unlock(inode);

	/* Let go of our reservation. */
	if (ret != 0)
		btrfs_free_reserved_data_space(inode, data_reserved,
				alloc_start, alloc_end - cur_offset);
	extent_changeset_free(data_reserved);
	return ret;
}

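/*
 * Find the position of the first hole (SEEK_HOLE) or the first data
 * (SEEK_DATA) at or after *offset and store it back in *offset. Returns
 * -ENXIO when seeking data and nothing but holes remain before EOF.
 */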
static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_map *em = NULL;
	struct extent_state *cached_state = NULL;
	u64 lockstart;
	u64 lockend;
	u64 start;
	u64 len;
	int ret = 0;

	if (inode->i_size == 0)
		return -ENXIO;

	/*
	 * *offset can be negative, in this case we start finding DATA/HOLE from
	 * the very start of the file.
	 */
	start = max_t(loff_t, 0, *offset);

	lockstart = round_down(start, fs_info->sectorsize);
	lockend = round_up(i_size_read(inode),
			   fs_info->sectorsize);
	if (lockend <= lockstart)
		lockend = lockstart + fs_info->sectorsize;
	lockend--;
	len = lockend - lockstart + 1;

	lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			 &cached_state);

	while (start < inode->i_size) {
		em = btrfs_get_extent_fiemap(BTRFS_I(inode), NULL, 0,
					     start, len, 0);
		if (IS_ERR(em)) {
			ret = PTR_ERR(em);
			em = NULL;
			break;
		}

		if (whence == SEEK_HOLE &&
		    (em->block_start == EXTENT_MAP_HOLE ||
		     test_bit(EXTENT_FLAG_PREALLOC, &em->flags)))
			break;
		else if (whence == SEEK_DATA &&
			 (em->block_start != EXTENT_MAP_HOLE &&
			  !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)))
			break;

		start = em->start + em->len;
		free_extent_map(em);
		em = NULL;
		cond_resched();
	}
	free_extent_map(em);
	if (!ret) {
		if (whence == SEEK_DATA && start >= inode->i_size)
			ret = -ENXIO;
		else
			*offset = min_t(loff_t, start, inode->i_size);
	}
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			     &cached_state, GFP_NOFS);
	return ret;
}

static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	int ret;

	inode_lock(inode);
	switch (whence) {
	case SEEK_END:
	case SEEK_CUR:
		offset = generic_file_llseek(file, offset, whence);
		goto out;
	case SEEK_DATA:
	case SEEK_HOLE:
		if (offset >= i_size_read(inode)) {
			inode_unlock(inode);
			return -ENXIO;
		}

		ret = find_desired_extent(inode, &offset, whence);
		if (ret) {
			inode_unlock(inode);
			return ret;
		}
	}

	offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
out:
	inode_unlock(inode);
	return offset;
}

static int btrfs_file_open(struct inode *inode, struct file *filp)
{
	/* Btrfs supports non-blocking (RWF_NOWAIT) writes. */
	filp->f_mode |= FMODE_NOWAIT;
	return generic_file_open(inode, filp);
}

const struct file_operations btrfs_file_operations = {
	.llseek		= btrfs_file_llseek,
	.read_iter	= generic_file_read_iter,
	.splice_read	= generic_file_splice_read,
	.write_iter	= btrfs_file_write_iter,
	.mmap		= btrfs_file_mmap,
	.open		= btrfs_file_open,
	.release	= btrfs_release_file,
	.fsync		= btrfs_sync_file,
	.fallocate	= btrfs_fallocate,
	.unlocked_ioctl	= btrfs_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= btrfs_compat_ioctl,
#endif
	.clone_file_range = btrfs_clone_file_range,
	.dedupe_file_range = btrfs_dedupe_file_range,
};

void btrfs_auto_defrag_exit(void)
{
	kmem_cache_destroy(btrfs_inode_defrag_cachep);
}

int btrfs_auto_defrag_init(void)
{
	btrfs_inode_defrag_cachep = kmem_cache_create("btrfs_inode_defrag",
					sizeof(struct inode_defrag), 0,
					SLAB_MEM_SPREAD,
					NULL);
	if (!btrfs_inode_defrag_cachep)
		return -ENOMEM;

	return 0;
}

int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end)
{
	int ret;

	/*
	 * So with compression we will find and lock a dirty page and clear the
	 * first one as dirty, setup an async extent, and immediately return
	 * with the entire range locked but with nobody actually marked with
	 * writeback.  So we can't just filemap_write_and_wait_range() and
	 * expect it to work since it will just kick off a thread to do the
	 * actual work.  So we need to call filemap_fdatawrite_range _again_
	 * since it will wait on the page lock, which won't be unlocked until
	 * after the pages have been marked as writeback and the flusher thread
	 * will unlock those pages.  Additionally we need to wait on the async
	 * extents to complete before we can properly lock the io tree for the
	 * range so we can wait on the ordered extents.
	 */
	ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
	if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
			     &BTRFS_I(inode)->runtime_flags))
		ret = filemap_fdatawrite_range(inode->i_mapping, start, end);

	return ret;
}