/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/mpage.h>
#include <linux/falloc.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/compat.h>
#include <linux/slab.h>
#include <linux/btrfs.h>
#include <linux/uio.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "print-tree.h"
#include "tree-log.h"
#include "locking.h"
#include "volumes.h"
#include "qgroup.h"
#include "compression.h"

static struct kmem_cache *btrfs_inode_defrag_cachep;
/*
 * when auto defrag is enabled we
 * queue up these defrag structs to remember which
 * inodes need defragging passes
 */
struct inode_defrag {
	struct rb_node rb_node;
	/* objectid */
	u64 ino;
	/*
	 * transid where the defrag was added, we search for
	 * extents newer than this
	 */
	u64 transid;

	/* root objectid */
	u64 root;

	/* last offset we were able to defrag */
	u64 last_offset;

	/* if we've wrapped around back to zero once already */
	int cycled;
};

static int __compare_inode_defrag(struct inode_defrag *defrag1,
				  struct inode_defrag *defrag2)
{
	if (defrag1->root > defrag2->root)
		return 1;
	else if (defrag1->root < defrag2->root)
		return -1;
	else if (defrag1->ino > defrag2->ino)
		return 1;
	else if (defrag1->ino < defrag2->ino)
		return -1;
	else
		return 0;
}
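/*
 * Ordering example (illustrative commentary, not from the original source):
 * with entries (root=5, ino=260), (root=5, ino=261) and (root=7, ino=256),
 * __compare_inode_defrag() sorts them in exactly that order - the root id
 * is the primary key and the inode number breaks ties, which is what lets
 * btrfs_run_defrag_inodes() walk the rbtree as (root, last ino + 1).
 */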

/* pop a record for an inode into the defrag tree.  The lock
 * must be held already
 *
 * If you're inserting a record for an older transid than an
 * existing record, the transid already in the tree is lowered
 *
 * If an existing record is found the defrag item you
 * pass in is freed
 */
static int __btrfs_add_inode_defrag(struct btrfs_inode *inode,
				    struct inode_defrag *defrag)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
	struct inode_defrag *entry;
	struct rb_node **p;
	struct rb_node *parent = NULL;
	int ret;

	p = &fs_info->defrag_inodes.rb_node;
	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct inode_defrag, rb_node);

		ret = __compare_inode_defrag(defrag, entry);
		if (ret < 0)
			p = &parent->rb_left;
		else if (ret > 0)
			p = &parent->rb_right;
		else {
			/* if we're reinserting an entry for
			 * an old defrag run, make sure to
			 * lower the transid of our existing record
			 */
			if (defrag->transid < entry->transid)
				entry->transid = defrag->transid;
			if (defrag->last_offset > entry->last_offset)
				entry->last_offset = defrag->last_offset;
			return -EEXIST;
		}
	}
	set_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags);
	rb_link_node(&defrag->rb_node, parent, p);
	rb_insert_color(&defrag->rb_node, &fs_info->defrag_inodes);
	return 0;
}

static inline int __need_auto_defrag(struct btrfs_fs_info *fs_info)
{
	if (!btrfs_test_opt(fs_info, AUTO_DEFRAG))
		return 0;

	if (btrfs_fs_closing(fs_info))
		return 0;

	return 1;
}

/*
 * insert a defrag record for this inode if auto defrag is
 * enabled
 */
int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
			   struct btrfs_inode *inode)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
	struct btrfs_root *root = inode->root;
	struct inode_defrag *defrag;
	u64 transid;
	int ret;

	if (!__need_auto_defrag(fs_info))
		return 0;

	if (test_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags))
		return 0;

	if (trans)
		transid = trans->transid;
	else
		transid = inode->root->last_trans;

	defrag = kmem_cache_zalloc(btrfs_inode_defrag_cachep, GFP_NOFS);
	if (!defrag)
		return -ENOMEM;

	defrag->ino = btrfs_ino(inode);
	defrag->transid = transid;
	defrag->root = root->root_key.objectid;

	spin_lock(&fs_info->defrag_inodes_lock);
	if (!test_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags)) {
		/*
		 * If we set IN_DEFRAG flag and evict the inode from memory,
		 * and then re-read this inode, this new inode doesn't have
		 * IN_DEFRAG flag. At the case, we may find the existed defrag.
		 */
		ret = __btrfs_add_inode_defrag(inode, defrag);
		if (ret)
			kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
	} else {
		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
	}
	spin_unlock(&fs_info->defrag_inodes_lock);
	return 0;
}

/*
 * Requeue the defrag object. If there is a defrag object that points to
 * the same inode in the tree, we will merge them together (by
 * __btrfs_add_inode_defrag()) and free the one that we want to requeue.
 */
static void btrfs_requeue_inode_defrag(struct btrfs_inode *inode,
				       struct inode_defrag *defrag)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
	int ret;

	if (!__need_auto_defrag(fs_info))
		goto out;

	/*
	 * Here we don't check the IN_DEFRAG flag, because we need merge
	 * them together.
	 */
	spin_lock(&fs_info->defrag_inodes_lock);
	ret = __btrfs_add_inode_defrag(inode, defrag);
	spin_unlock(&fs_info->defrag_inodes_lock);
	if (ret)
		goto out;
	return;
out:
	kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
}

/*
 * pick the defragable inode that we want, if it doesn't exist, we will get
 * the next one.
 */
static struct inode_defrag *
btrfs_pick_defrag_inode(struct btrfs_fs_info *fs_info, u64 root, u64 ino)
{
	struct inode_defrag *entry = NULL;
	struct inode_defrag tmp;
	struct rb_node *p;
	struct rb_node *parent = NULL;
	int ret;

	tmp.ino = ino;
	tmp.root = root;

	spin_lock(&fs_info->defrag_inodes_lock);
	p = fs_info->defrag_inodes.rb_node;
	while (p) {
		parent = p;
		entry = rb_entry(parent, struct inode_defrag, rb_node);

		ret = __compare_inode_defrag(&tmp, entry);
		if (ret < 0)
			p = parent->rb_left;
		else if (ret > 0)
			p = parent->rb_right;
		else
			goto out;
	}

	if (parent && __compare_inode_defrag(&tmp, entry) > 0) {
		parent = rb_next(parent);
		if (parent)
			entry = rb_entry(parent, struct inode_defrag, rb_node);
		else
			entry = NULL;
	}
out:
	if (entry)
		rb_erase(parent, &fs_info->defrag_inodes);
	spin_unlock(&fs_info->defrag_inodes_lock);
	return entry;
}
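/*
 * Example walk (illustrative commentary, not from the original source):
 * asking for (root=5, ino=300) when the tree holds (5,299) and (5,301)
 * ends the search at one of those nodes and, because (5,300) compares
 * greater than (5,299), steps to the in-order successor (5,301), erases
 * it from the tree and returns it - this is what lets the caller iterate
 * with "last defragged ino + 1".
 */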

void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info)
{
	struct inode_defrag *defrag;
	struct rb_node *node;

	spin_lock(&fs_info->defrag_inodes_lock);
	node = rb_first(&fs_info->defrag_inodes);
	while (node) {
		rb_erase(node, &fs_info->defrag_inodes);
		defrag = rb_entry(node, struct inode_defrag, rb_node);
		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);

		cond_resched_lock(&fs_info->defrag_inodes_lock);

		node = rb_first(&fs_info->defrag_inodes);
	}
	spin_unlock(&fs_info->defrag_inodes_lock);
}

#define BTRFS_DEFRAG_BATCH	1024

static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
				    struct inode_defrag *defrag)
{
	struct btrfs_root *inode_root;
	struct inode *inode;
	struct btrfs_key key;
	struct btrfs_ioctl_defrag_range_args range;
	int num_defrag;
	int index;
	int ret;

	/* get the inode */
	key.objectid = defrag->root;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	index = srcu_read_lock(&fs_info->subvol_srcu);

	inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
	if (IS_ERR(inode_root)) {
		ret = PTR_ERR(inode_root);
		goto cleanup;
	}

	key.objectid = defrag->ino;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;
	inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
	if (IS_ERR(inode)) {
		ret = PTR_ERR(inode);
		goto cleanup;
	}
	srcu_read_unlock(&fs_info->subvol_srcu, index);

	/* do a chunk of defrag */
	clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
	memset(&range, 0, sizeof(range));
	range.len = (u64)-1;
	range.start = defrag->last_offset;

	sb_start_write(fs_info->sb);
	num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
				       BTRFS_DEFRAG_BATCH);
	sb_end_write(fs_info->sb);
	/*
	 * if we filled the whole defrag batch, there
	 * must be more work to do.  Queue this defrag
	 * again
	 */
	if (num_defrag == BTRFS_DEFRAG_BATCH) {
		defrag->last_offset = range.start;
		btrfs_requeue_inode_defrag(BTRFS_I(inode), defrag);
	} else if (defrag->last_offset && !defrag->cycled) {
		/*
		 * we didn't fill our defrag batch, but
		 * we didn't start at zero.  Make sure we loop
		 * around to the start of the file.
		 */
		defrag->last_offset = 0;
		defrag->cycled = 1;
		btrfs_requeue_inode_defrag(BTRFS_I(inode), defrag);
	} else {
		kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
	}

	iput(inode);
	return 0;
cleanup:
	srcu_read_unlock(&fs_info->subvol_srcu, index);
	kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
	return ret;
}

/*
 * run through the list of inodes in the FS that need
 * defragging
 */
int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
{
	struct inode_defrag *defrag;
	u64 first_ino = 0;
	u64 root_objectid = 0;

	atomic_inc(&fs_info->defrag_running);
	while (1) {
		/* Pause the auto defragger. */
		if (test_bit(BTRFS_FS_STATE_REMOUNTING,
			     &fs_info->fs_state))
			break;

		if (!__need_auto_defrag(fs_info))
			break;

		/* find an inode to defrag */
		defrag = btrfs_pick_defrag_inode(fs_info, root_objectid,
						 first_ino);
		if (!defrag) {
			if (root_objectid || first_ino) {
				root_objectid = 0;
				first_ino = 0;
				continue;
			} else {
				break;
			}
		}

		first_ino = defrag->ino + 1;
		root_objectid = defrag->root;

		__btrfs_run_defrag_inode(fs_info, defrag);
	}
	atomic_dec(&fs_info->defrag_running);

	/*
	 * during unmount, we use the transaction_wait queue to
	 * wait for the defragger to stop
	 */
	wake_up(&fs_info->transaction_wait);
	return 0;
}

/* simple helper to fault in pages and copy.  This should go away
 * and be replaced with calls into generic code.
 */
static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
					 struct page **prepared_pages,
					 struct iov_iter *i)
{
	size_t copied = 0;
	size_t total_copied = 0;
	int pg = 0;
	int offset = pos & (PAGE_SIZE - 1);

	while (write_bytes > 0) {
		size_t count = min_t(size_t,
				     PAGE_SIZE - offset, write_bytes);
		struct page *page = prepared_pages[pg];

		/* Copy data from userspace to the current page */
		copied = iov_iter_copy_from_user_atomic(page, i, offset, count);

		/* Flush processor's dcache for this page */
		flush_dcache_page(page);

		/*
		 * if we get a partial write, we can end up with
		 * partially up to date pages.  These add
		 * a lot of complexity, so make sure they don't
		 * happen by forcing this copy to be retried.
		 *
		 * The rest of the btrfs_file_write code will fall
		 * back to page at a time copies after we return 0.
		 */
		if (!PageUptodate(page) && copied < count)
			copied = 0;

		iov_iter_advance(i, copied);
		write_bytes -= copied;
		total_copied += copied;

		/* Return to btrfs_file_write_iter to fault page */
		if (unlikely(copied == 0))
			break;

		if (copied < PAGE_SIZE - offset) {
			offset += copied;
		} else {
			pg++;
			offset = 0;
		}
	}
	return total_copied;
}
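/*
 * Worked example (illustrative commentary, with assumed values): for
 * PAGE_SIZE == 4096, pos == 6000 and write_bytes == 5000, offset starts at
 * 6000 & 4095 == 1904; the first pass copies min(4096 - 1904, 5000) == 2192
 * bytes into page 0 and the second copies the remaining 2808 bytes into
 * page 1 at offset 0.  A short atomic copy ends the loop early and the
 * caller falls back to one page at a time.
 */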

/*
 * unlocks pages after btrfs_file_write is done with them
 */
static void btrfs_drop_pages(struct page **pages, size_t num_pages)
{
	size_t i;
	for (i = 0; i < num_pages; i++) {
		/* page checked is some magic around finding pages that
		 * have been modified without going through btrfs_set_page_dirty
		 * clear it here. There should be no need to mark the pages
		 * accessed as prepare_pages should have marked them accessed
		 * in prepare_pages via find_or_create_page()
		 */
		ClearPageChecked(pages[i]);
		unlock_page(pages[i]);
		put_page(pages[i]);
	}
}

/*
 * after copy_from_user, pages need to be dirtied and we need to make
 * sure holes are created between the current EOF and the start of
 * any next extents (if required).
 *
 * this also makes the decision about creating an inline extent vs
 * doing real data extents, marking pages dirty and delalloc as required.
 */
int btrfs_dirty_pages(struct inode *inode, struct page **pages,
		      size_t num_pages, loff_t pos, size_t write_bytes,
		      struct extent_state **cached)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	int err = 0;
	int i;
	u64 num_bytes;
	u64 start_pos;
	u64 end_of_last_block;
	u64 end_pos = pos + write_bytes;
	loff_t isize = i_size_read(inode);

	start_pos = pos & ~((u64) fs_info->sectorsize - 1);
	num_bytes = round_up(write_bytes + pos - start_pos,
			     fs_info->sectorsize);

	end_of_last_block = start_pos + num_bytes - 1;
	err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
					cached, 0);
	if (err)
		return err;

	for (i = 0; i < num_pages; i++) {
		struct page *p = pages[i];
		SetPageUptodate(p);
		ClearPageChecked(p);
		set_page_dirty(p);
	}

	/*
	 * we've only changed i_size in ram, and we haven't updated
	 * the disk i_size.  There is no need to log the inode
	 * at this time.
	 */
	if (end_pos > isize)
		i_size_write(inode, end_pos);
	return 0;
}
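/*
 * Rounding example (illustrative commentary, with assumed values): with a
 * 4096 byte sectorsize, pos == 6000 and write_bytes == 100, start_pos
 * rounds down to 4096 and num_bytes is round_up(100 + 6000 - 4096, 4096)
 * == 4096, so delalloc is set on the whole sector [4096, 8191] even though
 * only bytes [6000, 6099] were actually written.
 */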

/*
 * this drops all the extents in the cache that intersect the range
 * [start, end].  Existing extents are split as required.
 */
void btrfs_drop_extent_cache(struct btrfs_inode *inode, u64 start, u64 end,
			     int skip_pinned)
{
	struct extent_map *em;
	struct extent_map *split = NULL;
	struct extent_map *split2 = NULL;
	struct extent_map_tree *em_tree = &inode->extent_tree;
	u64 len = end - start + 1;
	u64 gen;
	int ret;
	int testend = 1;
	unsigned long flags;
	int compressed = 0;
	bool modified;

	WARN_ON(end < start);
	if (end == (u64)-1) {
		len = (u64)-1;
		testend = 0;
	}
	while (1) {
		int no_splits = 0;

		modified = false;
		if (!split)
			split = alloc_extent_map();
		if (!split2)
			split2 = alloc_extent_map();
		if (!split || !split2)
			no_splits = 1;

		write_lock(&em_tree->lock);
		em = lookup_extent_mapping(em_tree, start, len);
		if (!em) {
			write_unlock(&em_tree->lock);
			break;
		}
		flags = em->flags;
		gen = em->generation;
		if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
			if (testend && em->start + em->len >= start + len) {
				free_extent_map(em);
				write_unlock(&em_tree->lock);
				break;
			}
			start = em->start + em->len;
			if (testend)
				len = start + len - (em->start + em->len);
			free_extent_map(em);
			write_unlock(&em_tree->lock);
			continue;
		}
		compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
		clear_bit(EXTENT_FLAG_LOGGING, &flags);
		modified = !list_empty(&em->list);
		if (no_splits)
			goto next;

		if (em->start < start) {
			split->start = em->start;
			split->len = start - em->start;

			if (em->block_start < EXTENT_MAP_LAST_BYTE) {
				split->orig_start = em->orig_start;
				split->block_start = em->block_start;

				if (compressed)
					split->block_len = em->block_len;
				else
					split->block_len = split->len;
				split->orig_block_len = max(split->block_len,
						em->orig_block_len);
				split->ram_bytes = em->ram_bytes;
			} else {
				split->orig_start = split->start;
				split->block_len = 0;
				split->block_start = em->block_start;
				split->orig_block_len = 0;
				split->ram_bytes = split->len;
			}

			split->generation = gen;
			split->bdev = em->bdev;
			split->flags = flags;
			split->compress_type = em->compress_type;
			replace_extent_mapping(em_tree, em, split, modified);
			free_extent_map(split);
			split = split2;
			split2 = NULL;
		}
		if (testend && em->start + em->len > start + len) {
			u64 diff = start + len - em->start;

			split->start = start + len;
			split->len = em->start + em->len - (start + len);
			split->bdev = em->bdev;
			split->flags = flags;
			split->compress_type = em->compress_type;
			split->generation = gen;

			if (em->block_start < EXTENT_MAP_LAST_BYTE) {
				split->orig_block_len = max(em->block_len,
						    em->orig_block_len);

				split->ram_bytes = em->ram_bytes;
				if (compressed) {
					split->block_len = em->block_len;
					split->block_start = em->block_start;
					split->orig_start = em->orig_start;
				} else {
					split->block_len = split->len;
					split->block_start = em->block_start
							     + diff;
					split->orig_start = em->orig_start;
				}
			} else {
				split->ram_bytes = split->len;
				split->orig_start = split->start;
				split->block_len = 0;
				split->block_start = em->block_start;
				split->orig_block_len = 0;
			}

			if (extent_map_in_tree(em)) {
				replace_extent_mapping(em_tree, em, split,
						       modified);
			} else {
				ret = add_extent_mapping(em_tree, split,
							 modified);
				ASSERT(ret == 0); /* Logic error */
			}
			free_extent_map(split);
			split = NULL;
		}
next:
		if (extent_map_in_tree(em))
			remove_extent_mapping(em_tree, em);
		write_unlock(&em_tree->lock);

		/* once for us */
		free_extent_map(em);
		/* once for the tree */
		free_extent_map(em);
	}
	if (split)
		free_extent_map(split);
	if (split2)
		free_extent_map(split2);
}

/*
 * this is very complex, but the basic idea is to drop all extents
 * in the range start - end.  hint_block is filled in with a block number
 * that would be a good hint to the block allocator for this file.
 *
 * If an extent intersects the range but is not entirely inside the range
 * it is either truncated or split.  Anything entirely inside the range
 * is deleted from the tree.
 */
int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root, struct inode *inode,
			 struct btrfs_path *path, u64 start, u64 end,
			 u64 *drop_end, int drop_cache,
			 int replace_extent,
			 u32 extent_item_size,
			 int *key_inserted)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	struct btrfs_key new_key;
	u64 ino = btrfs_ino(BTRFS_I(inode));
	u64 search_start = start;
	u64 disk_bytenr = 0;
	u64 num_bytes = 0;
	u64 extent_offset = 0;
	u64 extent_end = 0;
	u64 last_end = start;
	int del_nr = 0;
	int del_slot = 0;
	int extent_type;
	int recow;
	int ret;
	int modify_tree = -1;
	int update_refs;
	int found = 0;
	int leafs_visited = 0;

	if (drop_cache)
		btrfs_drop_extent_cache(BTRFS_I(inode), start, end - 1, 0);

	if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent)
		modify_tree = 0;

	update_refs = (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
		       root == fs_info->tree_root);
	while (1) {
		recow = 0;
		ret = btrfs_lookup_file_extent(trans, root, path, ino,
					       search_start, modify_tree);
		if (ret < 0)
			break;
		if (ret > 0 && path->slots[0] > 0 && search_start == start) {
			leaf = path->nodes[0];
			btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
			if (key.objectid == ino &&
			    key.type == BTRFS_EXTENT_DATA_KEY)
				path->slots[0]--;
		}
		ret = 0;
		leafs_visited++;
next_slot:
		leaf = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			BUG_ON(del_nr > 0);
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				break;
			if (ret > 0) {
				ret = 0;
				break;
			}
			leafs_visited++;
			leaf = path->nodes[0];
			recow = 1;
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

		if (key.objectid > ino)
			break;
		if (WARN_ON_ONCE(key.objectid < ino) ||
		    key.type < BTRFS_EXTENT_DATA_KEY) {
			ASSERT(del_nr == 0);
			path->slots[0]++;
			goto next_slot;
		}
		if (key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
			break;

		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		extent_type = btrfs_file_extent_type(leaf, fi);

		if (extent_type == BTRFS_FILE_EXTENT_REG ||
		    extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
			disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
			num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
			extent_offset = btrfs_file_extent_offset(leaf, fi);
			extent_end = key.offset +
				btrfs_file_extent_num_bytes(leaf, fi);
		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
			extent_end = key.offset +
				btrfs_file_extent_inline_len(leaf,
						     path->slots[0], fi);
		} else {
			/* can't happen */
			BUG();
		}

		/*
		 * Don't skip extent items representing 0 byte lengths. They
		 * used to be created (bug) if while punching holes we hit
		 * -ENOSPC condition. So if we find one here, just ensure we
		 * delete it, otherwise we would insert a new file extent item
		 * with the same key (offset) as that 0 bytes length file
		 * extent item in the call to setup_items_for_insert() later
		 * in this function.
		 */
		if (extent_end == key.offset && extent_end >= search_start) {
			last_end = extent_end;
			goto delete_extent_item;
		}

		if (extent_end <= search_start) {
			path->slots[0]++;
			goto next_slot;
		}

		found = 1;
		search_start = max(key.offset, start);
		if (recow || !modify_tree) {
			modify_tree = -1;
			btrfs_release_path(path);
			continue;
		}

		/*
		 *     | - range to drop - |
		 *  | -------- extent -------- |
		 */
		if (start > key.offset && end < extent_end) {
			BUG_ON(del_nr > 0);
			if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
				ret = -EOPNOTSUPP;
				break;
			}

			memcpy(&new_key, &key, sizeof(new_key));
			new_key.offset = start;
			ret = btrfs_duplicate_item(trans, root, path,
						   &new_key);
			if (ret == -EAGAIN) {
				btrfs_release_path(path);
				continue;
			}
			if (ret < 0)
				break;

			leaf = path->nodes[0];
			fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
					    struct btrfs_file_extent_item);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							start - key.offset);

			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);

			extent_offset += start - key.offset;
			btrfs_set_file_extent_offset(leaf, fi, extent_offset);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							extent_end - start);
			btrfs_mark_buffer_dirty(leaf);

			if (update_refs && disk_bytenr > 0) {
				ret = btrfs_inc_extent_ref(trans, fs_info,
						disk_bytenr, num_bytes, 0,
						root->root_key.objectid,
						new_key.objectid,
						start - extent_offset);
				BUG_ON(ret); /* -ENOMEM */
			}
			key.offset = start;
		}
		/*
		 * From here on out we will have actually dropped something, so
		 * last_end can be updated.
		 */
		last_end = extent_end;

		/*
		 *  | ---- range to drop ----- |
		 *      | -------- extent -------- |
		 */
		if (start <= key.offset && end < extent_end) {
			if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
				ret = -EOPNOTSUPP;
				break;
			}

			memcpy(&new_key, &key, sizeof(new_key));
			new_key.offset = end;
			btrfs_set_item_key_safe(fs_info, path, &new_key);

			extent_offset += end - key.offset;
			btrfs_set_file_extent_offset(leaf, fi, extent_offset);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							extent_end - end);
			btrfs_mark_buffer_dirty(leaf);
			if (update_refs && disk_bytenr > 0)
				inode_sub_bytes(inode, end - key.offset);
			break;
		}

		search_start = extent_end;
		/*
		 *       | ---- range to drop ----- |
		 *  | -------- extent -------- |
		 */
		if (start > key.offset && end >= extent_end) {
			BUG_ON(del_nr > 0);
			if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
				ret = -EOPNOTSUPP;
				break;
			}

			btrfs_set_file_extent_num_bytes(leaf, fi,
							start - key.offset);
			btrfs_mark_buffer_dirty(leaf);
			if (update_refs && disk_bytenr > 0)
				inode_sub_bytes(inode, extent_end - start);
			if (end == extent_end)
				break;

			path->slots[0]++;
			goto next_slot;
		}

		/*
		 *  | ---- range to drop ----- |
		 *                  | ------ extent ------ |
		 */
		if (start <= key.offset && end >= extent_end) {
delete_extent_item:
			if (del_nr == 0) {
				del_slot = path->slots[0];
				del_nr = 1;
			} else {
				BUG_ON(del_slot + del_nr != path->slots[0]);
				del_nr++;
			}

			if (update_refs &&
			    extent_type == BTRFS_FILE_EXTENT_INLINE) {
				inode_sub_bytes(inode,
						extent_end - key.offset);
				extent_end = ALIGN(extent_end,
						   fs_info->sectorsize);
			} else if (update_refs && disk_bytenr > 0) {
				ret = btrfs_free_extent(trans, fs_info,
						disk_bytenr, num_bytes, 0,
						root->root_key.objectid,
						key.objectid, key.offset -
						extent_offset);
				BUG_ON(ret); /* -ENOMEM */
				inode_sub_bytes(inode,
						extent_end - key.offset);
			}

			if (end == extent_end)
				break;

			if (path->slots[0] + 1 < btrfs_header_nritems(leaf)) {
				path->slots[0]++;
				goto next_slot;
			}

			ret = btrfs_del_items(trans, root, path, del_slot,
					      del_nr);
			if (ret) {
				btrfs_abort_transaction(trans, ret);
				break;
			}

			del_nr = 0;
			del_slot = 0;

			btrfs_release_path(path);
			continue;
		}

		BUG_ON(1);
	}

	if (!ret && del_nr > 0) {
		/*
		 * Set path->slots[0] to first slot, so that after the delete
		 * if items are move off from our leaf to its immediate left or
		 * right neighbor leafs, we end up with a correct leaf key.
		 */
		path->slots[0] = del_slot;
		ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
		if (ret)
			btrfs_abort_transaction(trans, ret);
	}

	leaf = path->nodes[0];
	/*
	 * If btrfs_del_items() was called, it might have deleted a leaf, in
	 * which case it unlocked our path, so check path->locks[0] matches a
	 * write lock.
	 */
	if (!ret && replace_extent && leafs_visited == 1 &&
	    (path->locks[0] == BTRFS_WRITE_LOCK_BLOCKING ||
	     path->locks[0] == BTRFS_WRITE_LOCK) &&
	    btrfs_leaf_free_space(fs_info, leaf) >=
	    sizeof(struct btrfs_item) + extent_item_size) {

		key.objectid = ino;
		key.type = BTRFS_EXTENT_DATA_KEY;
		key.offset = start;
		if (!del_nr && path->slots[0] < btrfs_header_nritems(leaf)) {
			struct btrfs_key slot_key;

			btrfs_item_key_to_cpu(leaf, &slot_key, path->slots[0]);
			if (btrfs_comp_cpu_keys(&key, &slot_key) > 0)
				path->slots[0]++;
		}
		setup_items_for_insert(root, path, &key,
				       &extent_item_size,
				       extent_item_size,
				       sizeof(struct btrfs_item) +
				       extent_item_size, 1);
		*key_inserted = 1;
	}

	if (!replace_extent || !(*key_inserted))
		btrfs_release_path(path);
	if (drop_end)
		*drop_end = found ? min(end, last_end) : end;
	return ret;
}

int btrfs_drop_extents(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct inode *inode, u64 start,
		       u64 end, int drop_cache)
{
	struct btrfs_path *path;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL,
				   drop_cache, 0, 0, NULL);
	btrfs_free_path(path);
	return ret;
}

static int extent_mergeable(struct extent_buffer *leaf, int slot,
			    u64 objectid, u64 bytenr, u64 orig_offset,
			    u64 *start, u64 *end)
{
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 extent_end;

	if (slot < 0 || slot >= btrfs_header_nritems(leaf))
		return 0;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
		return 0;

	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
	if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG ||
	    btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr ||
	    btrfs_file_extent_offset(leaf, fi) != key.offset - orig_offset ||
	    btrfs_file_extent_compression(leaf, fi) ||
	    btrfs_file_extent_encryption(leaf, fi) ||
	    btrfs_file_extent_other_encoding(leaf, fi))
		return 0;

	extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
	if ((*start && *start != key.offset) || (*end && *end != extent_end))
		return 0;

	*start = key.offset;
	*end = extent_end;
	return 1;
}
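/*
 * Note (added commentary): a neighbour is only considered mergeable when it
 * is a plain BTRFS_FILE_EXTENT_REG item pointing into the same disk extent
 * (same bytenr, with offsets consistent with one unsplit original extent),
 * with no compression, encryption or other encoding, and when it lines up
 * exactly with the *start/*end the caller passed in.
 */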

/*
 * Mark extent in the range start - end as written.
 *
 * This changes extent type from 'pre-allocated' to 'regular'. If only
 * part of extent is marked as written, the extent will be split into
 * two or three.
 */
int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
			      struct btrfs_inode *inode, u64 start, u64 end)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
	struct btrfs_root *root = inode->root;
	struct extent_buffer *leaf;
	struct btrfs_path *path;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	struct btrfs_key new_key;
	u64 bytenr;
	u64 num_bytes;
	u64 extent_end;
	u64 orig_offset;
	u64 other_start;
	u64 other_end;
	u64 split;
	int del_nr = 0;
	int del_slot = 0;
	int recow;
	int ret;
	u64 ino = btrfs_ino(inode);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
again:
	recow = 0;
	split = start;
	key.objectid = ino;
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = split;

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0)
		goto out;
	if (ret > 0 && path->slots[0] > 0)
		path->slots[0]--;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	if (key.objectid != ino ||
	    key.type != BTRFS_EXTENT_DATA_KEY) {
		ret = -EINVAL;
		btrfs_abort_transaction(trans, ret);
		goto out;
	}
	fi = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);
	if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_PREALLOC) {
		ret = -EINVAL;
		btrfs_abort_transaction(trans, ret);
		goto out;
	}
	extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
	if (key.offset > start || extent_end < end) {
		ret = -EINVAL;
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
	num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
	orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi);
	memcpy(&new_key, &key, sizeof(new_key));

	if (start == key.offset && end < extent_end) {
		other_start = 0;
		other_end = start;
		if (extent_mergeable(leaf, path->slots[0] - 1,
				     ino, bytenr, orig_offset,
				     &other_start, &other_end)) {
			new_key.offset = end;
			btrfs_set_item_key_safe(fs_info, path, &new_key);
			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);
			btrfs_set_file_extent_generation(leaf, fi,
							 trans->transid);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							extent_end - end);
			btrfs_set_file_extent_offset(leaf, fi,
						     end - orig_offset);
			fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
					    struct btrfs_file_extent_item);
			btrfs_set_file_extent_generation(leaf, fi,
							 trans->transid);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							end - other_start);
			btrfs_mark_buffer_dirty(leaf);
			goto out;
		}
	}

	if (start > key.offset && end == extent_end) {
		other_start = end;
		other_end = 0;
		if (extent_mergeable(leaf, path->slots[0] + 1,
				     ino, bytenr, orig_offset,
				     &other_start, &other_end)) {
			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							start - key.offset);
			btrfs_set_file_extent_generation(leaf, fi,
							 trans->transid);
			path->slots[0]++;
			new_key.offset = start;
			btrfs_set_item_key_safe(fs_info, path, &new_key);

			fi = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_file_extent_item);
			btrfs_set_file_extent_generation(leaf, fi,
							 trans->transid);
			btrfs_set_file_extent_num_bytes(leaf, fi,
							other_end - start);
			btrfs_set_file_extent_offset(leaf, fi,
						     start - orig_offset);
			btrfs_mark_buffer_dirty(leaf);
			goto out;
		}
	}

	while (start > key.offset || end < extent_end) {
		if (key.offset == start)
			split = end;

		new_key.offset = split;
		ret = btrfs_duplicate_item(trans, root, path, &new_key);
		if (ret == -EAGAIN) {
			btrfs_release_path(path);
			goto again;
		}
		if (ret < 0) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}

		leaf = path->nodes[0];
		fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
				    struct btrfs_file_extent_item);
		btrfs_set_file_extent_generation(leaf, fi, trans->transid);
		btrfs_set_file_extent_num_bytes(leaf, fi,
						split - key.offset);

		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);

		btrfs_set_file_extent_generation(leaf, fi, trans->transid);
		btrfs_set_file_extent_offset(leaf, fi, split - orig_offset);
		btrfs_set_file_extent_num_bytes(leaf, fi,
						extent_end - split);
		btrfs_mark_buffer_dirty(leaf);

		ret = btrfs_inc_extent_ref(trans, fs_info, bytenr, num_bytes,
					   0, root->root_key.objectid,
					   ino, orig_offset);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}

		if (split == start) {
			key.offset = start;
		} else {
			if (start != key.offset) {
				ret = -EINVAL;
				btrfs_abort_transaction(trans, ret);
				goto out;
			}
			path->slots[0]--;
			extent_end = end;
		}
		recow = 1;
	}

	other_start = end;
	other_end = 0;
	if (extent_mergeable(leaf, path->slots[0] + 1,
			     ino, bytenr, orig_offset,
			     &other_start, &other_end)) {
		if (recow) {
			btrfs_release_path(path);
			goto again;
		}
		extent_end = other_end;
		del_slot = path->slots[0] + 1;
		del_nr++;
		ret = btrfs_free_extent(trans, fs_info, bytenr, num_bytes,
					0, root->root_key.objectid,
					ino, orig_offset);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
	}
	other_start = 0;
	other_end = start;
	if (extent_mergeable(leaf, path->slots[0] - 1,
			     ino, bytenr, orig_offset,
			     &other_start, &other_end)) {
		if (recow) {
			btrfs_release_path(path);
			goto again;
		}
		key.offset = other_start;
		del_slot = path->slots[0];
		del_nr++;
		ret = btrfs_free_extent(trans, fs_info, bytenr, num_bytes,
					0, root->root_key.objectid,
					ino, orig_offset);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
	}
	if (del_nr == 0) {
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		btrfs_set_file_extent_type(leaf, fi,
					   BTRFS_FILE_EXTENT_REG);
		btrfs_set_file_extent_generation(leaf, fi, trans->transid);
		btrfs_mark_buffer_dirty(leaf);
	} else {
		fi = btrfs_item_ptr(leaf, del_slot - 1,
				    struct btrfs_file_extent_item);
		btrfs_set_file_extent_type(leaf, fi,
					   BTRFS_FILE_EXTENT_REG);
		btrfs_set_file_extent_generation(leaf, fi, trans->transid);
		btrfs_set_file_extent_num_bytes(leaf, fi,
						extent_end - key.offset);
		btrfs_mark_buffer_dirty(leaf);

		ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
		if (ret < 0) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
	}
out:
	btrfs_free_path(path);
	return 0;
}

/*
 * on error we return an unlocked page and the error value
 * on success we return a locked page and 0
 */
static int prepare_uptodate_page(struct inode *inode,
				 struct page *page, u64 pos,
				 bool force_uptodate)
{
	int ret = 0;

	if (((pos & (PAGE_SIZE - 1)) || force_uptodate) &&
	    !PageUptodate(page)) {
		ret = btrfs_readpage(NULL, page);
		if (ret)
			return ret;
		lock_page(page);
		if (!PageUptodate(page)) {
			unlock_page(page);
			return -EIO;
		}
		if (page->mapping != inode->i_mapping) {
			unlock_page(page);
			return -EAGAIN;
		}
	}
	return 0;
}

/*
 * this just gets pages into the page cache and locks them down.
 */
static noinline int prepare_pages(struct inode *inode, struct page **pages,
				  size_t num_pages, loff_t pos,
				  size_t write_bytes, bool force_uptodate)
{
	int i;
	unsigned long index = pos >> PAGE_SHIFT;
	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
	int err = 0;
	int faili;

	for (i = 0; i < num_pages; i++) {
again:
		pages[i] = find_or_create_page(inode->i_mapping, index + i,
					       mask | __GFP_WRITE);
		if (!pages[i]) {
			faili = i - 1;
			err = -ENOMEM;
			goto fail;
		}

		if (i == 0)
			err = prepare_uptodate_page(inode, pages[i], pos,
						    force_uptodate);
		if (!err && i == num_pages - 1)
			err = prepare_uptodate_page(inode, pages[i],
						    pos + write_bytes, false);
		if (err) {
			put_page(pages[i]);
			if (err == -EAGAIN) {
				err = 0;
				goto again;
			}
			faili = i - 1;
			goto fail;
		}
		wait_on_page_writeback(pages[i]);
	}

	return 0;
fail:
	while (faili >= 0) {
		unlock_page(pages[faili]);
		put_page(pages[faili]);
		faili--;
	}
	return err;

}

static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
					 const u64 start,
					 const u64 len,
					 struct extent_state **cached_state)
{
	u64 search_start = start;
	const u64 end = start + len - 1;

	while (search_start < end) {
		const u64 search_len = end - search_start + 1;
		struct extent_map *em;
		u64 em_len;
		int ret = 0;

		em = btrfs_get_extent(inode, NULL, 0, search_start,
				      search_len, 0);
		if (IS_ERR(em))
			return PTR_ERR(em);

		if (em->block_start != EXTENT_MAP_HOLE)
			goto next;

		em_len = em->len;
		if (em->start < search_start)
			em_len -= search_start - em->start;
		if (em_len > search_len)
			em_len = search_len;

		ret = set_extent_bit(&inode->io_tree, search_start,
				     search_start + em_len - 1,
				     EXTENT_DELALLOC_NEW,
				     NULL, cached_state, GFP_NOFS);
next:
		search_start = extent_map_end(em);
		free_extent_map(em);
		if (ret)
			return ret;
	}
	return 0;
}

/*
 * This function locks the extent and properly waits for data=ordered extents
 * to finish before allowing the pages to be modified if need.
 *
 * The return value:
 * 1 - the extent is locked
 * 0 - the extent is not locked, and everything is OK
 * -EAGAIN - need re-prepare the pages
 * the other < 0 number - Something wrong happens
 */
static noinline int
lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
				size_t num_pages, loff_t pos,
				size_t write_bytes,
				u64 *lockstart, u64 *lockend,
				struct extent_state **cached_state)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
	u64 start_pos;
	u64 last_pos;
	int i;
	int ret = 0;

	start_pos = round_down(pos, fs_info->sectorsize);
	last_pos = start_pos
		+ round_up(pos + write_bytes - start_pos,
			   fs_info->sectorsize) - 1;

	if (start_pos < inode->vfs_inode.i_size ||
	    (inode->flags & BTRFS_INODE_PREALLOC)) {
		struct btrfs_ordered_extent *ordered;
		unsigned int clear_bits;

		lock_extent_bits(&inode->io_tree, start_pos, last_pos,
				 cached_state);
		ordered = btrfs_lookup_ordered_range(inode, start_pos,
						     last_pos - start_pos + 1);
		if (ordered &&
		    ordered->file_offset + ordered->len > start_pos &&
		    ordered->file_offset <= last_pos) {
			unlock_extent_cached(&inode->io_tree, start_pos,
					     last_pos, cached_state, GFP_NOFS);
			for (i = 0; i < num_pages; i++) {
				unlock_page(pages[i]);
				put_page(pages[i]);
			}
			btrfs_start_ordered_extent(&inode->vfs_inode,
						   ordered, 1);
			btrfs_put_ordered_extent(ordered);
			return -EAGAIN;
		}
		if (ordered)
			btrfs_put_ordered_extent(ordered);
		ret = btrfs_find_new_delalloc_bytes(inode, start_pos,
						    last_pos - start_pos + 1,
						    cached_state);
		clear_bits = EXTENT_DIRTY | EXTENT_DELALLOC |
			EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG;
		if (ret)
			clear_bits |= EXTENT_DELALLOC_NEW | EXTENT_LOCKED;
		clear_extent_bit(&inode->io_tree, start_pos,
				 last_pos, clear_bits,
				 (clear_bits & EXTENT_LOCKED) ? 1 : 0,
				 0, cached_state, GFP_NOFS);
		if (ret)
			return ret;
		*lockstart = start_pos;
		*lockend = last_pos;
		ret = 1;
	}

	for (i = 0; i < num_pages; i++) {
		if (clear_page_dirty_for_io(pages[i]))
			account_page_redirty(pages[i]);
		set_page_extent_mapped(pages[i]);
		WARN_ON(!PageLocked(pages[i]));
	}

	return ret;
}

static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
				    size_t *write_bytes)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
	struct btrfs_root *root = inode->root;
	struct btrfs_ordered_extent *ordered;
	u64 lockstart, lockend;
	u64 num_bytes;
	int ret;

	ret = btrfs_start_write_no_snapshoting(root);
	if (!ret)
		return -ENOSPC;

	lockstart = round_down(pos, fs_info->sectorsize);
	lockend = round_up(pos + *write_bytes,
			   fs_info->sectorsize) - 1;

	while (1) {
		lock_extent(&inode->io_tree, lockstart, lockend);
		ordered = btrfs_lookup_ordered_range(inode, lockstart,
						     lockend - lockstart + 1);
		if (!ordered) {
			break;
		}
		unlock_extent(&inode->io_tree, lockstart, lockend);
		btrfs_start_ordered_extent(&inode->vfs_inode, ordered, 1);
		btrfs_put_ordered_extent(ordered);
	}

	num_bytes = lockend - lockstart + 1;
	ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes,
			       NULL, NULL, NULL);
	if (ret <= 0) {
		ret = 0;
		btrfs_end_write_no_snapshoting(root);
	} else {
		*write_bytes = min_t(size_t, *write_bytes,
				     num_bytes - pos + lockstart);
	}

	unlock_extent(&inode->io_tree, lockstart, lockend);

	return ret;
}
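/*
 * Clamp example (illustrative commentary, with assumed values): a NOCOW
 * write of *write_bytes == 1MiB at pos == 8192 where can_nocow_extent()
 * reports only num_bytes == 64KiB of nocow-able space from
 * lockstart == 8192 gets *write_bytes clamped to
 * 64KiB - (8192 - 8192) == 64KiB; the caller writes that much without a
 * data reservation and loops to handle the remainder.
 */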

static noinline ssize_t __btrfs_buffered_write(struct file *file,
					       struct iov_iter *i,
					       loff_t pos)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct page **pages = NULL;
	struct extent_state *cached_state = NULL;
	struct extent_changeset *data_reserved = NULL;
	u64 release_bytes = 0;
	u64 lockstart;
	u64 lockend;
	size_t num_written = 0;
	int nrptrs;
	int ret = 0;
	bool only_release_metadata = false;
	bool force_page_uptodate = false;
	bool need_unlock;

	nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE),
			PAGE_SIZE / (sizeof(struct page *)));
	nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied);
	nrptrs = max(nrptrs, 8);
	pages = kmalloc_array(nrptrs, sizeof(struct page *), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	while (iov_iter_count(i) > 0) {
		size_t offset = pos & (PAGE_SIZE - 1);
		size_t sector_offset;
		size_t write_bytes = min(iov_iter_count(i),
					 nrptrs * (size_t)PAGE_SIZE -
					 offset);
		size_t num_pages = DIV_ROUND_UP(write_bytes + offset,
						PAGE_SIZE);
		size_t reserve_bytes;
		size_t dirty_pages;
		size_t copied;
		size_t dirty_sectors;
		size_t num_sectors;

		WARN_ON(num_pages > nrptrs);

		/*
		 * Fault pages before locking them in prepare_pages
		 * to avoid recursive lock
		 */
		if (unlikely(iov_iter_fault_in_readable(i, write_bytes))) {
			ret = -EFAULT;
			break;
		}

		sector_offset = pos & (fs_info->sectorsize - 1);
		reserve_bytes = round_up(write_bytes + sector_offset,
					 fs_info->sectorsize);

		extent_changeset_release(data_reserved);
		ret = btrfs_check_data_free_space(inode, &data_reserved, pos,
						  write_bytes);
		if (ret < 0) {
			if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
						      BTRFS_INODE_PREALLOC)) &&
			    check_can_nocow(BTRFS_I(inode), pos,
					    &write_bytes) > 0) {
				/*
				 * For nodata cow case, no need to reserve
				 * data space.
				 */
				only_release_metadata = true;
				/*
				 * our prealloc extent may be smaller than
				 * write_bytes, so scale down.
				 */
				num_pages = DIV_ROUND_UP(write_bytes + offset,
							 PAGE_SIZE);
				reserve_bytes = round_up(write_bytes +
							 sector_offset,
							 fs_info->sectorsize);
			} else {
				break;
			}
		}

		ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
				reserve_bytes);
		if (ret) {
			if (!only_release_metadata)
				btrfs_free_reserved_data_space(inode,
						data_reserved, pos,
						write_bytes);
			else
				btrfs_end_write_no_snapshoting(root);
			break;
		}

		release_bytes = reserve_bytes;
		need_unlock = false;
again:
		/*
		 * This is going to setup the pages array with the number of
		 * pages we want, so we don't really need to worry about the
		 * contents of pages from loop to loop
		 */
		ret = prepare_pages(inode, pages, num_pages,
				    pos, write_bytes,
				    force_page_uptodate);
		if (ret)
			break;

		ret = lock_and_cleanup_extent_if_need(BTRFS_I(inode), pages,
				num_pages, pos, write_bytes, &lockstart,
				&lockend, &cached_state);
		if (ret < 0) {
			if (ret == -EAGAIN)
				goto again;
			break;
		} else if (ret > 0) {
			need_unlock = true;
			ret = 0;
		}

		copied = btrfs_copy_from_user(pos, write_bytes, pages, i);

		num_sectors = BTRFS_BYTES_TO_BLKS(fs_info, reserve_bytes);
		dirty_sectors = round_up(copied + sector_offset,
					 fs_info->sectorsize);
		dirty_sectors = BTRFS_BYTES_TO_BLKS(fs_info, dirty_sectors);

		/*
		 * if we have trouble faulting in the pages, fall
		 * back to one page at a time
		 */
		if (copied < write_bytes)
			nrptrs = 1;

		if (copied == 0) {
			force_page_uptodate = true;
			dirty_sectors = 0;
			dirty_pages = 0;
		} else {
			force_page_uptodate = false;
			dirty_pages = DIV_ROUND_UP(copied + offset,
						   PAGE_SIZE);
		}

		/*
		 * If we had a short copy we need to release the excess delalloc
		 * bytes we reserved.  We need to increment outstanding_extents
		 * because btrfs_delalloc_release_space and
		 * btrfs_delalloc_release_metadata will decrement it, but
		 * we still have an outstanding extent for the chunk we actually
		 * managed to copy.  If there's too many outstanding extents
		 * we'll decrement them at the end of the loop.
		 */
		if (num_sectors > dirty_sectors) {
			/* release everything except the sectors we dirtied */
			release_bytes -= dirty_sectors <<
						fs_info->sb->s_blocksize_bits;
			if (copied > 0) {
				spin_lock(&BTRFS_I(inode)->lock);
				BTRFS_I(inode)->outstanding_extents++;
				spin_unlock(&BTRFS_I(inode)->lock);
			}
			if (only_release_metadata) {
				btrfs_delalloc_release_metadata(BTRFS_I(inode),
								release_bytes);
			} else {
				u64 __pos;

				__pos = round_down(pos,
						   fs_info->sectorsize) +
					(dirty_pages << PAGE_SHIFT);
				btrfs_delalloc_release_space(inode,
						data_reserved, __pos,
						release_bytes);
			}
		}

		release_bytes = round_up(copied + sector_offset,
					 fs_info->sectorsize);

		if (copied > 0)
			ret = btrfs_dirty_pages(inode, pages, dirty_pages,
						pos, copied, NULL);
		if (need_unlock)
			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
					     lockstart, lockend, &cached_state,
					     GFP_NOFS);
		if (ret) {
			btrfs_drop_pages(pages, num_pages);
			break;
		}

		release_bytes = 0;
		if (only_release_metadata)
			btrfs_end_write_no_snapshoting(root);

		if (only_release_metadata && copied > 0) {
			lockstart = round_down(pos,
					       fs_info->sectorsize);
			lockend = round_up(pos + copied,
					   fs_info->sectorsize) - 1;

			set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
				       lockend, EXTENT_NORESERVE, NULL,
				       NULL, GFP_NOFS);
			only_release_metadata = false;
		}

		btrfs_drop_pages(pages, num_pages);

		cond_resched();

		balance_dirty_pages_ratelimited(inode->i_mapping);
		if (dirty_pages < (fs_info->nodesize >> PAGE_SHIFT) + 1)
			btrfs_btree_balance_dirty(fs_info);

		pos += copied;
		num_written += copied;
	}

	kfree(pages);

	if (release_bytes) {
		if (only_release_metadata) {
			btrfs_end_write_no_snapshoting(root);
			btrfs_delalloc_release_metadata(BTRFS_I(inode),
					release_bytes);
		} else {
			btrfs_delalloc_release_space(inode, data_reserved,
					round_down(pos, fs_info->sectorsize),
					release_bytes);
		}
	}

	extent_changeset_free(data_reserved);
	return num_written ? num_written : ret;
}

static ssize_t __btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	loff_t pos = iocb->ki_pos;
	ssize_t written;
	ssize_t written_buffered;
	loff_t endbyte;
	int err;

	written = generic_file_direct_write(iocb, from);

	if (written < 0 || !iov_iter_count(from))
		return written;

	pos += written;
	written_buffered = __btrfs_buffered_write(file, from, pos);
	if (written_buffered < 0) {
		err = written_buffered;
		goto out;
	}
	/*
	 * Ensure all data is persisted. We want the next direct IO read to be
	 * able to read what was just written.
	 */
	endbyte = pos + written_buffered - 1;
	err = btrfs_fdatawrite_range(inode, pos, endbyte);
	if (err)
		goto out;
	err = filemap_fdatawait_range(inode->i_mapping, pos, endbyte);
	if (err)
		goto out;
	written += written_buffered;
	iocb->ki_pos = pos + written_buffered;
	invalidate_mapping_pages(file->f_mapping, pos >> PAGE_SHIFT,
				 endbyte >> PAGE_SHIFT);
out:
	return written ? written : err;
}

static void update_time_for_write(struct inode *inode)
{
	struct timespec now;

	if (IS_NOCMTIME(inode))
		return;

	now = current_time(inode);
	if (!timespec_equal(&inode->i_mtime, &now))
		inode->i_mtime = now;

	if (!timespec_equal(&inode->i_ctime, &now))
		inode->i_ctime = now;

	if (IS_I_VERSION(inode))
		inode_inc_iversion(inode);
}

static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
				     struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 start_pos;
	u64 end_pos;
	ssize_t num_written = 0;
	bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
	ssize_t err;
	loff_t pos;
	size_t count = iov_iter_count(from);
	loff_t oldsize;
	int clean_page = 0;

	if (!inode_trylock(inode)) {
		if (iocb->ki_flags & IOCB_NOWAIT)
			return -EAGAIN;
		inode_lock(inode);
	}

	err = generic_write_checks(iocb, from);
	if (err <= 0) {
		inode_unlock(inode);
		return err;
	}

	pos = iocb->ki_pos;
	if (iocb->ki_flags & IOCB_NOWAIT) {
		/*
		 * We will allocate space in case nodatacow is not set,
		 * so bail
		 */
		if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
					      BTRFS_INODE_PREALLOC)) ||
		    check_can_nocow(BTRFS_I(inode), pos, &count) <= 0) {
			inode_unlock(inode);
			return -EAGAIN;
		}
	}

	current->backing_dev_info = inode_to_bdi(inode);
	err = file_remove_privs(file);
	if (err) {
		inode_unlock(inode);
		goto out;
	}

	/*
	 * If BTRFS flips readonly due to some impossible error
	 * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR),
	 * although we have opened a file as writable, we have
	 * to stop this write operation to ensure FS consistency.
	 */
	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
		inode_unlock(inode);
		err = -EROFS;
		goto out;
	}

	/*
	 * We reserve space for updating the inode when we reserve space for the
	 * extent we are going to write, so we will enospc out there.  We don't
	 * need to start yet another transaction to update the inode as we will
	 * update the inode when we finish writing whatever data we write.
	 */
	update_time_for_write(inode);

	start_pos = round_down(pos, fs_info->sectorsize);
	oldsize = i_size_read(inode);
	if (start_pos > oldsize) {
		/* Expand hole size to cover write data, preventing empty gap */
		end_pos = round_up(pos + count,
				   fs_info->sectorsize);
		err = btrfs_cont_expand(inode, oldsize, end_pos);
		if (err) {
			inode_unlock(inode);
			goto out;
		}
		if (start_pos > round_up(oldsize, fs_info->sectorsize))
			clean_page = 1;
	}

	if (sync)
		atomic_inc(&BTRFS_I(inode)->sync_writers);

	if (iocb->ki_flags & IOCB_DIRECT) {
		num_written = __btrfs_direct_write(iocb, from);
	} else {
		num_written = __btrfs_buffered_write(file, from, pos);
		if (num_written > 0)
			iocb->ki_pos = pos + num_written;
		if (clean_page)
			pagecache_isize_extended(inode, oldsize,
						 i_size_read(inode));
	}

	inode_unlock(inode);

	/*
	 * We also have to set last_sub_trans to the current log transid,
	 * otherwise subsequent syncs to a file that's been synced in this
	 * transaction will appear to have already occurred.
	 */
	spin_lock(&BTRFS_I(inode)->lock);
	BTRFS_I(inode)->last_sub_trans = root->log_transid;
	spin_unlock(&BTRFS_I(inode)->lock);
	if (num_written > 0)
		num_written = generic_write_sync(iocb, num_written);

	if (sync)
		atomic_dec(&BTRFS_I(inode)->sync_writers);
out:
	current->backing_dev_info = NULL;
	return num_written ? num_written : err;
}

int btrfs_release_file(struct inode *inode, struct file *filp)
{
	if (filp->private_data)
		btrfs_ioctl_trans_end(filp);

	/*
	 * ordered_data_close is set by setattr when we are about to truncate
	 * a file from a non-zero size to a zero size.  This tries to
	 * flush down new bytes that may have been written if the
	 * application were using truncate to replace a file in place.
	 */
	if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
			       &BTRFS_I(inode)->runtime_flags))
		filemap_flush(inode->i_mapping);
	return 0;
}

static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end)
{
	int ret;

	atomic_inc(&BTRFS_I(inode)->sync_writers);
	ret = btrfs_fdatawrite_range(inode, start, end);
	atomic_dec(&BTRFS_I(inode)->sync_writers);

	return ret;
}

/*
 * fsync call for both files and directories.  This logs the inode into
 * the tree log instead of forcing full commits whenever possible.
 *
 * It needs to call filemap_fdatawait so that all ordered extent updates are
 * in the metadata btree are up to date for copying to the log.
 *
 * It drops the inode mutex before doing the tree log commit.  This is an
 * important optimization for directories because holding the mutex prevents
 * new operations on the dir while we write to disk.
 */
int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_trans_handle *trans;
	struct btrfs_log_ctx ctx;
	int ret = 0, err;
	bool full_sync = 0;
	u64 len;

	/*
	 * The range length can be represented by u64, we have to do the
	 * typecasts to avoid signed overflow if it's [0, LLONG_MAX],
	 * eg. from fsync().
	 */
	len = (u64)end - (u64)start + 1;
	trace_btrfs_sync_file(file, datasync);

	/*
	 * We write the dirty pages in the range and wait until they complete
	 * out of the ->i_mutex. If so, we can flush the dirty pages by
	 * multi-task, and make the performance up.  See
	 * btrfs_wait_ordered_range for an explanation of the ASYNC check.
	 */
	ret = start_ordered_ops(inode, start, end);
	if (ret)
		goto out;

	inode_lock(inode);
	atomic_inc(&root->log_batch);
	full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
			     &BTRFS_I(inode)->runtime_flags);
	/*
	 * We might have had more pages made dirty after calling
	 * start_ordered_ops and before acquiring the inode's i_mutex.
	 */
	if (full_sync) {
		/*
		 * For a full sync, we need to make sure any ordered operations
		 * start and finish before we start logging the inode, so that
		 * all extents are persisted and the respective file extent
		 * items are in the fs/subvol btree.
		 */
		ret = btrfs_wait_ordered_range(inode, start, len);
	} else {
		/*
		 * Start any new ordered operations before starting to log the
		 * inode. We will wait for them to finish in btrfs_sync_log().
		 *
		 * Right before acquiring the inode's mutex, we might have new
		 * writes dirtying pages, which won't immediately start the
		 * respective ordered operations - that is done through the
		 * fill_delalloc callbacks invoked from the writepage and
		 * writepages address space operations. So make sure we start
		 * all ordered operations before starting to log our inode. Not
		 * doing this means that while logging the inode, writeback
		 * could start and invoke writepage/writepages, which would call
		 * the fill_delalloc callbacks (cow_file_range,
		 * submit_compressed_extents). These callbacks add new extent
		 * maps to the modified list of extents and inform us that
		 * there are new extents that we must log, which could end up
		 * logging extents that were not yet fully processed by
		 * btrfs_finish_ordered_io.
		 */
		ret = start_ordered_ops(inode, start, end);
	}
	if (ret) {
		inode_unlock(inode);
		goto out;
	}
	atomic_inc(&root->log_batch);

	/*
	 * If the last transaction that changed this file was before the current
	 * transaction and we have the full sync flag set in our inode, we can
	 * bail out now without any syncing.
	 *
	 * Note that we can't bail out if the full sync flag isn't set. This is
	 * because when the full sync flag is set we start all ordered extents
	 * and wait for them to fully complete - when they complete they update
	 * the inode's last_trans field through:
	 *
	 *     btrfs_finish_ordered_io() ->
	 *         btrfs_update_inode_fallback() ->
	 *             btrfs_update_inode() ->
	 *                 btrfs_set_inode_last_trans()
	 *
	 * So we are sure that last_trans is up to date and can do this check to
	 * bail out safely. For the fast path, when the full sync flag is not
	 * set in our inode, we can not do it because we start only our ordered
	 * extents and don't wait for them to complete (that is when
	 * btrfs_finish_ordered_io runs), so here at this point their last_trans
	 * value might be less than or equals to fs_info->last_trans_committed,
	 * and setting a speculative last_trans for an inode when a buffered
	 * write is made (such as fs_info->generation + 1 for example) would not
	 * be reliable since after setting the value and before fsync is called
	 * any number of transactions can start and commit (transaction kthread
	 * commits the current transaction periodically), and a transaction
	 * commit does not start nor waits for ordered extents to complete.
	 */
	smp_mb();
	if (btrfs_inode_in_log(BTRFS_I(inode), fs_info->generation) ||
	    (full_sync && BTRFS_I(inode)->last_trans <=
	     fs_info->last_trans_committed) ||
	    (!btrfs_have_ordered_extents_in_range(inode, start, len) &&
	     BTRFS_I(inode)->last_trans
	     <= fs_info->last_trans_committed)) {
		/*
		 * We've had everything committed since the last time we were
		 * modified so clear this flag in case it was set for whatever
		 * reason, it's no longer relevant.
		 */
		clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
			  &BTRFS_I(inode)->runtime_flags);
		/*
		 * An ordered extent might have started before and completed
		 * already with io errors, in which case the inode was not
		 * updated and we end up here. So check the inode's mapping
		 * for any errors that might have happened since we last
		 * checked called fsync.
		 */
		ret = filemap_check_wb_err(inode->i_mapping, file->f_wb_err);
		inode_unlock(inode);
		goto out;
	}

	/*
	 * ok we haven't committed the transaction yet, lets do a commit
	 */
	if (file->private_data)
		btrfs_ioctl_trans_end(file);

	/*
	 * We use start here because we will need to wait on the IO to complete
	 * in btrfs_sync_log, which could require joining a transaction (for
	 * example checking cross references in the nocow path).  If we use join
	 * here we could get into a situation where we're waiting on IO to
	 * happen from a transaction commit, which would then be blocked because
	 * we're still holding the i_mutex, leading to deadlock.  Using start
	 * instead avoids that dependency.
	 */
	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		inode_unlock(inode);
		goto out;
	}
	trans->sync = true;

	btrfs_init_log_ctx(&ctx, inode);

	ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx);
	if (ret < 0) {
		/* Fallthrough and commit/free transaction. */
		ret = 1;
	}

	/* we've logged all the items and now have a consistent
	 * version of the file in the log.  It is possible that
	 * someone will come in and modify the file, but that's
	 * fine because the log is consistent on disk, and we
	 * have references to all of the file's extents
	 *
	 * It is possible that someone will come in and log the
	 * file again, but that will end up using the synchronization
	 * inside btrfs_sync_log to keep things safe.
	 */
	inode_unlock(inode);

	/*
	 * If any of the ordered extents had an error, just return it to user
	 * space, so that the application knows some writes didn't succeed and
	 * can take proper action (retry for e.g.). Blindly committing the
	 * transaction in this case, would fool userspace that everything was
	 * successful. And we also want to make sure our log doesn't contain
	 * file extent items pointing to extents that weren't fully written to -
	 * just like in the non fast fsync path, where we check for the ordered
	 * operation's error flag before writing to the log tree and return -EIO
	 * if any of them had this flag set (errio) - this happens when
	 * writeback for extent buffers of the log tree fail.
	 */
	if (ctx.io_err) {
		btrfs_end_transaction(trans);
		ret = ctx.io_err;
		goto out;
	}

	if (ret != BTRFS_NO_LOG_SYNC) {
		if (!ret) {
			ret = btrfs_sync_log(trans, root, &ctx);
			if (!ret) {
				ret = btrfs_end_transaction(trans);
				goto out;
			}
		}
		if (!full_sync) {
			ret = btrfs_wait_ordered_range(inode, start, len);
			if (ret) {
				btrfs_end_transaction(trans);
				goto out;
			}
		}
		ret = btrfs_commit_transaction(trans);
	} else {
		ret = btrfs_end_transaction(trans);
	}
out:
	err = file_check_and_advance_wb_err(file);
	if (!ret)
		ret = err;
	return ret > 0 ? -EIO : ret;
}

static const struct vm_operations_struct btrfs_file_vm_ops = {
	.fault		= filemap_fault,
	.map_pages	= filemap_map_pages,
	.page_mkwrite	= btrfs_page_mkwrite,
};

static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct address_space *mapping = filp->f_mapping;

	if (!mapping->a_ops->readpage)
		return -ENOEXEC;

	file_accessed(filp);
	vma->vm_ops = &btrfs_file_vm_ops;

	return 0;
}

static int hole_mergeable(struct btrfs_inode *inode, struct extent_buffer *leaf,
			  int slot, u64 start, u64 end)
{
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;

	if (slot < 0 || slot >= btrfs_header_nritems(leaf))
		return 0;

	btrfs_item_key_to_cpu(leaf, &key, slot);
	if (key.objectid != btrfs_ino(inode) ||
	    key.type != BTRFS_EXTENT_DATA_KEY)
		return 0;

	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);

	if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
		return 0;

	if (btrfs_file_extent_disk_bytenr(leaf, fi))
		return 0;

	if (key.offset == end)
		return 1;
	if (key.offset + btrfs_file_extent_num_bytes(leaf, fi) == start)
		return 1;
	return 0;
}
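/*
 * Note (added commentary): only explicit holes qualify here - a REG file
 * extent item with disk_bytenr == 0.  The hole can be extended backwards
 * (key.offset == end) or forwards (key.offset + num_bytes == start), which
 * is how fill_holes() below merges a punched hole with its neighbours.
 */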
2306
2307static int fill_holes(struct btrfs_trans_handle *trans,
2308 struct btrfs_inode *inode,
2309 struct btrfs_path *path, u64 offset, u64 end)
2310{
2311 struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
2312 struct btrfs_root *root = inode->root;
2313 struct extent_buffer *leaf;
2314 struct btrfs_file_extent_item *fi;
2315 struct extent_map *hole_em;
2316 struct extent_map_tree *em_tree = &inode->extent_tree;
2317 struct btrfs_key key;
2318 int ret;
2319
2320 if (btrfs_fs_incompat(fs_info, NO_HOLES))
2321 goto out;
2322
2323 key.objectid = btrfs_ino(inode);
2324 key.type = BTRFS_EXTENT_DATA_KEY;
2325 key.offset = offset;
2326
2327 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
2328 if (ret <= 0) {
2329
2330
2331
2332
2333 if (ret == 0)
2334 ret = -EINVAL;
2335 return ret;
2336 }
2337
2338 leaf = path->nodes[0];
2339 if (hole_mergeable(inode, leaf, path->slots[0] - 1, offset, end)) {
2340 u64 num_bytes;
2341
2342 path->slots[0]--;
2343 fi = btrfs_item_ptr(leaf, path->slots[0],
2344 struct btrfs_file_extent_item);
2345 num_bytes = btrfs_file_extent_num_bytes(leaf, fi) +
2346 end - offset;
2347 btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
2348 btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
2349 btrfs_set_file_extent_offset(leaf, fi, 0);
2350 btrfs_mark_buffer_dirty(leaf);
2351 goto out;
2352 }
2353
2354 if (hole_mergeable(inode, leaf, path->slots[0], offset, end)) {
2355 u64 num_bytes;
2356
2357 key.offset = offset;
2358 btrfs_set_item_key_safe(fs_info, path, &key);
2359 fi = btrfs_item_ptr(leaf, path->slots[0],
2360 struct btrfs_file_extent_item);
2361 num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end -
2362 offset;
2363 btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
2364 btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
2365 btrfs_set_file_extent_offset(leaf, fi, 0);
2366 btrfs_mark_buffer_dirty(leaf);
2367 goto out;
2368 }
2369 btrfs_release_path(path);
2370
2371 ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode),
2372 offset, 0, 0, end - offset, 0, end - offset, 0, 0, 0);
2373 if (ret)
2374 return ret;
2375
2376out:
2377 btrfs_release_path(path);
2378
2379 hole_em = alloc_extent_map();
2380 if (!hole_em) {
2381 btrfs_drop_extent_cache(inode, offset, end - 1, 0);
2382 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
2383 } else {
2384 hole_em->start = offset;
2385 hole_em->len = end - offset;
2386 hole_em->ram_bytes = hole_em->len;
2387 hole_em->orig_start = offset;
2388
2389 hole_em->block_start = EXTENT_MAP_HOLE;
2390 hole_em->block_len = 0;
2391 hole_em->orig_block_len = 0;
2392 hole_em->bdev = fs_info->fs_devices->latest_bdev;
2393 hole_em->compress_type = BTRFS_COMPRESS_NONE;
2394 hole_em->generation = trans->transid;
2395
2396 do {
2397 btrfs_drop_extent_cache(inode, offset, end - 1, 0);
2398 write_lock(&em_tree->lock);
2399 ret = add_extent_mapping(em_tree, hole_em, 1);
2400 write_unlock(&em_tree->lock);
2401 } while (ret == -EEXIST);
2402 free_extent_map(hole_em);
2403 if (ret)
2404 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
2405 &inode->runtime_flags);
2406 }
2407
2408 return 0;
2409}

/*
 * Find a hole extent on the given inode and change @start/@len to point past
 * it: a hole here is an extent map with no backing data whose range satisfies
 * em->start <= start && em->start + em->len > start.
 * Returns 1 when a hole was found and @start/@len were adjusted, 0 when the
 * range starts with data, and a negative errno on failure.
 */
static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_map *em;
	int ret = 0;

	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0,
			      round_down(*start, fs_info->sectorsize),
			      round_up(*len, fs_info->sectorsize), 0);
	if (IS_ERR(em))
		return PTR_ERR(em);

	/* Hole or vacuum extent (the latter only exists in no-holes mode) */
	if (em->block_start == EXTENT_MAP_HOLE) {
		ret = 1;
		*len = em->start + em->len > *start + *len ?
		       0 : *start + *len - em->start - em->len;
		*start = em->start + em->len;
	}
	free_extent_map(em);
	return ret;
}
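
/*
 * Worked example for find_first_non_hole() (illustrative numbers): with
 * *start = 4096 and *len = 16384, if the extent map lookup reports a hole
 * covering [0, 12288), the function returns 1 and skips the hole, leaving
 * *start = 12288 and *len = 4096 + 16384 - 12288 = 8192. If the hole covers
 * the whole range instead, *len ends up 0, which callers treat as "nothing
 * left to do".
 */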

static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_state *cached_state = NULL;
	struct btrfs_path *path;
	struct btrfs_block_rsv *rsv;
	struct btrfs_trans_handle *trans;
	u64 lockstart;
	u64 lockend;
	u64 tail_start;
	u64 tail_len;
	u64 orig_start = offset;
	u64 cur_offset;
	u64 min_size = btrfs_calc_trans_metadata_size(fs_info, 1);
	u64 drop_end;
	int ret = 0;
	int err = 0;
	unsigned int rsv_count;
	bool same_block;
	bool no_holes = btrfs_fs_incompat(fs_info, NO_HOLES);
	u64 ino_size;
	bool truncated_block = false;
	bool updated_inode = false;

	ret = btrfs_wait_ordered_range(inode, offset, len);
	if (ret)
		return ret;

	inode_lock(inode);
	ino_size = round_up(inode->i_size, fs_info->sectorsize);
	ret = find_first_non_hole(inode, &offset, &len);
	if (ret < 0)
		goto out_only_mutex;
	if (ret && !len) {
		/* Already in a large hole */
		ret = 0;
		goto out_only_mutex;
	}

	lockstart = round_up(offset, btrfs_inode_sectorsize(inode));
	lockend = round_down(offset + len,
			     btrfs_inode_sectorsize(inode)) - 1;
	same_block = (BTRFS_BYTES_TO_BLKS(fs_info, offset))
		== (BTRFS_BYTES_TO_BLKS(fs_info, offset + len - 1));
	/*
	 * We needn't truncate any block which is beyond the end of the file,
	 * because we are sure there is no data there. Only zero a partial
	 * block when the hole starts and ends within the same block and does
	 * not cover the entire block.
	 */
	if (same_block && len < fs_info->sectorsize) {
		if (offset < ino_size) {
			truncated_block = true;
			ret = btrfs_truncate_block(inode, offset, len, 0);
		} else {
			ret = 0;
		}
		goto out_only_mutex;
	}

	/* Zero the tail part of the first block */
	if (offset < ino_size) {
		truncated_block = true;
		ret = btrfs_truncate_block(inode, offset, 0, 0);
		if (ret) {
			inode_unlock(inode);
			return ret;
		}
	}

	/*
	 * Check the aligned pages after the first unaligned page. If
	 * offset != orig_start, the first unaligned page and several
	 * following pages are already in holes, so the extra check can be
	 * skipped.
	 */
	if (offset == orig_start) {
		/* After truncating the block, check for a hole again */
		len = offset + len - lockstart;
		offset = lockstart;
		ret = find_first_non_hole(inode, &offset, &len);
		if (ret < 0)
			goto out_only_mutex;
		if (ret && !len) {
			ret = 0;
			goto out_only_mutex;
		}
		lockstart = offset;
	}

	/* Check whether the tail unaligned part is in a hole */
	tail_start = lockend + 1;
	tail_len = offset + len - tail_start;
	if (tail_len) {
		ret = find_first_non_hole(inode, &tail_start, &tail_len);
		if (unlikely(ret < 0))
			goto out_only_mutex;
		if (!ret) {
			/* Zero the front part of the last block */
			if (tail_start + tail_len < ino_size) {
				truncated_block = true;
				ret = btrfs_truncate_block(inode,
							tail_start + tail_len,
							0, 1);
				if (ret)
					goto out_only_mutex;
			}
		}
	}

	if (lockend < lockstart) {
		ret = 0;
		goto out_only_mutex;
	}

	while (1) {
		struct btrfs_ordered_extent *ordered;

		truncate_pagecache_range(inode, lockstart, lockend);

		lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
				 &cached_state);
		ordered = btrfs_lookup_first_ordered_extent(inode, lockend);

		/*
		 * We need to make sure there are no ordered extents in this
		 * range and nobody raced in and read a page in this range.
		 * If that happened, we need to try again.
		 */
		if ((!ordered ||
		    (ordered->file_offset + ordered->len <= lockstart ||
		     ordered->file_offset > lockend)) &&
		     !btrfs_page_exists_in_range(inode, lockstart, lockend)) {
			if (ordered)
				btrfs_put_ordered_extent(ordered);
			break;
		}
		if (ordered)
			btrfs_put_ordered_extent(ordered);
		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
				     lockend, &cached_state, GFP_NOFS);
		ret = btrfs_wait_ordered_range(inode, lockstart,
					       lockend - lockstart + 1);
		if (ret) {
			inode_unlock(inode);
			return ret;
		}
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	rsv = btrfs_alloc_block_rsv(fs_info, BTRFS_BLOCK_RSV_TEMP);
	if (!rsv) {
		ret = -ENOMEM;
		goto out_free;
	}
	rsv->size = btrfs_calc_trans_metadata_size(fs_info, 1);
	rsv->failfast = 1;

	/*
	 * 1 - update the inode
	 * 1 - removing the extents in the range
	 * 1 - adding the hole extent if no_holes isn't set
	 */
	rsv_count = no_holes ? 2 : 3;
	trans = btrfs_start_transaction(root, rsv_count);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		goto out_free;
	}

	ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv,
				      min_size, 0);
	BUG_ON(ret);
	trans->block_rsv = rsv;

	cur_offset = lockstart;
	len = lockend - cur_offset;
	while (cur_offset < lockend) {
		ret = __btrfs_drop_extents(trans, root, inode, path,
					   cur_offset, lockend + 1,
					   &drop_end, 1, 0, 0, NULL);
		if (ret != -ENOSPC)
			break;

		trans->block_rsv = &fs_info->trans_block_rsv;

		if (cur_offset < drop_end && cur_offset < ino_size) {
			ret = fill_holes(trans, BTRFS_I(inode), path,
					cur_offset, drop_end);
			if (ret) {
				/*
				 * If we failed then we didn't insert our hole
				 * entries for the area we dropped, so now the
				 * fs is corrupted, so we must abort the
				 * transaction.
				 */
				btrfs_abort_transaction(trans, ret);
				err = ret;
				break;
			}
		}

		cur_offset = drop_end;

		ret = btrfs_update_inode(trans, root, inode);
		if (ret) {
			err = ret;
			break;
		}

		btrfs_end_transaction(trans);
		btrfs_btree_balance_dirty(fs_info);

		trans = btrfs_start_transaction(root, rsv_count);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			trans = NULL;
			break;
		}

		ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv,
					      rsv, min_size, 0);
		BUG_ON(ret);
		trans->block_rsv = rsv;

		ret = find_first_non_hole(inode, &cur_offset, &len);
		if (unlikely(ret < 0))
			break;
		if (ret && !len) {
			ret = 0;
			break;
		}
	}

	if (ret) {
		err = ret;
		goto out_trans;
	}

	trans->block_rsv = &fs_info->trans_block_rsv;
	/*
	 * If we are using the NO_HOLES feature we might have had already a
	 * hole that overlaps a part of the region [lockstart, lockend] and
	 * ends at (or beyond) lockend. Since we have no file extent items to
	 * represent holes, drop_end can be less than lockend and so we must
	 * make sure we have an extent map representing the existing hole (the
	 * call to __btrfs_drop_extents() might have dropped the existing
	 * extent map representing it), otherwise the fast fsync path will
	 * not record the existence of the hole region
	 * [existing_hole_start, lockend].
	 */
	if (drop_end <= lockend)
		drop_end = lockend + 1;
	/*
	 * Don't insert a file hole extent item if it's for a range beyond eof
	 * (because it's useless) or if it represents a 0 length hole.
	 */
	if (cur_offset < ino_size && cur_offset < drop_end) {
		ret = fill_holes(trans, BTRFS_I(inode), path,
				cur_offset, drop_end);
		if (ret) {
			/* Same comment as above. */
			btrfs_abort_transaction(trans, ret);
			err = ret;
			goto out_trans;
		}
	}

out_trans:
	if (!trans)
		goto out_free;

	inode_inc_iversion(inode);
	inode->i_mtime = inode->i_ctime = current_time(inode);

	trans->block_rsv = &fs_info->trans_block_rsv;
	ret = btrfs_update_inode(trans, root, inode);
	updated_inode = true;
	btrfs_end_transaction(trans);
	btrfs_btree_balance_dirty(fs_info);
out_free:
	btrfs_free_path(path);
	btrfs_free_block_rsv(fs_info, rsv);
out:
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			     &cached_state, GFP_NOFS);
out_only_mutex:
	if (!updated_inode && truncated_block && !ret && !err) {
		/*
		 * If we only end up zeroing part of a page, we still need to
		 * update the inode item, so that all the time fields are
		 * updated as well as the new_size getting set to the actual
		 * size.
		 */
		trans = btrfs_start_transaction(root, 1);
		if (IS_ERR(trans)) {
			err = PTR_ERR(trans);
		} else {
			err = btrfs_update_inode(trans, root, inode);
			ret = btrfs_end_transaction(trans);
		}
	}
	inode_unlock(inode);
	if (ret && !err)
		err = ret;
	return err;
}
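
/*
 * Example (userspace, illustrative only): btrfs_punch_hole() is reached via
 * the fallocate(2) syscall. The VFS requires FALLOC_FL_KEEP_SIZE to accompany
 * FALLOC_FL_PUNCH_HOLE, and reads of the punched range return zeroes
 * afterwards. This is a sketch, not part of this file:
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <linux/falloc.h>
 *
 *	int punch(int fd, off_t offset, off_t len)
 *	{
 *		return fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
 *				 offset, len);
 *	}
 */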

/* Helper structure to record which range is already reserved */
struct falloc_range {
	struct list_head list;
	u64 start;
	u64 len;
};

/*
 * Helper function to add a falloc range.
 *
 * The caller should have locked the larger extent range containing
 * [start, start + len).
 */
static int add_falloc_range(struct list_head *head, u64 start, u64 len)
{
	struct falloc_range *prev = NULL;
	struct falloc_range *range = NULL;

	if (list_empty(head))
		goto insert;

	/*
	 * As fallocate iterates in increasing offset order, we only need to
	 * check the last range for a possible merge.
	 */
	prev = list_entry(head->prev, struct falloc_range, list);
	if (prev->start + prev->len == start) {
		prev->len += len;
		return 0;
	}
insert:
	range = kmalloc(sizeof(*range), GFP_KERNEL);
	if (!range)
		return -ENOMEM;
	range->start = start;
	range->len = len;
	list_add_tail(&range->list, head);
	return 0;
}
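
/*
 * Illustrative behaviour of add_falloc_range(): because btrfs_fallocate()
 * walks the range in increasing offset order, contiguous ranges coalesce.
 * Adding (start 0, len 4096) and then (4096, 8192) leaves one entry covering
 * (0, 12288); adding (20480, 4096) afterwards appends a second entry since it
 * is not contiguous with the list tail.
 */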

static long btrfs_fallocate(struct file *file, int mode,
			    loff_t offset, loff_t len)
{
	struct inode *inode = file_inode(file);
	struct extent_state *cached_state = NULL;
	struct extent_changeset *data_reserved = NULL;
	struct falloc_range *range;
	struct falloc_range *tmp;
	struct list_head reserve_list;
	u64 cur_offset;
	u64 last_byte;
	u64 alloc_start;
	u64 alloc_end;
	u64 alloc_hint = 0;
	u64 locked_end;
	u64 actual_end = 0;
	struct extent_map *em;
	int blocksize = btrfs_inode_sectorsize(inode);
	int ret;

	alloc_start = round_down(offset, blocksize);
	alloc_end = round_up(offset + len, blocksize);
	cur_offset = alloc_start;

	/* Make sure we aren't being given a bogus mode */
	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
		return -EOPNOTSUPP;

	if (mode & FALLOC_FL_PUNCH_HOLE)
		return btrfs_punch_hole(inode, offset, len);

	/*
	 * Only trigger disk allocation here, don't reserve qgroup space yet.
	 * Qgroup space is reserved later, per hole in the range.
	 */
	ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode),
			alloc_end - alloc_start);
	if (ret < 0)
		return ret;

	inode_lock(inode);

	if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) {
		ret = inode_newsize_ok(inode, offset + len);
		if (ret)
			goto out;
	}

	/*
	 * TODO: Move these two operations after we have checked
	 * accurate reserved space, or fallocate can still fail but
	 * with page truncated or size expanded.
	 *
	 * But that's a minor problem and won't do much harm BTW.
	 */
	if (alloc_start > inode->i_size) {
		ret = btrfs_cont_expand(inode, i_size_read(inode),
					alloc_start);
		if (ret)
			goto out;
	} else if (offset + len > inode->i_size) {
		/*
		 * If we are fallocating from the end of the file onward we
		 * need to zero out the end of the block if i_size lands in
		 * the middle of a block.
		 */
		ret = btrfs_truncate_block(inode, inode->i_size, 0, 0);
		if (ret)
			goto out;
	}

	/*
	 * Wait for ordered IO before we have any locks. We'll loop again
	 * below with the locks held.
	 */
	ret = btrfs_wait_ordered_range(inode, alloc_start,
				       alloc_end - alloc_start);
	if (ret)
		goto out;

	locked_end = alloc_end - 1;
	while (1) {
		struct btrfs_ordered_extent *ordered;

		/* The extent lock is ordered inside the running transaction */
		lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
				 locked_end, &cached_state);
		ordered = btrfs_lookup_first_ordered_extent(inode,
							    alloc_end - 1);
		if (ordered &&
		    ordered->file_offset + ordered->len > alloc_start &&
		    ordered->file_offset < alloc_end) {
			btrfs_put_ordered_extent(ordered);
			unlock_extent_cached(&BTRFS_I(inode)->io_tree,
					     alloc_start, locked_end,
					     &cached_state, GFP_KERNEL);
			/*
			 * We can't wait on the range with the transaction
			 * running or with the extent lock held.
			 */
			ret = btrfs_wait_ordered_range(inode, alloc_start,
						       alloc_end - alloc_start);
			if (ret)
				goto out;
		} else {
			if (ordered)
				btrfs_put_ordered_extent(ordered);
			break;
		}
	}

	/* First, check if we exceed the qgroup limit */
	INIT_LIST_HEAD(&reserve_list);
	while (1) {
		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
				      alloc_end - cur_offset, 0);
		if (IS_ERR(em)) {
			ret = PTR_ERR(em);
			break;
		}
		last_byte = min(extent_map_end(em), alloc_end);
		actual_end = min_t(u64, extent_map_end(em), offset + len);
		last_byte = ALIGN(last_byte, blocksize);
		if (em->block_start == EXTENT_MAP_HOLE ||
		    (cur_offset >= inode->i_size &&
		     !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
			ret = add_falloc_range(&reserve_list, cur_offset,
					       last_byte - cur_offset);
			if (ret < 0) {
				free_extent_map(em);
				break;
			}
			ret = btrfs_qgroup_reserve_data(inode, &data_reserved,
					cur_offset, last_byte - cur_offset);
			if (ret < 0) {
				free_extent_map(em);
				break;
			}
		} else {
			/*
			 * We do not need to reserve an unwritten extent for
			 * this range; free the reserved data space first,
			 * otherwise it would result in a false ENOSPC error.
			 */
			btrfs_free_reserved_data_space(inode, data_reserved,
					cur_offset, last_byte - cur_offset);
		}
		free_extent_map(em);
		cur_offset = last_byte;
		if (cur_offset >= alloc_end)
			break;
	}

	/*
	 * If ret is still 0 we are clear to preallocate; otherwise just clean
	 * up the list and bail out.
	 */
	list_for_each_entry_safe(range, tmp, &reserve_list, list) {
		if (!ret)
			ret = btrfs_prealloc_file_range(inode, mode,
					range->start,
					range->len, i_blocksize(inode),
					offset + len, &alloc_hint);
		else
			btrfs_free_reserved_data_space(inode,
					data_reserved, range->start,
					range->len);
		list_del(&range->list);
		kfree(range);
	}
	if (ret < 0)
		goto out_unlock;

	if (actual_end > inode->i_size &&
	    !(mode & FALLOC_FL_KEEP_SIZE)) {
		struct btrfs_trans_handle *trans;
		struct btrfs_root *root = BTRFS_I(inode)->root;

		/*
		 * We didn't need to allocate any more space, but we
		 * still extended the size of the file so we need to
		 * update i_size and the inode item.
		 */
		trans = btrfs_start_transaction(root, 1);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
		} else {
			inode->i_ctime = current_time(inode);
			i_size_write(inode, actual_end);
			btrfs_ordered_update_i_size(inode, actual_end, NULL);
			ret = btrfs_update_inode(trans, root, inode);
			if (ret)
				btrfs_end_transaction(trans);
			else
				ret = btrfs_end_transaction(trans);
		}
	}
out_unlock:
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
			     &cached_state, GFP_KERNEL);
out:
	inode_unlock(inode);
	/* Let go of our reservation. */
	if (ret != 0)
		btrfs_free_reserved_data_space(inode, data_reserved,
				alloc_start, alloc_end - cur_offset);
	extent_changeset_free(data_reserved);
	return ret;
}
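
/*
 * Example (userspace, illustrative only): preallocation through
 * btrfs_fallocate(). With mode 0 the file grows if offset + len exceeds
 * i_size; with FALLOC_FL_KEEP_SIZE blocks are reserved as unwritten extents
 * without changing the reported file size.
 *
 *	#include <fcntl.h>
 *
 *	// Reserve 1 MiB at the start of the file, growing it if necessary.
 *	int prealloc_1mib(int fd)
 *	{
 *		return fallocate(fd, 0, 0, 1024 * 1024);
 *	}
 */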

static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_map *em = NULL;
	struct extent_state *cached_state = NULL;
	u64 lockstart;
	u64 lockend;
	u64 start;
	u64 len;
	int ret = 0;

	if (inode->i_size == 0)
		return -ENXIO;

	/*
	 * *offset can be negative; in that case we start looking for
	 * DATA/HOLE from the very start of the file.
	 */
	start = max_t(loff_t, 0, *offset);

	lockstart = round_down(start, fs_info->sectorsize);
	lockend = round_up(i_size_read(inode),
			   fs_info->sectorsize);
	if (lockend <= lockstart)
		lockend = lockstart + fs_info->sectorsize;
	lockend--;
	len = lockend - lockstart + 1;

	lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			 &cached_state);

	while (start < inode->i_size) {
		em = btrfs_get_extent_fiemap(BTRFS_I(inode), NULL, 0,
					     start, len, 0);
		if (IS_ERR(em)) {
			ret = PTR_ERR(em);
			em = NULL;
			break;
		}

		if (whence == SEEK_HOLE &&
		    (em->block_start == EXTENT_MAP_HOLE ||
		     test_bit(EXTENT_FLAG_PREALLOC, &em->flags)))
			break;
		else if (whence == SEEK_DATA &&
			 (em->block_start != EXTENT_MAP_HOLE &&
			  !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)))
			break;

		start = em->start + em->len;
		free_extent_map(em);
		em = NULL;
		cond_resched();
	}
	free_extent_map(em);
	if (!ret) {
		if (whence == SEEK_DATA && start >= inode->i_size)
			ret = -ENXIO;
		else
			*offset = min_t(loff_t, start, inode->i_size);
	}
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
			     &cached_state, GFP_NOFS);
	return ret;
}
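
/*
 * Example (userspace, illustrative only): the SEEK_DATA/SEEK_HOLE whence
 * values served by find_desired_extent() let tools skip holes when copying.
 * Note that preallocated (unwritten) extents are reported as holes here.
 *
 *	#define _GNU_SOURCE
 *	#include <unistd.h>
 *
 *	// Find the next data region at or after pos; returns -1 with
 *	// errno == ENXIO when pos is at or past the end of the file.
 *	off_t next_data(int fd, off_t pos)
 *	{
 *		return lseek(fd, pos, SEEK_DATA);
 *	}
 */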

static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	int ret;

	inode_lock(inode);
	switch (whence) {
	case SEEK_END:
	case SEEK_CUR:
		offset = generic_file_llseek(file, offset, whence);
		goto out;
	case SEEK_DATA:
	case SEEK_HOLE:
		if (offset >= i_size_read(inode)) {
			inode_unlock(inode);
			return -ENXIO;
		}

		ret = find_desired_extent(inode, &offset, whence);
		if (ret) {
			inode_unlock(inode);
			return ret;
		}
	}

	offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
out:
	inode_unlock(inode);
	return offset;
}

static int btrfs_file_open(struct inode *inode, struct file *filp)
{
	/* Advertise support for non-blocking (RWF_NOWAIT) I/O. */
	filp->f_mode |= FMODE_AIO_NOWAIT;
	return generic_file_open(inode, filp);
}

const struct file_operations btrfs_file_operations = {
	.llseek		= btrfs_file_llseek,
	.read_iter	= generic_file_read_iter,
	.splice_read	= generic_file_splice_read,
	.write_iter	= btrfs_file_write_iter,
	.mmap		= btrfs_file_mmap,
	.open		= btrfs_file_open,
	.release	= btrfs_release_file,
	.fsync		= btrfs_sync_file,
	.fallocate	= btrfs_fallocate,
	.unlocked_ioctl	= btrfs_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= btrfs_compat_ioctl,
#endif
	.clone_file_range = btrfs_clone_file_range,
	.dedupe_file_range = btrfs_dedupe_file_range,
};

void btrfs_auto_defrag_exit(void)
{
	kmem_cache_destroy(btrfs_inode_defrag_cachep);
}

int btrfs_auto_defrag_init(void)
{
	btrfs_inode_defrag_cachep = kmem_cache_create("btrfs_inode_defrag",
					sizeof(struct inode_defrag), 0,
					SLAB_MEM_SPREAD,
					NULL);
	if (!btrfs_inode_defrag_cachep)
		return -ENOMEM;

	return 0;
}

int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end)
{
	int ret;

	/*
	 * With compression we will find and lock a dirty page and clear the
	 * first one as dirty, set up an async extent, and immediately return
	 * with the entire range locked but with nobody actually marked with
	 * writeback. So we can't just filemap_write_and_wait_range() and
	 * expect it to work, since it will just kick off a thread to do the
	 * actual work. Instead, call filemap_fdatawrite_range() a second
	 * time: it waits on the page lock, which isn't released until after
	 * the pages have been marked as writeback, so by the time it returns
	 * the ordered extents exist and won't be missed.
	 */
	ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
	if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
			     &BTRFS_I(inode)->runtime_flags))
		ret = filemap_fdatawrite_range(inode->i_mapping, start, end);

	return ret;
}